├── LICENSE ├── Makefile ├── NOTICE ├── README.md ├── data └── .gitignore ├── docker └── Dockerfile ├── experiments ├── crowdhuman.sh ├── kitti_fulltrain.sh ├── kitti_half.sh ├── mot17_fulltrain.sh ├── mot17_half.sh ├── nuScenes_3Dtracking.sh └── pd.sh ├── readme ├── DATA.md ├── GETTING_STARTED.md ├── INSTALL.md └── method.png ├── src ├── _init_paths.py ├── lib │ ├── dataset │ │ ├── dataset_factory.py │ │ ├── datasets │ │ │ ├── crowdhuman.py │ │ │ ├── custom_dataset.py │ │ │ ├── kitti_tracking.py │ │ │ ├── mot.py │ │ │ ├── nuscenes_tracking.py │ │ │ └── pd_tracking.py │ │ ├── generic_dataset.py │ │ ├── joint_loader.py │ │ └── video_dataset.py │ ├── detector.py │ ├── external │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── nms.pyx │ │ └── setup.py │ ├── logger.py │ ├── model │ │ ├── ConvGRU.py │ │ ├── data_parallel.py │ │ ├── decode.py │ │ ├── losses.py │ │ ├── matcher.py │ │ ├── model.py │ │ ├── networks │ │ │ ├── backbones │ │ │ │ ├── dla.py │ │ │ │ ├── mobilenet.py │ │ │ │ └── resnet.py │ │ │ ├── base_model.py │ │ │ ├── dla.py │ │ │ ├── dlav0.py │ │ │ ├── generic_network.py │ │ │ ├── necks │ │ │ │ ├── dlaup.py │ │ │ │ └── msraup.py │ │ │ ├── resdcn.py │ │ │ └── resnet.py │ │ ├── scatter_gather.py │ │ └── utils.py │ ├── opts.py │ ├── trainer.py │ └── utils │ │ ├── __init__.py │ │ ├── ddd_utils.py │ │ ├── debugger.py │ │ ├── image.py │ │ ├── pose.py │ │ ├── post_process.py │ │ ├── tracker.py │ │ └── utils.py ├── main.py ├── test.py └── tools │ ├── _init_paths.py │ ├── annot_bbox.py │ ├── convert_crowdhuman_to_coco.py │ ├── convert_kitti_to_tao.py │ ├── convert_kittitrack_to_coco.py │ ├── convert_mot_det_to_results.py │ ├── convert_mot_to_coco.py │ ├── convert_nuScenes.py │ ├── eval_kitti_track │ ├── data │ │ └── tracking │ │ │ ├── evaluate_tracking.seqmap │ │ │ ├── evaluate_tracking.seqmap.test │ │ │ ├── evaluate_tracking.seqmap.training │ │ │ ├── evaluate_trackingtrain_1-2.seqmap │ │ │ ├── evaluate_trackingtrain_2-2.seqmap │ │ │ ├── evaluate_trackingval_half.seqmap │ │ │ ├── label_02 │ │ │ ├── 0000.txt │ │ │ ├── 0001.txt │ │ │ ├── 0002.txt │ │ │ ├── 0003.txt │ │ │ ├── 0004.txt │ │ │ ├── 0005.txt │ │ │ ├── 0006.txt │ │ │ ├── 0007.txt │ │ │ ├── 0008.txt │ │ │ ├── 0009.txt │ │ │ ├── 0010.txt │ │ │ ├── 0011.txt │ │ │ ├── 0012.txt │ │ │ ├── 0013.txt │ │ │ ├── 0014.txt │ │ │ ├── 0015.txt │ │ │ ├── 0016.txt │ │ │ ├── 0017.txt │ │ │ ├── 0018.txt │ │ │ ├── 0019.txt │ │ │ └── 0020.txt │ │ │ ├── label_02_train_half │ │ │ ├── 0000.txt │ │ │ ├── 0001.txt │ │ │ ├── 0002.txt │ │ │ ├── 0003.txt │ │ │ ├── 0004.txt │ │ │ ├── 0005.txt │ │ │ ├── 0006.txt │ │ │ ├── 0007.txt │ │ │ ├── 0008.txt │ │ │ ├── 0009.txt │ │ │ ├── 0010.txt │ │ │ ├── 0011.txt │ │ │ ├── 0012.txt │ │ │ ├── 0013.txt │ │ │ ├── 0014.txt │ │ │ ├── 0015.txt │ │ │ ├── 0016.txt │ │ │ ├── 0017.txt │ │ │ ├── 0018.txt │ │ │ ├── 0019.txt │ │ │ └── 0020.txt │ │ │ └── label_02_val_half │ │ │ ├── 0000.txt │ │ │ ├── 0001.txt │ │ │ ├── 0002.txt │ │ │ ├── 0003.txt │ │ │ ├── 0004.txt │ │ │ ├── 0005.txt │ │ │ ├── 0006.txt │ │ │ ├── 0007.txt │ │ │ ├── 0008.txt │ │ │ ├── 0009.txt │ │ │ ├── 0010.txt │ │ │ ├── 0011.txt │ │ │ ├── 0012.txt │ │ │ ├── 0013.txt │ │ │ ├── 0014.txt │ │ │ ├── 0015.txt │ │ │ ├── 0016.txt │ │ │ ├── 0017.txt │ │ │ ├── 0018.txt │ │ │ ├── 0019.txt │ │ │ └── 0020.txt │ ├── evaluate_tracking.py │ ├── mailpy.py │ └── munkres.py │ ├── eval_motchallenge.py │ ├── get_mot_17.sh │ ├── interp_mot.py │ ├── nuScenes_lib │ ├── export_kitti.py │ └── utils_kitti.py │ ├── remove_optimizers.py │ ├── vis_tracking_kitti.py │ └── 
vis_tracking_mot.py └── tao ├── .gitignore ├── LICENSE ├── README.md ├── docs ├── challenge.md ├── detector_train.md ├── download.md ├── download_hacs_alt.md ├── evaluation.md ├── faqs.md ├── manual_download.md └── trackers.md ├── scripts ├── detectors │ ├── detectron2_infer.py │ ├── detectron2_train_net.py │ └── merge_coco_with_lvis.py ├── download │ ├── download_annotations.py │ ├── download_ava.py │ ├── download_cfg.yaml │ ├── download_hacs.py │ ├── download_helper.py │ ├── extract_frames.py │ ├── gen_checksums.py │ ├── meta │ │ ├── ava_file_names_test_v2.1.txt │ │ └── ava_file_names_trainval_v2.1.txt │ └── verify.py ├── evaluation │ ├── configs │ │ └── default.yaml │ └── evaluate.py └── trackers │ └── sort │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── create_json_for_eval.py │ ├── requirements.txt │ ├── sort.py │ ├── sort_with_detection_id.py │ └── track.py ├── setup.py └── tao ├── __init__.py ├── toolkit ├── __init__.py └── tao │ ├── __init__.py │ ├── eval.py │ ├── results.py │ └── tao.py └── utils ├── __init__.py ├── detectron2 └── datasets.py ├── download.py ├── evaluation.py ├── fs.py ├── misc.py ├── parallel ├── __init__.py ├── fixed_gpu_pool.py └── pool_context.py ├── s3.py ├── video.py ├── yacs_util.py └── ytdl.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Pavel Tokmakov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Handy commands: 2 | # - `make docker-build`: builds DOCKER_IMAGE 3 | PROJECT ?= permatrack 4 | WORKSPACE ?= /workspace/$(PROJECT) 5 | DOCKER_IMAGE ?= ${PROJECT}:latest 6 | 7 | SHMSIZE ?= 444G 8 | DOCKER_OPTS := \ 9 | --name ${PROJECT} \ 10 | --rm -it \ 11 | --shm-size=${SHMSIZE} \ 12 | -e AWS_DEFAULT_REGION \ 13 | -e AWS_ACCESS_KEY_ID \ 14 | -e AWS_SECRET_ACCESS_KEY \ 15 | -e HOST_HOSTNAME= \ 16 | -e NCCL_DEBUG=VERSION \ 17 | -e DISPLAY=${DISPLAY} \ 18 | -e XAUTHORITY \ 19 | -e NVIDIA_DRIVER_CAPABILITIES=all \ 20 | -v ~/.aws:/root/.aws \ 21 | -v /root/.ssh:/root/.ssh \ 22 | -v ~/.cache:/root/.cache \ 23 | -v /data:/data \ 24 | -v /mnt/fsx/:/mnt/fsx \ 25 | -v /dev/null:/dev/raw1394 \ 26 | -v /tmp:/tmp \ 27 | -v /tmp/.X11-unix/X0:/tmp/.X11-unix/X0 \ 28 | -v /var/run/docker.sock:/var/run/docker.sock \ 29 | -v ${PWD}:${WORKSPACE} \ 30 | -w ${WORKSPACE} \ 31 | --privileged \ 32 | --ipc=host \ 33 | --network=host 34 | 35 | NGPUS=$(shell nvidia-smi -L | wc -l) 36 | 37 | 38 | .PHONY: all clean docker-build 39 | 40 | all: clean 41 | 42 | clean: 43 | find . -name "*.pyc" | xargs rm -f && \ 44 | find . -name "__pycache__" | xargs rm -rf 45 | 46 | docker-build: 47 | docker build \ 48 | -f docker/Dockerfile \ 49 | -t ${DOCKER_IMAGE} . 50 | 51 | docker-start-interactive: docker-build 52 | nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} bash 53 | 54 | docker-run: docker-build 55 | nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \ 56 | bash -c "${COMMAND}" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learning to Track with Object Permanence 2 | A video-based MOT approach capable of tracking through full occlusions: 3 | ![](readme/method.png) 4 | > [**Learning to Track with Object Permanence**](https://arxiv.org/pdf/2103.14258.pdf), 5 | > Pavel Tokmakov, Jie Li, Wolfram Burgard, Adrien Gaidon, 6 | > *arXiv technical report ([arXiv 2103.14258](https://arxiv.org/pdf/2103.14258.pdf))* 7 | 8 | 9 | @inproceedings{tokmakov2021learning, 10 | title={Learning to Track with Object Permanence}, 11 | author={Tokmakov, Pavel and Li, Jie and Burgard, Wolfram and Gaidon, Adrien}, 12 | booktitle={ICCV}, 13 | year={2021} 14 | } 15 | 16 | Check out our self-supervised extension published at ICML'22: 17 | > [**Object Permanence Emerges in a Random Walk along Memory**](https://arxiv.org/abs/2204.01784), 18 | > Pavel Tokmakov, Allan Jabri, Jie Li, Adrien Gaidon, 19 | > *arXiv technical report ([arXiv 2204.01784](https://arxiv.org/pdf/2204.01784.pdf))* 20 | 21 | 22 | @inproceedings{tokmakov2022object, 23 | title={Object Permanence Emerges in a Random Walk along Memory}, 24 | author={Tokmakov, Pavel and Jabri, Allan and Li, Jie and Gaidon, Adrien}, 25 | booktitle={ICML}, 26 | year={2022} 27 | } 28 | 29 | ## Abstract 30 | Tracking by detection, the dominant approach for online multi-object tracking, alternates between localization and association steps. As a result, it strongly depends on the quality of instantaneous observations, often failing when objects are not fully visible. In contrast, tracking in humans is underpinned by the notion of object permanence: once an object is recognized, we are aware of its physical existence and can approximately localize it even under full occlusions.
In this work, we introduce an end-to-end trainable approach for joint object detection and tracking that is capable of such reasoning. We build on top of the recent CenterTrack architecture, which takes pairs of frames as input, and extend it to videos of arbitrary length. To this end, we augment the model with a spatio-temporal, recurrent memory module, allowing it to reason about object locations and identities in the current frame using all the previous history. It is, however, not obvious how to train such an approach. We study this question on a new, large-scale, synthetic dataset for multi-object tracking, which provides ground truth annotations for invisible objects, and propose several approaches for supervising tracking behind occlusions. Our model, trained jointly on synthetic and real data, outperforms the state of the art on the KITTI and MOT17 datasets thanks to its robustness to occlusions. 31 | 32 | ## Installation 33 | 34 | Please refer to [INSTALL.md](readme/INSTALL.md) for installation instructions. 35 | 36 | ## Benchmark Evaluation and Training 37 | 38 | After [installation](readme/INSTALL.md), follow the instructions in [DATA.md](readme/DATA.md) to set up the datasets. Then check [GETTING_STARTED.md](readme/GETTING_STARTED.md) to reproduce the results in the paper. 39 | We provide scripts for all the experiments in the [experiments](experiments) folder. 40 | 41 | ## License 42 | 43 | PermaTrack is developed upon [CenterTrack](https://github.com/xingyizhou/CenterTrack). Both codebases are released under the MIT License. Some code in CenterTrack comes from third parties with different licenses; please check the CenterTrack repo for details. In addition, this repo uses [py-motmetrics](https://github.com/cheind/py-motmetrics) for MOT evaluation, [nuscenes-devkit](https://github.com/nutonomy/nuscenes-devkit) for nuScenes evaluation and preprocessing, and the [TAO codebase](https://github.com/TAO-Dataset/tao) for computing Track AP. The ConvGRU implementation is adapted from [this](https://github.com/happyjin/ConvGRU-pytorch) repo. See [NOTICE](NOTICE) for details. Please also note the license of each dataset; most of the datasets we used in this project are under non-commercial licenses. 44 | 45 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved.
2 | 3 | FROM nvidia/cuda:10.0-devel-ubuntu18.04 4 | 5 | ENV PROJECT=permatrack 6 | ENV PYTORCH_VERSION=1.4 7 | ENV TORCHVISION_VERSION=0.5.0 8 | ENV CUDNN_VERSION=7.6.5.32-1+cuda10.1 9 | ENV NCCL_VERSION=2.4.8-1+cuda10.1 10 | ENV TRT_VERSION=6.0.1.5 11 | ENV LC_ALL=C.UTF-8 12 | ENV LANG=C.UTF-8 13 | 14 | ARG python=3.6 15 | ENV PYTHON_VERSION=${python} 16 | ENV DEBIAN_FRONTEND=noninteractive 17 | 18 | # Set default shell to /bin/bash 19 | SHELL ["/bin/bash", "-cu"] 20 | 21 | RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \ 22 | build-essential \ 23 | cmake \ 24 | g++-4.8 \ 25 | git \ 26 | curl \ 27 | docker.io \ 28 | vim \ 29 | wget \ 30 | ca-certificates \ 31 | libcudnn7=${CUDNN_VERSION} \ 32 | libnccl2=${NCCL_VERSION} \ 33 | libnccl-dev=${NCCL_VERSION} \ 34 | libjpeg-dev \ 35 | libpng-dev \ 36 | python${PYTHON_VERSION} \ 37 | python${PYTHON_VERSION}-dev \ 38 | python3-tk \ 39 | librdmacm1 \ 40 | libibverbs1 \ 41 | libgtk2.0-dev \ 42 | unzip \ 43 | bzip2 \ 44 | htop \ 45 | gnuplot \ 46 | ffmpeg 47 | 48 | # Install OpenSSH for MPI to communicate between containers 49 | RUN apt-get install -y --no-install-recommends openssh-client openssh-server && \ 50 | mkdir -p /var/run/sshd 51 | 52 | RUN ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python 53 | 54 | RUN curl -O https://bootstrap.pypa.io/get-pip.py && \ 55 | python get-pip.py && \ 56 | rm get-pip.py 57 | 58 | # Install Pydata and other deps 59 | RUN pip install easydict scipy numpy pyquaternion matplotlib jupyter h5py \ 60 | awscli nuscenes-devkit tqdm progress path.py pyyaml opencv-python \ 61 | pycuda numba cython motmetrics scikit-learn==0.22.2 moviepy imageio yacs 62 | 63 | # Install PyTorch 64 | RUN pip install torch==${PYTORCH_VERSION} \ 65 | torchvision==${TORCHVISION_VERSION} && ldconfig 66 | 67 | RUN pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' 68 | 69 | RUN pip3 install git+https://github.com/achalddave/python-script-utils.git@v0.0.2#egg=script_utils 70 | 71 | # create project workspace dir 72 | RUN mkdir -p /workspace/experiments 73 | RUN mkdir -p /workspace/${PROJECT} 74 | WORKDIR /workspace/${PROJECT} 75 | 76 | # Copy project source last (to avoid cache busting) 77 | WORKDIR /workspace/${PROJECT} 78 | COPY . /workspace/${PROJECT} 79 | ENV PYTHONPATH="/workspace/${PROJECT}:$PYTHONPATH" -------------------------------------------------------------------------------- /experiments/crowdhuman.sh: -------------------------------------------------------------------------------- 1 | # Initial model pre-trained on PD: https://tri-ml-public.s3.amazonaws.com/github/permatrack/pd_17fr_21ep_vis.pth 2 | # Resulting model trained on CrowdHuman: https://tri-ml-public.s3.amazonaws.com/github/permatrack/crowdhuman.pth 3 | 4 | cd src 5 | # train 6 | python main.py tracking --exp_id crowdhuman --occlusion_thresh 0.15 --visibility_thresh 0.05 --dataset joint --dataset1 crowdhuman --dataset2 pd_tracking --dataset_version x --same_aug_pre --hm_disturb 0.0 --lost_disturb 0.0 --fp_disturb 0.0 --gpus 0,1,2,3,4,5,6,7 --batch_size 2 --load_model ../models/pd_17fr_21ep_vis.pth --val_intervals 100 --is_recurrent --gru_filter_size 7 --input_len 17 --pre_thresh 0.4 --hm_weight 0.5 --const_v_over_occl --sup_invis --invis_hm_weight 20 --use_occl_len --occl_len_mult 5 --visibility --num_iter 5000 --num_epochs 9 --lr_step 5 --ltrb_amodal --only_ped --reuse_hm 7 | cd .. 
8 | -------------------------------------------------------------------------------- /experiments/kitti_fulltrain.sh: -------------------------------------------------------------------------------- 1 | # Initial model pre-trained on PD: https://tri-ml-public.s3.amazonaws.com/github/permatrack/pd_17fr_21ep_vis.pth 2 | # Resulting model trained on KITTI full train: https://tri-ml-public.s3.amazonaws.com/github/permatrack/kitti_full.pth 3 | 4 | cd src 5 | # train 6 | python main.py tracking --exp_id kitti_fulltrain --occlusion_thresh 0.15 --visibility_thresh 0.05 --dataset joint --dataset1 kitti_tracking --dataset2 pd_tracking --dataset_version train --same_aug_pre --hm_disturb 0.0 --lost_disturb 0.0 --fp_disturb 0.0 --gpus 0,1,2,3,4,5,6,7 --batch_size 2 --load_model ../models/pd_17fr_21ep_vis.pth --val_intervals 1 --is_recurrent --gru_filter_size 7 --input_len 17 --pre_thresh 0.4 --hm_weight 0.5 --const_v_over_occl --sup_invis --invis_hm_weight 20 --use_occl_len --occl_len_mult 5 --visibility --num_iter 5000 --num_epochs 5 --lr_step 4 --visibility_thresh_eval 0.2 7 | # test 8 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id kitti_fulltrain --dataset kitti_tracking --dataset_version test --track_thresh 0.4 --resume --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility --visibility_thresh_eval 0.2 --stream_test --flip_test --trainval 9 | -------------------------------------------------------------------------------- /experiments/kitti_half.sh: -------------------------------------------------------------------------------- 1 | # Initial model pre-trained on PD: https://tri-ml-public.s3.amazonaws.com/github/permatrack/pd_17fr_21ep_vis.pth 2 | # Resulting model trained on KITTI half train: https://tri-ml-public.s3.amazonaws.com/github/permatrack/kitti_half_pd_5ep.pth 3 | 4 | cd src 5 | # train 6 | python main.py tracking --exp_id kitti_half --occlusion_thresh 0.15 --visibility_thresh 0.05 --dataset joint --dataset1 kitti_tracking --dataset2 pd_tracking --dataset_version train_half --same_aug_pre --hm_disturb 0.0 --lost_disturb 0.0 --fp_disturb 0.0 --gpus 0,1,2,3,4,5,6,7 --batch_size 2 --load_model ../models/pd_17fr_21ep_vis.pth --val_intervals 1 --is_recurrent --gru_filter_size 7 --input_len 17 --pre_thresh 0.4 --hm_weight 0.5 --const_v_over_occl --sup_invis --invis_hm_weight 20 --use_occl_len --occl_len_mult 5 --visibility --num_iter 5000 --num_epochs 5 --lr_step 4 --visibility_thresh_eval 0.2 7 | # test 8 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id kitti_half --dataset kitti_tracking --dataset_version val_half --track_thresh 0.4 --resume --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility --visibility_thresh_eval 0.2 --stream_test 9 | -------------------------------------------------------------------------------- /experiments/mot17_fulltrain.sh: -------------------------------------------------------------------------------- 1 | # Initial model pre-trained on PD + CrowdHuman: https://tri-ml-public.s3.amazonaws.com/github/permatrack/crowdhuman.pth 2 | # Resulting model trained on MOT17 full train: https://tri-ml-public.s3.amazonaws.com/github/permatrack/mot_full.pth 3 | 4 | cd src 5 | # train 6 | python main.py tracking --exp_id mot17_half --occlusion_thresh 0.15 --visibility_thresh 0.05 --dataset joint --dataset1 mot --dataset2 pd_tracking --dataset_version 17trainval --same_aug_pre --hm_disturb 0.0 --lost_disturb 0.0 --fp_disturb 0.0 --gpus 0,1,2,3,4,5,6,7 --batch_size 2 --load_model ../models/crowdhuman.pth --val_intervals 1 
--is_recurrent --gru_filter_size 7 --input_len 17 --pre_thresh 0.4 --hm_weight 0.5 --const_v_over_occl --sup_invis --invis_hm_weight 20 --use_occl_len --occl_len_mult 5 --visibility --num_iter 1600 --num_epochs 5 --lr_step 4 --visibility_thresh_eval 0.1 --ltrb_amodal --only_ped --reuse_hm 7 | # test 8 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id mot17_fulltrain --dataset mot --dataset_version test --track_thresh 0.4 --resume --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility --max_age 32 --trainval 9 | cd .. 10 | -------------------------------------------------------------------------------- /experiments/mot17_half.sh: -------------------------------------------------------------------------------- 1 | # Initial model pre-trained on PD + CrowdHuman: https://tri-ml-public.s3.amazonaws.com/github/permatrack/crowdhuman.pth 2 | # Resulting model trained on MOT17 half train: https://tri-ml-public.s3.amazonaws.com/github/permatrack/mot_half.pth 3 | 4 | cd src 5 | # train 6 | python main.py tracking --exp_id mot17_half --occlusion_thresh 0.15 --visibility_thresh 0.05 --dataset joint --dataset1 mot --dataset2 pd_tracking --dataset_version 17halftrain --same_aug_pre --hm_disturb 0.0 --lost_disturb 0.0 --fp_disturb 0.0 --gpus 0,1,2,3,4,5,6,7 --batch_size 2 --load_model ../models/crowdhuman.pth --val_intervals 1 --is_recurrent --gru_filter_size 7 --input_len 17 --pre_thresh 0.4 --hm_weight 0.5 --const_v_over_occl --sup_invis --invis_hm_weight 20 --use_occl_len --occl_len_mult 5 --visibility --num_iter 1600 --num_epochs 5 --lr_step 4 --visibility_thresh_eval 0.1 --ltrb_amodal --only_ped --reuse_hm 7 | # test 8 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --track_thresh 0.4 --resume --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility 9 | # test with T.R. 10 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --track_thresh 0.4 --resume --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility --max_age 32 11 | # test with public detection 12 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --track_thresh 0.4 --resume --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility --public_det --load_results ../data/mot17/results/val_half_det.json 13 | cd .. 
14 | -------------------------------------------------------------------------------- /experiments/nuScenes_3Dtracking.sh: -------------------------------------------------------------------------------- 1 | TBD -------------------------------------------------------------------------------- /experiments/pd.sh: -------------------------------------------------------------------------------- 1 | # Initial model pre-trained on NuScenes3D: https://drive.google.com/open?id=1ZSG9swryMEfBJ104WH8CP7kcypCobFlU 2 | # Resulting model trained on PD: https://tri-ml-public.s3.amazonaws.com/github/permatrack/pd_17fr_21ep_vis.pth 3 | 4 | cd src 5 | # train 6 | python main.py tracking --exp_id pd_supinvis --occlusion_thresh 0.15 --visibility_thresh 0.05 --dataset pd_tracking --dataset_version val --same_aug_pre --hm_disturb 0.0 --lost_disturb 0.0 --fp_disturb 0.0 --gpus 0,1,2,3,4,5,6,7 --batch_size 2 --load_model ../models/nuScenes_3Ddetection_e140.pth --val_intervals 2 --is_recurrent --gru_filter_size 7 --input_len 17 --pre_thresh 0.4 --hm_weight 0.5 --num_epochs 21 --lr_step 7 --const_v_over_occl --sup_invis --invis_hm_weight 20 --use_occl_len --occl_len_mult 5 --num_iter 5000 --visibility --visibility_thresh_eval 0.2 7 | # test 8 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id pd_supinvis --dataset pd_tracking --dataset_version val --track_thresh 0.4 --resume --is_recurrent --debug 4 --gru_filter_size 7 --num_gru_layers 1 --stream_test 9 | cd .. 10 | -------------------------------------------------------------------------------- /readme/GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This document provides tutorials to train and evaluate PermaTrack. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md). 4 | 5 | ## Benchmark evaluation 6 | 7 | ### PD 8 | 9 | To test our pretrained model on the validation set of PD, download the [model](https://tri-ml-public.s3.amazonaws.com/github/permatrack/pd_17fr_21ep_vis.pth), copy it to `$PermaTrack_ROOT/models/`, and run 10 | 11 | ~~~ 12 | cd $PermaTrack_ROOT/src 13 | python test.py tracking --exp_id pd --dataset pd_tracking --dataset_version val --track_thresh 0.4 --load_model ../models/pd_17fr_21ep_vis.pth --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --stream_test 14 | ~~~ 15 | 16 | This will give a Track mAP of `66.96` if set up correctly. You can append `--debug 4` to the above command to visualize the predictions. 17 | 18 | Please note that we are ignoring ground truth invisible object annotations in the validation set of PD (methods are not penalized for missing those boxes), but we are using them to filter out predictions which have a high overlap with a ground truth invisible box (to avoid counting such predictions as false positives; this was important for a fair evaluation before we introduced the visibility head). As a result, the performance of our method with and without visibility estimation described in the paper does not change much on PD. In the main experiments we did not use visibility estimation during evaluation on PD, but you can add it by appending `--visibility --visibility_thresh_eval 0.2` to the above command. The expected Track mAP is `66.78`.
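In other words, the filtering step described above simply drops any predicted box whose IoU with an invisible ground-truth box exceeds a threshold before Track mAP is computed. The snippet below is only a minimal sketch of that idea for illustration; the function names and the threshold are ours and do not correspond to the actual evaluation code.

~~~
def iou(box_a, box_b):
    # boxes are [x1, y1, x2, y2]
    x1, y1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    x2, y2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / max(area_a + area_b - inter, 1e-9)

def drop_predictions_on_invisible(pred_boxes, invisible_gt_boxes, thresh=0.5):
    # a prediction that mostly covers an invisible ground-truth box is removed,
    # so it is not counted as a false positive
    return [p for p in pred_boxes
            if all(iou(p, g) < thresh for g in invisible_gt_boxes)]
~~~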
19 | 20 | ### KITTI Tracking 21 | 22 | To test the tracking performance on the validation set of KITTI with our pretrained model, download the [model](https://tri-ml-public.s3.amazonaws.com/github/permatrack/kitti_half_pd_5ep.pth), copy it to `$PermaTrack_ROOT/models/`, and run 23 | 24 | ~~~ 25 | python test.py tracking --exp_id kitti_half --dataset kitti_tracking --dataset_version val_half --track_thresh 0.4 --load_model ../models/kitti_half_pd_5ep.pth --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility --visibility_thresh_eval 0.2 --stream_test 26 | ~~~ 27 | 28 | The expected Track mAP is `70.53`. Here Track AP evaluation also takes into account ignore regions in KITTI annotations (detections falling into these regions are not counted as false positives). 29 | 30 | ### MOT17 31 | 32 | To test the tracking performance on the validation set of MOT17, download the [model](https://tri-ml-public.s3.amazonaws.com/github/permatrack/mot_half.pth), copy it to `$PermaTrack_ROOT/models/`, and run 33 | 34 | ~~~ 35 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --track_thresh 0.4 --load_model ../models/mot_half.pth --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility 36 | ~~~ 37 | 38 | The expected IDF1 is `68.2`. 39 | 40 | To test with Track Rebirth, run 41 | 42 | ~~~ 43 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --track_thresh 0.4 --load_model ../models/mot_half.pth --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility --max_age 32 44 | ~~~ 45 | 46 | The expected IDF1 is `71.9`. 47 | 48 | To test with public detections, run 49 | 50 | ~~~ 51 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --track_thresh 0.4 --load_model ../models/mot_half.pth --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility --public_det --load_results ../data/mot17/results/val_half_det.json 52 | ~~~ 53 | 54 | The expected IDF1 is `67.0`. 55 | 56 | ### nuScenes 57 | 58 | To test the tracking performance on the validation set of nuScenes, download the [model](https://tri-ml-public.s3.amazonaws.com/github/permatrack/nu_stage_3_17fr.pth), copy it to `$PermaTrack_ROOT/models/`, update `motmetrics` with 59 | 60 | ~~~ 61 | pip install motmetrics==1.1.3 62 | ~~~ 63 | 64 | then run 65 | 66 | ~~~ 67 | CUDA_VISIBLE_DEVICES=1 python test.py tracking,ddd --exp_id nuscenes_tracking --dataset nuscenes_tracking --track_thresh 0.1 --resume --is_recurrent --gru_filter_size 7 --stream_test --load_model ../models/nu_stage_3_17fr.pth --visibility 68 | ~~~ 69 | 70 | The expected AMOTA is `10.9`. 71 | 72 | ## Training 73 | We have packed all the training scripts in the [experiments](../experiments) folder. 74 | Each model is trained on 8 Tesla V100 GPUs with 32GB of memory. 75 | If the training is terminated before finishing, you can use the same command with `--resume` to resume training. It will find the latest model with the same `exp_id`. 76 | All experiments rely on existing pretrained models; we provide the links to the corresponding models directly in the training scripts.
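Most of the training scripts pair a real dataset with PD via `--dataset joint --dataset1 <real_dataset> --dataset2 pd_tracking`. Batches from the two datasets are interleaved by the loader in `src/lib/dataset/joint_loader.py`; the sketch below condenses its logic (the active loader is re-drawn at random every few steps, and an exhausted loader is simply restarted), with a simplified constructor signature for illustration.

~~~
import random

class JointIterator:
    # alternate between two data loaders in blocks of `steps_per_block` batches
    def __init__(self, loader1, loader2, steps_per_block=5):
        self.loaders = [loader1, loader2]
        self.iters = [iter(loader1), iter(loader2)]
        self.steps_per_block = steps_per_block
        self.active = 0
        self.counter = steps_per_block

    def __next__(self):
        if self.counter == 0:
            # pick which dataset feeds the next block of batches
            self.active = random.randint(0, 1)
            self.counter = self.steps_per_block
        batch = next(self.iters[self.active], None)
        if batch is None:
            # restart the exhausted loader and draw from it again
            self.iters[self.active] = iter(self.loaders[self.active])
            batch = next(self.iters[self.active])
        self.counter -= 1
        return batch
~~~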
77 | -------------------------------------------------------------------------------- /readme/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 4 | 1. We provide a Dockerfile to re-create the environment which was used in our experiments under `$PermaTrack_ROOT/docker/Dockerfile`. You can either configure the environment yourself, using the Dockerfile as a guide, or build it via: 5 | ~~~ 6 | cd $PermaTrack_ROOT 7 | make docker-build 8 | make docker-start-interactive 9 | ~~~ 10 | 11 | 2. The only step that has to be done manually is compiling the deformable convolutions module: 12 | 13 | ~~~ 14 | cd $PermaTrack_ROOT/src/lib/model/networks/ 15 | git clone https://github.com/CharlesShang/DCNv2/ 16 | cd DCNv2 17 | ./make.sh 18 | ~~~ 19 | -------------------------------------------------------------------------------- /readme/method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/readme/method.png -------------------------------------------------------------------------------- /src/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/lib/dataset/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | from .datasets.mot import MOT 12 | from .datasets.crowdhuman import CrowdHuman 13 | from .datasets.kitti_tracking import KITTITracking 14 | from .datasets.pd_tracking import PDTracking 15 | from .datasets.custom_dataset import CustomDataset 16 | from .datasets.nuscenes_tracking import nuScenesTracking 17 | 18 | dataset_factory = { 19 | 'custom': CustomDataset, 20 | 'mot': MOT, 21 | 'crowdhuman': CrowdHuman, 22 | 'kitti_tracking': KITTITracking, 23 | 'pd_tracking': PDTracking, 24 | 'nuscenes_tracking': nuScenesTracking 25 | } 26 | 27 | 28 | def get_dataset(dataset): 29 | return dataset_factory[dataset] 30 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/crowdhuman.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | from ..generic_dataset import GenericDataset 12 | from ..video_dataset import VideoDataset 13 | 14 | class CrowdHuman(VideoDataset): 15 | num_categories = 1 16 | num_joints = 17 17 | default_resolution = [512, 512] 18 | max_objs = 500 19 | class_name = ['person'] 20 | cat_ids = {1: 1} 21 | def __init__(self, opt, split): 22 | data_dir = os.path.join(opt.data_dir, 'crowdhuman') 23 | img_dir = os.path.join( 24 | data_dir,
'CrowdHuman_{}'.format(split), 'Images') 25 | ann_path = os.path.join(data_dir, 'annotations', 26 | '{}.json').format(split) 27 | 28 | print('==> initializing CityPersons {} data.'.format(split)) 29 | 30 | self.images = None 31 | # load image list and coco 32 | super(CrowdHuman, self).__init__(opt, split, ann_path, img_dir) 33 | 34 | self.num_samples = len(self.images) 35 | self.same_aug_pre = False 36 | self.stride = 1 37 | self.shift = 0.05 38 | self.box_size_thresh = [0] 39 | 40 | print('Loaded {} {} samples'.format(split, self.num_samples)) 41 | 42 | def _to_float(self, x): 43 | return float("{:.2f}".format(x)) 44 | 45 | def _save_results(self, records, fpath): 46 | with open(fpath,'w') as fid: 47 | for record in records: 48 | line = json.dumps(record)+'\n' 49 | fid.write(line) 50 | return fpath 51 | 52 | def convert_eval_format(self, all_bboxes): 53 | detections = [] 54 | person_id = 1 55 | for image_id in all_bboxes: 56 | if type(all_bboxes[image_id]) != type({}): 57 | # newest format 58 | dtboxes = [] 59 | for j in range(len(all_bboxes[image_id])): 60 | item = all_bboxes[image_id][j] 61 | if item['class'] != person_id: 62 | continue 63 | bbox = item['bbox'] 64 | bbox[2] -= bbox[0] 65 | bbox[3] -= bbox[1] 66 | bbox_out = list(map(self._to_float, bbox[0:4])) 67 | detection = { 68 | "tag": 1, 69 | "box": bbox_out, 70 | "score": float("{:.2f}".format(item['score'])) 71 | } 72 | dtboxes.append(detection) 73 | img_info = self.coco.loadImgs(ids=[image_id])[0] 74 | file_name = img_info['file_name'] 75 | detections.append({'ID': file_name[:-4], 'dtboxes': dtboxes}) 76 | return detections 77 | 78 | def __len__(self): 79 | return self.num_samples 80 | 81 | def save_results(self, results, save_dir): 82 | self._save_results(self.convert_eval_format(results), 83 | '{}/results_crowdhuman.odgt'.format(save_dir)) 84 | def run_eval(self, results, save_dir, write_to_file=False, dataset_version="withcrowd1000_visible_nocamerafilter_val"): 85 | self.save_results(results, save_dir) 86 | # try: 87 | # os.system('python tools/crowdhuman_eval/demo.py ' + \ 88 | # '../data/crowdhuman/annotation_val.odgt ' + \ 89 | # '{}/results_crowdhuman.odgt'.format(save_dir)) 90 | # except: 91 | # print('Crowdhuman evaluation not setup!') 92 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/custom_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from ..generic_dataset import GenericDataset 6 | 7 | class CustomDataset(GenericDataset): 8 | num_categories = 1 9 | default_resolution = [-1, -1] 10 | class_name = [''] 11 | max_objs = 128 12 | cat_ids = {1: 1} 13 | def __init__(self, opt, split): 14 | assert (opt.custom_dataset_img_path != '') and \ 15 | (opt.custom_dataset_ann_path != '') and \ 16 | (opt.num_classes != -1) and \ 17 | (opt.input_h != -1) and (opt.input_w != -1), \ 18 | 'The following arguments must be specified for custom datasets: ' + \ 19 | 'custom_dataset_img_path, custom_dataset_ann_path, num_classes, ' + \ 20 | 'input_h, input_w.' 
21 | img_dir = opt.custom_dataset_img_path 22 | ann_path = opt.custom_dataset_ann_path 23 | self.num_categories = opt.num_classes 24 | self.class_name = ['' for _ in range(self.num_categories)] 25 | self.default_resolution = [opt.input_h, opt.input_w] 26 | self.cat_ids = {i: i for i in range(1, self.num_categories + 1)} 27 | 28 | self.images = None 29 | # load image list and coco 30 | super().__init__(opt, split, ann_path, img_dir) 31 | 32 | self.num_samples = len(self.images) 33 | print('Loaded Custom dataset {} samples'.format(self.num_samples)) 34 | 35 | def __len__(self): 36 | return self.num_samples 37 | 38 | def run_eval(self, results, save_dir): 39 | pass 40 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/mot.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | from collections import defaultdict 11 | from ..video_dataset import VideoDataset 12 | 13 | class MOT(VideoDataset): 14 | num_categories = 1 15 | default_resolution = [544, 960] 16 | class_name = [''] 17 | max_objs = 256 18 | cat_ids = {1: 1, -1: -1} 19 | def __init__(self, opt, split): 20 | self.dataset_version = opt.dataset_version 21 | self.year = 17 22 | print('Using MOT {} {}'.format(self.year, self.dataset_version)) 23 | data_dir = os.path.join(opt.data_dir, 'mot{}'.format(self.year)) 24 | 25 | if split == 'val': 26 | ann_file = '{}.json'.format('val_half') 27 | elif split == 'test': 28 | ann_file = '{}.json'.format('test') 29 | elif self.dataset_version == '17trainval': 30 | ann_file = '{}.json'.format('train_interp') 31 | else: 32 | ann_file = '{}.json'.format('train_half_interp') 33 | 34 | img_dir = os.path.join(data_dir, '{}'.format( 35 | 'test' if 'test' in self.dataset_version else 'train')) 36 | 37 | print('ann_file', ann_file) 38 | ann_path = os.path.join(data_dir, 'annotations', ann_file) 39 | 40 | self.images = None 41 | # load image list and coco 42 | super(MOT, self).__init__(opt, split, ann_path, img_dir) 43 | 44 | self.num_samples = len(self.images) 45 | self.box_size_thresh = [0] 46 | print('Loaded MOT {} {} {} samples'.format( 47 | self.dataset_version, split, self.num_samples)) 48 | 49 | def _to_float(self, x): 50 | return float("{:.2f}".format(x)) 51 | 52 | def __len__(self): 53 | return self.num_samples 54 | 55 | def save_results(self, results, save_dir): 56 | results_dir = os.path.join(save_dir, 'results_mot{}'.format(self.dataset_version)) 57 | if not os.path.exists(results_dir): 58 | os.mkdir(results_dir) 59 | for video in self.coco.dataset['videos']: 60 | video_id = video['id'] 61 | file_name = video['file_name'] 62 | out_path = os.path.join(results_dir, '{}.txt'.format(file_name)) 63 | f = open(out_path, 'w') 64 | images = self.video_to_images[video_id] 65 | tracks = defaultdict(list) 66 | for image_info in images: 67 | if not (image_info['id'] in results): 68 | continue 69 | result = results[image_info['id']] 70 | frame_id = image_info['frame_id'] 71 | for item in result: 72 | if item['age'] != 1: 73 | continue 74 | if 'visibility' in item and not item['visibility']: 75 | continue 76 | if not ('tracking_id' in item): 77 | item['tracking_id'] = np.random.randint(100000) 78 | if item['age'] != 1: 79 | continue 80 | if 'visibility' in item and not 
item['visibility']: 81 | continue 82 | tracking_id = item['tracking_id'] 83 | bbox = item['bbox'] 84 | bbox = [bbox[0], bbox[1], bbox[2], bbox[3]] 85 | tracks[tracking_id].append([frame_id] + bbox) 86 | rename_track_id = 0 87 | for track_id in sorted(tracks): 88 | rename_track_id += 1 89 | for t in tracks[track_id]: 90 | f.write('{},{},{:.2f},{:.2f},{:.2f},{:.2f},-1,-1,-1,-1\n'.format( 91 | t[0], rename_track_id, t[1], t[2], t[3]-t[1], t[4]-t[2])) 92 | f.close() 93 | 94 | def run_eval(self, results, save_dir, write_to_file=False, dataset_version=""): 95 | self.save_results(results, save_dir) 96 | gt_type_str = '{}'.format( 97 | '_train_half' if '17halftrain' in self.opt.dataset_version \ 98 | else '_val_half' if '17halfval' in self.opt.dataset_version \ 99 | else '') 100 | gt_type_str = '--gt_type {}'.format(gt_type_str) if gt_type_str != '' else '' 101 | print('python tools/eval_motchallenge.py ' + \ 102 | '../data/mot{}/{}/ '.format(self.year, 'train') + \ 103 | '{}/results_mot{}/ '.format(save_dir, self.dataset_version) + \ 104 | gt_type_str + ' --eval_official') 105 | os.system('python tools/eval_motchallenge.py ' + \ 106 | '../data/mot{}/{}/ '.format(self.year, 'train') + \ 107 | '{}/results_mot{}/ '.format(save_dir, self.dataset_version) + \ 108 | gt_type_str + ' --eval_official') 109 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/pd_tracking.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | import numpy as np 7 | import torch 8 | import json 9 | import cv2 10 | import os 11 | import math 12 | 13 | from ..video_dataset import VideoDataset 14 | 15 | class PDTracking(VideoDataset): 16 | num_categories = 5 17 | dataset_folder = 'pd' 18 | default_resolution = [384, 960] 19 | class_name = ['Pedestrian', 'Car', 'Cyclist', 'Caravan/RV', 'Truck'] 20 | # negative id is for "not as negative sample for abs(id)". 
21 | # 0 for ignore losses for all categories in the bounding box region 22 | # ['Pedestrian', 'Car', 'Bicyclist', 'Bus', 'Caravan/RV', 'OtherMovable', 23 | # 'Motorcycle', 'Motorcyclist', 'OtherRider', 'Train', 'Truck', 'Dontcare'] 24 | cat_ids = {1:1, 2:2, 3:3, 4:-9999, 5:4, 6:-2, 7:-9999, 8:-1, 9:-1, 10:-9999, 11:5} 25 | max_objs = 500 26 | def __init__(self, opt, split, rank=None): 27 | data_dir = os.path.join(opt.data_dir, self.dataset_folder) 28 | split_ = 'train' if opt.dataset_version != 'test' else 'test' #'test' 29 | img_dir = data_dir 30 | if split == 'train': 31 | ann_file_ = "train" 32 | else: 33 | ann_file_ = 'val' 34 | ann_path = os.path.join( 35 | data_dir, 'annotations', 'tracking_{}.json'.format( 36 | ann_file_)) 37 | self.images = None 38 | super(PDTracking, self).__init__(opt, split, ann_path, img_dir) 39 | 40 | self.box_size_thresh = [300, 500, 300, 500, 500] 41 | 42 | if opt.only_ped: 43 | self.num_categories = 1 44 | self.class_name = ['person'] 45 | self.cat_ids = {1:1, 2:-9999, 3:-1, 4:-9999, 5:-9999, 6:-9999, 7:-9999, 8:-1, 9:-1, 10:-9999, 11:-9999} 46 | self.box_size_thresh = [300] 47 | 48 | if opt.nu: 49 | self.num_categories = 8 50 | self.class_name = ['Car', 'Truck', 'Bus', 'Trailer', 'construction_vehicle', 'Pedestrian', 'Motorcycle', 'Bicycle'] 51 | self.cat_ids = {1:6, 2:1, 3:0, 4:3, 5:1, 6:-1, 7:-7, 8:0, 9:0, 10:-9999, 11:2, 12:5, 13:-8} 52 | self.box_size_thresh = [500, 500, 500, 500, 500, 300, 500, 500] 53 | 54 | self.alpha_in_degree = False 55 | self.depth_scale = 1 56 | self.dep_mask = 0 57 | self.dim_mask = 1 58 | self.rot_mask = 0 59 | self.amodel_offset_mask = 0 60 | self.ignore_amodal = True 61 | self.num_samples = len(self.images) 62 | self.exp_id = opt.exp_id 63 | if opt.const_v_over_occl: 64 | self.const_v_over_occl = True 65 | 66 | print('Loaded {} {} samples'.format(split, self.num_samples)) 67 | 68 | def save_results_ioueval(self, results, save_dir): 69 | formattted_results = [] 70 | if not os.path.exists(save_dir): 71 | os.mkdir(save_dir) 72 | 73 | for video in self.coco.dataset['videos']: 74 | video_id = video['id'] 75 | images = self.video_to_images[video_id] 76 | 77 | for image_info in images: 78 | img_id = image_info['id'] 79 | if not (img_id in results): 80 | continue 81 | frame_id = image_info['frame_id'] 82 | for i in range(len(results[img_id])): 83 | item = results[img_id][i] 84 | if item['age'] != 1: 85 | continue 86 | if 'visibility' in item and not item['visibility']: 87 | continue 88 | category_id = item['class'] 89 | track_id = item['tracking_id'] if 'tracking_id' in item else -1 90 | bbox = [item['bbox'][0].item(), item['bbox'][1].item(), item['bbox'][2].item() - item['bbox'][0].item(), item['bbox'][3].item() - item['bbox'][1].item()] 91 | 92 | entry = {'video_id': video_id, 'image_id': img_id, 'category_id': category_id, 'track_id': track_id, 'bbox': bbox, 'score': item['score'].item()} 93 | formattted_results.append(entry) 94 | 95 | print(save_dir + '/iou_eval.json') 96 | json.dump(formattted_results, open(save_dir + '/iou_eval.json', 'w')) 97 | 98 | def run_eval(self, results, save_dir, write_to_file=False, dataset_version="val"): 99 | self.save_results_ioueval(results, save_dir) 100 | os.chdir("../tao") 101 | command = 'python scripts/evaluation/evaluate.py ' + \ 102 | '../data/%s/annotations/tracking_%s_tao.json ' % (self.dataset_folder, dataset_version) + \ 103 | '{}/iou_eval.json'.format(save_dir) + ' --config-updates CATEGORIES 1,2' 104 | 105 | if write_to_file: 106 | print("Writing to file") 107 | command += ' > 
../exp/tracking/{}/eval_out.txt'.format(self.exp_id) 108 | os.system(command) 109 | 110 | def __len__(self): 111 | return self.num_samples 112 | 113 | def _to_float(self, x): 114 | return float("{:.2f}".format(x)) 115 | -------------------------------------------------------------------------------- /src/lib/dataset/joint_loader.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | class JointIterator: 4 | def __init__(self, iter1, iter2, dataset1, dataset2): 5 | self.iter1 = iter1 6 | self.iter2 = iter2 7 | self.num_steps = [5, 5] 8 | self.loader_ind = 0 9 | self.counter = self.num_steps[self.loader_ind] 10 | self.dataset1 = dataset1 11 | self.dataset2 = dataset2 12 | 13 | def __next__(self): 14 | if self.counter == 0: 15 | ind = random.randint(0, 1) 16 | self.loader_ind = ind 17 | self.counter = self.num_steps[ind] 18 | 19 | if self.loader_ind == 0: 20 | result = next(self.iter1, None) 21 | if result is None: 22 | self.iter1 = iter(self.dataset1) 23 | result = next(self.iter1, None) 24 | if result is None: 25 | raise StopIteration 26 | else: 27 | result = next(self.iter2, None) 28 | if result is None: 29 | self.iter2 = iter(self.dataset2) 30 | result = next(self.iter2, None) 31 | if result is None: 32 | raise StopIteration 33 | 34 | self.counter -= 1 35 | 36 | return result 37 | 38 | class JointLoader: 39 | 40 | def __init__(self, dataset1, dataset2): 41 | self.dataset1 = dataset1 42 | self.dataset2 = dataset2 43 | self.dataset = dataset1.dataset 44 | 45 | def __iter__(self): 46 | return JointIterator(iter(self.dataset1), iter(self.dataset2), self.dataset1, self.dataset2) 47 | 48 | def __len__(self): 49 | return len(self.dataset1) + len(self.dataset2) 50 | -------------------------------------------------------------------------------- /src/lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /src/lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /src/lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/src/lib/external/__init__.py -------------------------------------------------------------------------------- /src/lib/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /src/lib/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import 
division 3 | from __future__ import print_function 4 | 5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 6 | import os 7 | import time 8 | import sys 9 | import torch 10 | import subprocess 11 | USE_TENSORBOARD = True 12 | try: 13 | import tensorboardX 14 | print('Using tensorboardX') 15 | except: 16 | USE_TENSORBOARD = False 17 | 18 | class Logger(object): 19 | def __init__(self, opt): 20 | """Create a summary writer logging to log_dir.""" 21 | if not os.path.exists(opt.save_dir): 22 | os.makedirs(opt.save_dir) 23 | if not os.path.exists(opt.debug_dir): 24 | os.makedirs(opt.debug_dir) 25 | 26 | time_str = time.strftime('%Y-%m-%d-%H-%M') 27 | 28 | args = dict((name, getattr(opt, name)) for name in dir(opt) 29 | if not name.startswith('_')) 30 | file_name = os.path.join(opt.save_dir, 'opt.txt') 31 | with open(file_name, 'wt') as opt_file: 32 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 33 | opt_file.write('==> cudnn version: {}\n'.format( 34 | torch.backends.cudnn.version())) 35 | opt_file.write('==> Cmd:\n') 36 | opt_file.write(str(sys.argv)) 37 | opt_file.write('\n==> Opt:\n') 38 | for k, v in sorted(args.items()): 39 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 40 | 41 | log_dir = opt.save_dir + '/logs_{}'.format(time_str) 42 | if USE_TENSORBOARD: 43 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir) 44 | else: 45 | if not os.path.exists(os.path.dirname(log_dir)): 46 | os.mkdir(os.path.dirname(log_dir)) 47 | if not os.path.exists(log_dir): 48 | os.mkdir(log_dir) 49 | self.log = open(log_dir + '/log.txt', 'w') 50 | try: 51 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir)) 52 | except: 53 | pass 54 | self.start_line = True 55 | 56 | def write(self, txt): 57 | if self.start_line: 58 | time_str = time.strftime('%Y-%m-%d-%H-%M') 59 | self.log.write('{}: {}'.format(time_str, txt)) 60 | else: 61 | self.log.write(txt) 62 | self.start_line = False 63 | if '\n' in txt: 64 | self.start_line = True 65 | self.log.flush() 66 | 67 | def close(self): 68 | self.log.close() 69 | 70 | def scalar_summary(self, tag, value, step): 71 | """Log a scalar variable.""" 72 | if USE_TENSORBOARD: 73 | self.writer.add_scalar(tag, value, step) 74 | -------------------------------------------------------------------------------- /src/lib/model/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | 8 | from .scatter_gather import scatter_kwargs 9 | 10 | class _DataParallel(Module): 11 | r"""Implements data parallelism at the module level. 12 | 13 | This container parallelizes the application of the given module by 14 | splitting the input across the specified devices by chunking in the batch 15 | dimension. In the forward pass, the module is replicated on each device, 16 | and each replica handles a portion of the input. During the backwards 17 | pass, gradients from each replica are summed into the original module. 18 | 19 | The batch size should be larger than the number of GPUs used. It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 
22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 43 | 44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 45 | super(_DataParallel, self).__init__() 46 | 47 | if not torch.cuda.is_available(): 48 | self.module = module 49 | self.device_ids = [] 50 | return 51 | 52 | if device_ids is None: 53 | device_ids = list(range(torch.cuda.device_count())) 54 | if output_device is None: 55 | output_device = device_ids[0] 56 | self.dim = dim 57 | self.module = module 58 | self.device_ids = device_ids 59 | self.chunk_sizes = chunk_sizes 60 | self.output_device = output_device 61 | if len(self.device_ids) == 1: 62 | self.module.cuda(device_ids[0]) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 68 | if len(self.device_ids) == 1: 69 | return self.module(*inputs[0], **kwargs[0]) 70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 71 | outputs = self.parallel_apply(replicas, inputs, kwargs) 72 | return self.gather(outputs, self.output_device) 73 | 74 | def replicate(self, module, device_ids): 75 | return replicate(module, device_ids) 76 | 77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 79 | 80 | def parallel_apply(self, replicas, inputs, kwargs): 81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 82 | 83 | def gather(self, outputs, output_device): 84 | return gather(outputs, output_device, dim=self.dim) 85 | 86 | 87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 89 | 90 | This is the functional version of the DataParallel module. 91 | 92 | Args: 93 | module: the module to evaluate in parallel 94 | inputs: inputs to the module 95 | device_ids: GPU ids on which to replicate module 96 | output_device: GPU location of the output Use -1 to indicate the CPU. 
97 | (default: device_ids[0]) 98 | Returns: 99 | a Variable containing the result of module(input) located on 100 | output_device 101 | """ 102 | if not isinstance(inputs, tuple): 103 | inputs = (inputs,) 104 | 105 | if device_ids is None: 106 | device_ids = list(range(torch.cuda.device_count())) 107 | 108 | if output_device is None: 109 | output_device = device_ids[0] 110 | 111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 112 | if len(device_ids) == 1: 113 | return module(*inputs[0], **module_kwargs[0]) 114 | used_device_ids = device_ids[:len(inputs)] 115 | replicas = replicate(module, used_device_ids) 116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 117 | return gather(outputs, output_device, dim) 118 | 119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 120 | if chunk_sizes is None: 121 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 122 | standard_size = True 123 | for i in range(1, len(chunk_sizes)): 124 | if chunk_sizes[i] != chunk_sizes[0]: 125 | standard_size = False 126 | if standard_size: 127 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes) -------------------------------------------------------------------------------- /src/lib/model/matcher.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from scipy.optimize import linear_sum_assignment 3 | from torch import nn 4 | 5 | from torchvision.ops.boxes import box_area 6 | 7 | 8 | # modified from torchvision to also return the union 9 | def box_iou(boxes1, boxes2): 10 | area1 = box_area(boxes1) 11 | area2 = box_area(boxes2) 12 | 13 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 14 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 15 | 16 | wh = (rb - lt).clamp(min=0) # [N,M,2] 17 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 18 | 19 | union = area1[:, None] + area2 - inter 20 | 21 | iou = inter / union 22 | return iou, union 23 | 24 | def box_cxcywh_to_xyxy(x): 25 | x_c, y_c, w, h = x.unbind(-1) 26 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), 27 | (x_c + 0.5 * w), (y_c + 0.5 * h)] 28 | return torch.stack(b, dim=-1) 29 | 30 | def generalized_box_iou(boxes1, boxes2): 31 | """ 32 | Generalized IoU from https://giou.stanford.edu/ 33 | The boxes should be in [x0, y0, x1, y1] format 34 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 35 | and M = len(boxes2) 36 | """ 37 | # degenerate boxes gives inf / nan results 38 | # so do an early check 39 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 40 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 41 | iou, union = box_iou(boxes1, boxes2) 42 | 43 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 44 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 45 | 46 | wh = (rb - lt).clamp(min=0) # [N,M,2] 47 | area = wh[:, :, 0] * wh[:, :, 1] 48 | 49 | return iou - (area - union) / area 50 | 51 | 52 | class HungarianMatcher(nn.Module): 53 | """This class computes an assignment between the targets and the predictions of the network 54 | For efficiency reasons, the targets don't include the no_object. Because of this, in general, 55 | there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, 56 | while the others are un-matched (and thus treated as non-objects). 
57 | """ 58 | 59 | def __init__(self, cost_class: float = 1, cost_bbox: float = 1, cost_giou: float = 1): 60 | """Creates the matcher 61 | Params: 62 | cost_class: This is the relative weight of the classification error in the matching cost 63 | cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost 64 | cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost 65 | """ 66 | super().__init__() 67 | self.cost_class = cost_class 68 | self.cost_bbox = cost_bbox 69 | self.cost_giou = cost_giou 70 | assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0, "all costs cant be 0" 71 | 72 | @torch.no_grad() 73 | def forward(self, outputs, targets): 74 | """ Performs the matching 75 | Params: 76 | outputs: This is a dict that contains at least these entries: 77 | "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits 78 | "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates 79 | targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: 80 | "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth 81 | objects in the target) containing the class labels 82 | "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates 83 | Returns: 84 | A list of size batch_size, containing tuples of (index_i, index_j) where: 85 | - index_i is the indices of the selected predictions (in order) 86 | - index_j is the indices of the corresponding selected targets (in order) 87 | For each batch element, it holds: 88 | len(index_i) = len(index_j) = min(num_queries, num_target_boxes) 89 | """ 90 | bs, num_queries = outputs["pred_logits"].shape[:2] 91 | 92 | # We flatten to compute the cost matrices in a batch 93 | out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1) # [batch_size * num_queries, num_classes] 94 | out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] 95 | 96 | # Also concat the target labels and boxes 97 | tgt_ids = torch.cat([v["labels"] for v in targets]) 98 | tgt_bbox = torch.cat([v["boxes"] for v in targets]) 99 | 100 | # Compute the classification cost. Contrary to the loss, we don't use the NLL, 101 | # but approximate it in 1 - proba[target class]. 102 | # The 1 is a constant that doesn't change the matching, it can be ommitted. 
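# For instance, a query that puts probability 0.9 on a target's class contributes a cost of -0.9
# for that pairing, while a query with probability 0.2 contributes -0.2, so the ranking of
# assignments is identical to using (1 - proba). The resulting cost_class below has shape
# [batch_size * num_queries, num_total_target_boxes], one column per concatenated target box.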
103 | cost_class = -out_prob[:, tgt_ids] 104 | 105 | # Compute the L1 cost between boxes 106 | cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) 107 | 108 | # Compute the giou cost between boxes 109 | cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)) 110 | 111 | # Final cost matrix 112 | C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou 113 | C = C.view(bs, num_queries, -1).cpu() 114 | 115 | sizes = [len(v["boxes"]) for v in targets] 116 | indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] 117 | return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] -------------------------------------------------------------------------------- /src/lib/model/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torchvision.models as models 6 | import torch 7 | import torch.nn as nn 8 | import os 9 | import copy 10 | 11 | from .networks.dla import DLASeg 12 | from .networks.resdcn import PoseResDCN 13 | from .networks.resnet import PoseResNet 14 | from .networks.dlav0 import DLASegv0 15 | from .networks.generic_network import GenericNetwork 16 | 17 | _network_factory = { 18 | 'resdcn': PoseResDCN, 19 | 'dla': DLASeg, 20 | 'res': PoseResNet, 21 | 'dlav0': DLASegv0, 22 | 'generic': GenericNetwork 23 | } 24 | 25 | def create_model(arch, head, head_conv, opt=None): 26 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 27 | arch = arch[:arch.find('_')] if '_' in arch else arch 28 | model_class = _network_factory[arch] 29 | model = model_class(num_layers, heads=head, head_convs=head_conv, opt=opt) 30 | return model 31 | 32 | 33 | def load_model(model, model_path, opt, optimizer=None): 34 | start_epoch = 0 35 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 36 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 37 | state_dict_ = checkpoint['state_dict'] 38 | state_dict = {} 39 | 40 | # convert DataParallel checkpoint keys ('module.*') to plain model keys 41 | for k in state_dict_: 42 | if k.startswith('module') and not k.startswith('module_list'): 43 | state_dict[k[7:]] = state_dict_[k] 44 | else: 45 | state_dict[k] = state_dict_[k] 46 | model_state_dict = model.state_dict() 47 | 48 | # check loaded parameters and created model parameters 49 | for k in state_dict: 50 | if k in model_state_dict: 51 | if (state_dict[k].shape != model_state_dict[k].shape) or \ 52 | (opt.reset_hm and k.startswith('hm') and (state_dict[k].shape[0] in [80, 1])): 53 | if opt.reuse_hm: 54 | print('Reusing parameter {}, required shape{}, '\ 55 | 'loaded shape{}.'.format( 56 | k, model_state_dict[k].shape, state_dict[k].shape)) 57 | if state_dict[k].shape[0] < model_state_dict[k].shape[0]: 58 | model_state_dict[k][:state_dict[k].shape[0]] = state_dict[k] 59 | else: 60 | model_state_dict[k] = state_dict[k][:model_state_dict[k].shape[0]] 61 | state_dict[k] = model_state_dict[k] 62 | else: 63 | print('Skip loading parameter {}, required shape{}, '\ 64 | 'loaded shape{}.'.format( 65 | k, model_state_dict[k].shape, state_dict[k].shape)) 66 | state_dict[k] = model_state_dict[k] 67 | else: 68 | print('Drop parameter {}.'.format(k)) 69 | 70 | for k in model_state_dict: 71 | if not (k in state_dict): 72 | print('No param {}.'.format(k)) 73 | state_dict[k] = model_state_dict[k] 74 |
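# At this point every model parameter has a compatibly shaped entry in state_dict (mismatched or
# missing entries were replaced by the model's own values above); checkpoint-only keys that the
# model does not use are simply ignored by the non-strict load below.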
model.load_state_dict(state_dict, strict=False) 75 | 76 | # resume optimizer parameters 77 | if optimizer is not None and opt.resume: 78 | if 'optimizer' in checkpoint: 79 | # optimizer.load_state_dict(checkpoint['optimizer']) 80 | start_epoch = checkpoint['epoch'] 81 | start_lr = opt.lr 82 | for step in opt.lr_step: 83 | if start_epoch >= step: 84 | start_lr *= 0.1 85 | for param_group in optimizer.param_groups: 86 | param_group['lr'] = start_lr 87 | print('Resumed optimizer with start lr', start_lr) 88 | else: 89 | print('No optimizer parameters in checkpoint.') 90 | if optimizer is not None: 91 | return model, optimizer, start_epoch 92 | else: 93 | return model 94 | 95 | def save_model(path, epoch, model, optimizer=None): 96 | if isinstance(model, torch.nn.DataParallel): 97 | state_dict = model.module.state_dict() 98 | else: 99 | state_dict = model.state_dict() 100 | data = {'epoch': epoch, 101 | 'state_dict': state_dict} 102 | if not (optimizer is None): 103 | data['optimizer'] = optimizer.state_dict() 104 | torch.save(data, path) 105 | 106 | -------------------------------------------------------------------------------- /src/lib/model/networks/generic_network.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | from torch import nn 7 | from .backbones.dla import dla34 8 | from .backbones.resnet import Resnet 9 | from .backbones.mobilenet import MobileNetV2 10 | from .necks.dlaup import DLASeg 11 | from .necks.msraup import MSRAUp 12 | 13 | backbone_factory = { 14 | 'dla34': dla34, 15 | 'resnet': Resnet, 16 | 'mobilenet': MobileNetV2 17 | } 18 | 19 | neck_factory = { 20 | 'dlaup': DLASeg, 21 | 'msraup': MSRAUp 22 | } 23 | 24 | def fill_fc_weights(layers): 25 | for m in layers.modules(): 26 | if isinstance(m, nn.Conv2d): 27 | if m.bias is not None: 28 | nn.init.constant_(m.bias, 0) 29 | 30 | class GenericNetwork(nn.Module): 31 | def __init__(self, num_layers, heads, head_convs, num_stacks=1, opt=None): 32 | super(GenericNetwork, self).__init__() 33 | print('Using generic model with backbone {} and neck {}'.format( 34 | opt.backbone, opt.neck)) 35 | # assert (not opt.pre_hm) and (not opt.pre_img) 36 | if opt is not None and opt.head_kernel != 3: 37 | print('Using head kernel:', opt.head_kernel) 38 | head_kernel = opt.head_kernel 39 | else: 40 | head_kernel = 3 41 | self.opt = opt 42 | self.backbone = backbone_factory[opt.backbone](opt=opt) 43 | channels = self.backbone.channels 44 | self.neck = neck_factory[opt.neck](opt=opt, channels=channels) 45 | last_channel = self.neck.out_channel 46 | self.num_stacks = num_stacks 47 | self.heads = heads 48 | for head in self.heads: 49 | classes = self.heads[head] 50 | head_conv = head_convs[head] 51 | if len(head_conv) > 0: 52 | out = nn.Conv2d(head_conv[-1], classes, 53 | kernel_size=1, stride=1, padding=0, bias=True) 54 | conv = nn.Conv2d(last_channel, head_conv[0], 55 | kernel_size=head_kernel, 56 | padding=head_kernel // 2, bias=True) 57 | convs = [conv] 58 | for k in range(1, len(head_conv)): 59 | convs.append(nn.Conv2d(head_conv[k - 1], head_conv[k], 60 | kernel_size=1, bias=True)) 61 | if len(convs) == 1: 62 | fc = nn.Sequential(conv, nn.ReLU(inplace=True), out) 63 | elif len(convs) == 2: 64 | fc = nn.Sequential( 65 | convs[0], nn.ReLU(inplace=True), 66 | convs[1], nn.ReLU(inplace=True), out) 67 | elif len(convs) == 3: 68 | fc = nn.Sequential( 69 | convs[0], 
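# Each output head is a small tower: one head_kernel x head_kernel conv from the neck's
# last_channel features, optional extra 1x1 convs for the remaining head_conv entries with ReLUs
# in between, and a final 1x1 conv producing `classes` channels; the surrounding branches unroll
# this pattern for towers of depth one to four.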
nn.ReLU(inplace=True), 70 | convs[1], nn.ReLU(inplace=True), 71 | convs[2], nn.ReLU(inplace=True), out) 72 | elif len(convs) == 4: 73 | fc = nn.Sequential( 74 | convs[0], nn.ReLU(inplace=True), 75 | convs[1], nn.ReLU(inplace=True), 76 | convs[2], nn.ReLU(inplace=True), 77 | convs[3], nn.ReLU(inplace=True), out) 78 | if 'hm' in head: 79 | fc[-1].bias.data.fill_(opt.prior_bias) 80 | else: 81 | fill_fc_weights(fc) 82 | else: 83 | fc = nn.Conv2d(last_channel, classes, 84 | kernel_size=1, stride=1, padding=0, bias=True) 85 | if 'hm' in head: 86 | fc.bias.data.fill_(opt.prior_bias) 87 | else: 88 | fill_fc_weights(fc) 89 | self.__setattr__(head, fc) 90 | 91 | def forward(self, x, pre_img=None, pre_hm=None): 92 | y = self.backbone(x, pre_img, pre_hm) 93 | feats = self.neck(y) 94 | out = [] 95 | if self.opt.model_output_list: 96 | for s in range(self.num_stacks): 97 | z = [] 98 | for head in sorted(self.heads): 99 | z.append(self.__getattr__(head)(feats[s])) 100 | out.append(z) 101 | else: 102 | for s in range(self.num_stacks): 103 | z = {} 104 | for head in self.heads: 105 | z[head] = self.__getattr__(head)(feats[s]) 106 | out.append(z) 107 | return out 108 | -------------------------------------------------------------------------------- /src/lib/model/networks/necks/msraup.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Dequan Wang and Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | import math 14 | import logging 15 | 16 | import torch 17 | import torch.nn as nn 18 | 19 | try: 20 | from ..DCNv2.dcn_v2 import DCN 21 | except: 22 | print('import DCN failed') 23 | DCN = None 24 | 25 | 26 | BN_MOMENTUM = 0.1 27 | 28 | def fill_up_weights(up): 29 | w = up.weight.data 30 | f = math.ceil(w.size(2) / 2) 31 | c = (2 * f - 1 - f % 2) / (2. 
* f) 32 | for i in range(w.size(2)): 33 | for j in range(w.size(3)): 34 | w[0, 0, i, j] = \ 35 | (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c)) 36 | for c in range(1, w.size(0)): 37 | w[c, 0, :, :] = w[0, 0, :, :] 38 | 39 | def fill_fc_weights(layers): 40 | for m in layers.modules(): 41 | if isinstance(m, nn.Conv2d): 42 | nn.init.normal_(m.weight, std=0.001) 43 | # torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu') 44 | # torch.nn.init.xavier_normal_(m.weight.data) 45 | if m.bias is not None: 46 | nn.init.constant_(m.bias, 0) 47 | 48 | class MSRAUp(nn.Module): 49 | # def __init__(self, block, layers, heads, head_conv): 50 | def __init__(self, opt, channels): 51 | super().__init__() 52 | self.opt = opt 53 | assert self.opt.msra_outchannel in [64, 256] 54 | self.deconv_with_bias = False 55 | self.inplanes = channels[-1] 56 | self.out_channel = self.opt.msra_outchannel 57 | # used for deconv layers 58 | if self.opt.msra_outchannel == 64: 59 | print('Using slimed resnet: 256 128 64 up channels.') 60 | self.deconv_layers = self._make_deconv_layer( 61 | 3, 62 | [256, 128, 64], 63 | [4, 4, 4], 64 | ) 65 | else: 66 | print('Using original resnet: 256 256 256 up channels.') 67 | print('Using 256 deconvs') 68 | self.deconv_layers = self._make_deconv_layer( 69 | 3, 70 | [256, 256, 256], 71 | [4, 4, 4], 72 | ) 73 | self.init_weights() 74 | 75 | 76 | def forward(self, x): 77 | x = self.deconv_layers(x[-1]) 78 | return [x] 79 | 80 | def _get_deconv_cfg(self, deconv_kernel, index): 81 | if deconv_kernel == 4: 82 | padding = 1 83 | output_padding = 0 84 | elif deconv_kernel == 3: 85 | padding = 1 86 | output_padding = 1 87 | elif deconv_kernel == 2: 88 | padding = 0 89 | output_padding = 0 90 | 91 | return deconv_kernel, padding, output_padding 92 | 93 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 94 | assert num_layers == len(num_filters), \ 95 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 96 | assert num_layers == len(num_kernels), \ 97 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 98 | 99 | layers = [] 100 | for i in range(num_layers): 101 | kernel, padding, output_padding = \ 102 | self._get_deconv_cfg(num_kernels[i], i) 103 | 104 | planes = num_filters[i] 105 | fc = DCN(self.inplanes, planes, 106 | kernel_size=(3,3), stride=1, 107 | padding=1, dilation=1, deformable_groups=1) 108 | # fc = nn.Conv2d(self.inplanes, planes, 109 | # kernel_size=3, stride=1, 110 | # padding=1, dilation=1, bias=False) 111 | # fill_fc_weights(fc) 112 | up = nn.ConvTranspose2d( 113 | in_channels=planes, 114 | out_channels=planes, 115 | kernel_size=kernel, 116 | stride=2, 117 | padding=padding, 118 | output_padding=output_padding, 119 | bias=self.deconv_with_bias) 120 | fill_up_weights(up) 121 | 122 | layers.append(fc) 123 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 124 | layers.append(nn.ReLU(inplace=True)) 125 | layers.append(up) 126 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 127 | layers.append(nn.ReLU(inplace=True)) 128 | self.inplanes = planes 129 | 130 | return nn.Sequential(*layers) 131 | 132 | def init_weights(self): 133 | for name, m in self.deconv_layers.named_modules(): 134 | if isinstance(m, nn.BatchNorm2d): 135 | nn.init.constant_(m.weight, 1) 136 | nn.init.constant_(m.bias, 0) 137 | -------------------------------------------------------------------------------- /src/lib/model/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import 
torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /src/lib/model/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | def _sigmoid(x): 9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 10 | return y 11 | 12 | def _sigmoid12(x): 13 | y = torch.clamp(x.sigmoid_(), 1e-12) 14 | return y 15 | 16 | def _gather_feat(feat, ind): 17 | dim = feat.size(2) 18 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 19 | feat = feat.gather(1, ind) 20 | return feat 21 | 22 | def _tranpose_and_gather_feat(feat, ind): 23 | feat = feat.permute(0, 2, 3, 1).contiguous() 24 | feat = feat.view(feat.size(0), -1, feat.size(3)) 25 | feat = _gather_feat(feat, ind) 26 | return feat 27 | 28 | def flip_tensor(x): 29 | return torch.flip(x, [3]) 30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 31 | # return torch.from_numpy(tmp).to(x.device) 32 | 33 | def flip_lr(x, flip_idx): 34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 35 | shape = tmp.shape 36 | for e in flip_idx: 37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 40 | 41 | def flip_lr_off(x, flip_idx): 42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 43 | shape = tmp.shape 44 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 45 | tmp.shape[2], tmp.shape[3]) 46 | tmp[:, :, 0, :, :] *= -1 47 | for e in flip_idx: 48 | tmp[:, e[0], ...], tmp[:, e[1], ...] 
= \ 49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 51 | 52 | def _nms(heat, kernel=3): 53 | pad = (kernel - 1) // 2 54 | 55 | hmax = nn.functional.max_pool2d( 56 | heat, (kernel, kernel), stride=1, padding=pad) 57 | keep = (hmax == heat).float() 58 | return heat * keep 59 | 60 | def _topk_channel(scores, K=100): 61 | batch, cat, height, width = scores.size() 62 | 63 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 64 | 65 | topk_inds = topk_inds % (height * width) 66 | topk_ys = (topk_inds / width).int().float() 67 | topk_xs = (topk_inds % width).int().float() 68 | 69 | return topk_scores, topk_inds, topk_ys, topk_xs 70 | 71 | def _topk(scores, K=100): 72 | batch, cat, height, width = scores.size() 73 | 74 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 75 | 76 | topk_inds = topk_inds % (height * width) 77 | topk_ys = (topk_inds / width).int().float() 78 | topk_xs = (topk_inds % width).int().float() 79 | 80 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K) 81 | topk_clses = (topk_ind / K).int() 82 | topk_inds = _gather_feat( 83 | topk_inds.view(batch, -1, 1), topk_ind).view(batch, K) 84 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K) 85 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K) 86 | 87 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs 88 | -------------------------------------------------------------------------------- /src/lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/src/lib/utils/__init__.py -------------------------------------------------------------------------------- /src/lib/utils/ddd_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | 8 | def comput_corners_3d(dim, rotation_y): 9 | # dim: 3 10 | # location: 3 11 | # rotation_y: 1 12 | # return: 8 x 3 13 | c, s = np.cos(rotation_y), np.sin(rotation_y) 14 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 15 | l, w, h = dim[2], dim[1], dim[0] 16 | x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2] 17 | y_corners = [0,0,0,0,-h,-h,-h,-h] 18 | z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2] 19 | 20 | corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32) 21 | corners_3d = np.dot(R, corners).transpose(1, 0) 22 | return corners_3d 23 | 24 | def compute_box_3d(dim, location, rotation_y): 25 | # dim: 3 26 | # location: 3 27 | # rotation_y: 1 28 | # return: 8 x 3 29 | corners_3d = comput_corners_3d(dim, rotation_y) 30 | corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(1, 3) 31 | return corners_3d 32 | 33 | def project_to_image(pts_3d, P): 34 | # pts_3d: n x 3 35 | # P: 3 x 4 36 | # return: n x 2 37 | pts_3d_homo = np.concatenate( 38 | [pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1) 39 | pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0) 40 | pts_2d = pts_2d[:, :2] / pts_2d[:, 2:] 41 | # import pdb; pdb.set_trace() 42 | return pts_2d 43 | 44 | def compute_orientation_3d(dim, location, rotation_y): 45 | # dim: 3 46 | # location: 3 47 | # rotation_y: 1 48 | # return: 2 x 3 49 | c, s = 
np.cos(rotation_y), np.sin(rotation_y) 50 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 51 | orientation_3d = np.array([[0, dim[2]], [0, 0], [0, 0]], dtype=np.float32) 52 | orientation_3d = np.dot(R, orientation_3d) 53 | orientation_3d = orientation_3d + \ 54 | np.array(location, dtype=np.float32).reshape(3, 1) 55 | return orientation_3d.transpose(1, 0) 56 | 57 | def draw_box_3d(image, corners, c=(255, 0, 255), same_color=False): 58 | face_idx = [[0,1,5,4], 59 | [1,2,6, 5], 60 | [3,0,4,7], 61 | [2,3,7,6]] 62 | right_corners = [1, 2, 6, 5] if not same_color else [] 63 | left_corners = [0, 3, 7, 4] if not same_color else [] 64 | thickness = 4 if same_color else 2 65 | corners = corners.astype(np.int32) 66 | for ind_f in range(3, -1, -1): 67 | f = face_idx[ind_f] 68 | for j in range(4): 69 | # print('corners', corners) 70 | cc = c 71 | if (f[j] in left_corners) and (f[(j+1)%4] in left_corners): 72 | cc = (255, 0, 0) 73 | if (f[j] in right_corners) and (f[(j+1)%4] in right_corners): 74 | cc = (0, 0, 255) 75 | try: 76 | cv2.line(image, (corners[f[j], 0], corners[f[j], 1]), 77 | (corners[f[(j+1)%4], 0], corners[f[(j+1)%4], 1]), cc, thickness, lineType=cv2.LINE_AA) 78 | except: 79 | pass 80 | if ind_f == 0: 81 | try: 82 | cv2.line(image, (corners[f[0], 0], corners[f[0], 1]), 83 | (corners[f[2], 0], corners[f[2], 1]), c, 1, lineType=cv2.LINE_AA) 84 | cv2.line(image, (corners[f[1], 0], corners[f[1], 1]), 85 | (corners[f[3], 0], corners[f[3], 1]), c, 1, lineType=cv2.LINE_AA) 86 | except: 87 | pass 88 | # top_idx = [0, 1, 2, 3] 89 | return image 90 | 91 | def unproject_2d_to_3d(pt_2d, depth, P): 92 | # pts_2d: 2 93 | # depth: 1 94 | # P: 3 x 4 95 | # return: 3 96 | z = depth - P[2, 3] 97 | x = (pt_2d[0] * depth - P[0, 3] - P[0, 2] * z) / P[0, 0] 98 | y = (pt_2d[1] * depth - P[1, 3] - P[1, 2] * z) / P[1, 1] 99 | pt_3d = np.array([x, y, z], dtype=np.float32).reshape(3) 100 | return pt_3d 101 | 102 | def alpha2rot_y(alpha, x, cx, fx): 103 | """ 104 | Get rotation_y by alpha + theta - 180 105 | alpha : Observation angle of object, ranging [-pi..pi] 106 | x : Object center x to the camera center (x-W/2), in pixels 107 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 108 | """ 109 | rot_y = alpha + np.arctan2(x - cx, fx) 110 | if rot_y > np.pi: 111 | rot_y -= 2 * np.pi 112 | if rot_y < -np.pi: 113 | rot_y += 2 * np.pi 114 | return rot_y 115 | 116 | def rot_y2alpha(rot_y, x, cx, fx): 117 | """ 118 | Get rotation_y by alpha + theta - 180 119 | alpha : Observation angle of object, ranging [-pi..pi] 120 | x : Object center x to the camera center (x-W/2), in pixels 121 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 122 | """ 123 | alpha = rot_y - np.arctan2(x - cx, fx) 124 | if alpha > np.pi: 125 | alpha -= 2 * np.pi 126 | if alpha < -np.pi: 127 | alpha += 2 * np.pi 128 | return alpha 129 | 130 | 131 | def ddd2locrot(center, alpha, dim, depth, calib): 132 | # single image 133 | locations = unproject_2d_to_3d(center, depth, calib) 134 | locations[1] += dim[0] / 2 135 | rotation_y = alpha2rot_y(alpha, center[0], calib[0, 2], calib[0, 0]) 136 | return locations, rotation_y 137 | 138 | def project_3d_bbox(location, dim, rotation_y, calib): 139 | box_3d = compute_box_3d(dim, location, rotation_y) 140 | box_2d = project_to_image(box_3d, calib) 141 | return box_2d 142 | 143 | 144 | if __name__ == '__main__': 145 | calib = np.array( 146 | [[7.070493000000e+02, 0.000000000000e+00, 6.040814000000e+02, 4.575831000000e+01], 147 | [0.000000000000e+00, 
7.070493000000e+02, 1.805066000000e+02, -3.454157000000e-01], 148 | [0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 4.981016000000e-03]], 149 | dtype=np.float32) 150 | alpha = -0.20 151 | tl = np.array([712.40, 143.00], dtype=np.float32) 152 | br = np.array([810.73, 307.92], dtype=np.float32) 153 | ct = (tl + br) / 2 154 | rotation_y = 0.01 155 | print('alpha2rot_y', alpha2rot_y(alpha, ct[0], calib[0, 2], calib[0, 0])) 156 | print('rotation_y', rotation_y) 157 | -------------------------------------------------------------------------------- /src/lib/utils/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | from .image import transform_preds_with_trans, get_affine_transform 8 | from .ddd_utils import ddd2locrot, comput_corners_3d 9 | from .ddd_utils import project_to_image, rot_y2alpha 10 | import numba 11 | 12 | def get_alpha(rot): 13 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 14 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 15 | # return rot[:, 0] 16 | idx = rot[:, 1] > rot[:, 5] 17 | alpha1 = np.arctan2(rot[:, 2], rot[:, 3]) + (-0.5 * np.pi) 18 | alpha2 = np.arctan2(rot[:, 6], rot[:, 7]) + ( 0.5 * np.pi) 19 | return alpha1 * idx + alpha2 * (1 - idx) 20 | 21 | def generic_post_process( 22 | opt, dets, c, s, h, w, num_classes, calibs=None, height=-1, width=-1): 23 | if not ('scores' in dets): 24 | return [{}], [{}] 25 | ret = [] 26 | 27 | for i in range(len(dets['scores'])): 28 | preds = [] 29 | trans = get_affine_transform( 30 | c[i], s[i], 0, (w, h), inv=1).astype(np.float32) 31 | for j in range(len(dets['scores'][i])): 32 | if dets['scores'][i][j] < opt.out_thresh: 33 | break 34 | item = {} 35 | item['score'] = dets['scores'][i][j] 36 | item['class'] = int(dets['clses'][i][j]) + 1 37 | item['ct'] = transform_preds_with_trans( 38 | (dets['cts'][i][j]).reshape(1, 2), trans).reshape(2) 39 | 40 | if 'tracking' in dets: 41 | tracking = transform_preds_with_trans( 42 | (dets['tracking'][i][j] + dets['cts'][i][j]).reshape(1, 2), 43 | trans).reshape(2) 44 | item['tracking'] = tracking - item['ct'] 45 | 46 | if 'bboxes' in dets: 47 | bbox = transform_preds_with_trans( 48 | dets['bboxes'][i][j].reshape(2, 2), trans).reshape(4) 49 | item['bbox'] = bbox 50 | 51 | if 'visibility' in dets: 52 | if dets['visibility'][i][j] > opt.visibility_thresh_eval: 53 | item['visibility'] = True 54 | else: 55 | item['visibility'] = False 56 | 57 | if 'hps' in dets: 58 | pts = transform_preds_with_trans( 59 | dets['hps'][i][j].reshape(-1, 2), trans).reshape(-1) 60 | item['hps'] = pts 61 | 62 | if 'dep' in dets and len(dets['dep'][i]) > j: 63 | item['dep'] = dets['dep'][i][j] 64 | 65 | if 'dim' in dets and len(dets['dim'][i]) > j: 66 | item['dim'] = dets['dim'][i][j] 67 | 68 | if 'rot' in dets and len(dets['rot'][i]) > j: 69 | item['alpha'] = get_alpha(dets['rot'][i][j:j+1])[0] 70 | 71 | if 'rot' in dets and 'dep' in dets and 'dim' in dets \ 72 | and len(dets['dep'][i]) > j: 73 | if 'amodel_offset' in dets and len(dets['amodel_offset'][i]) > j: 74 | ct_output = dets['bboxes'][i][j].reshape(2, 2).mean(axis=0) 75 | amodel_ct_output = ct_output + dets['amodel_offset'][i][j] 76 | ct = transform_preds_with_trans( 77 | amodel_ct_output.reshape(1, 2), trans).reshape(2).tolist() 78 | else: 79 | bbox = item['bbox'] 80 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 
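# ddd2locrot (see ddd_utils) back-projects this 2D center with the predicted depth through the
# per-image calibration matrix and converts the observation angle alpha into a global yaw,
# giving the 3D location and rotation_y stored on the detection below.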
81 | item['ct'] = ct 82 | item['loc'], item['rot_y'] = ddd2locrot( 83 | ct, item['alpha'], item['dim'], item['dep'], calibs[i]) 84 | 85 | preds.append(item) 86 | 87 | if 'nuscenes_att' in dets: 88 | for j in range(len(preds)): 89 | preds[j]['nuscenes_att'] = dets['nuscenes_att'][i][j] 90 | 91 | if 'velocity' in dets: 92 | for j in range(len(preds)): 93 | preds[j]['velocity'] = dets['velocity'][i][j] 94 | 95 | ret.append(preds) 96 | 97 | return ret 98 | -------------------------------------------------------------------------------- /src/lib/utils/tracker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.utils.linear_assignment_ import linear_assignment 3 | from numba import jit 4 | import copy 5 | 6 | class Tracker(object): 7 | def __init__(self, opt): 8 | self.opt = opt 9 | self.reset() 10 | 11 | def init_track(self, results): 12 | for item in results: 13 | if item['score'] > self.opt.new_thresh: 14 | self.id_count += 1 15 | # active and age are never used in the paper 16 | item['active'] = 1 17 | item['age'] = 1 18 | item['tracking_id'] = self.id_count 19 | if not ('ct' in item): 20 | bbox = item['bbox'] 21 | item['ct'] = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 22 | item['prev_ct'] = item['ct'] 23 | item['v'] = [0, 0] 24 | self.tracks.append(item) 25 | 26 | def reset(self): 27 | self.id_count = 0 28 | self.tracks = [] 29 | 30 | def step(self, results, public_det=None): 31 | N = len(results) 32 | M = len(self.tracks) 33 | 34 | dets = np.array( 35 | [det['ct'] + det['tracking'] for det in results], np.float32) # N x 2 36 | track_size = np.array([((track['bbox'][2] - track['bbox'][0]) * \ 37 | (track['bbox'][3] - track['bbox'][1])) \ 38 | for track in self.tracks], np.float32) # M 39 | track_cat = np.array([track['class'] for track in self.tracks], np.int32) # M 40 | item_size = np.array([((item['bbox'][2] - item['bbox'][0]) * \ 41 | (item['bbox'][3] - item['bbox'][1])) \ 42 | for item in results], np.float32) # N 43 | item_cat = np.array([item['class'] for item in results], np.int32) # N 44 | tracks = np.array( 45 | [pre_det['ct'] for pre_det in self.tracks], np.float32) # M x 2 46 | 47 | dist = (((tracks.reshape(1, -1, 2) - \ 48 | dets.reshape(-1, 1, 2)) ** 2).sum(axis=2)) # N x M 49 | 50 | invalid = ((dist > track_size.reshape(1, M)) + \ 51 | (dist > item_size.reshape(N, 1)) + \ 52 | (item_cat.reshape(N, 1) != track_cat.reshape(1, M))) > 0 53 | dist = dist + invalid * 1e18 54 | 55 | if self.opt.hungarian: 56 | item_score = np.array([item['score'] for item in results], np.float32) # N 57 | dist[dist > 1e18] = 1e18 58 | matched_indices = linear_assignment(dist) 59 | else: 60 | matched_indices = greedy_assignment(copy.deepcopy(dist)) 61 | unmatched_dets = [d for d in range(dets.shape[0]) \ 62 | if not (d in matched_indices[:, 0])] 63 | unmatched_tracks = [d for d in range(tracks.shape[0]) \ 64 | if not (d in matched_indices[:, 1])] 65 | 66 | if self.opt.hungarian: 67 | matches = [] 68 | for m in matched_indices: 69 | if dist[m[0], m[1]] > 1e16: 70 | unmatched_dets.append(m[0]) 71 | unmatched_tracks.append(m[1]) 72 | else: 73 | matches.append(m) 74 | matches = np.array(matches).reshape(-1, 2) 75 | else: 76 | matches = matched_indices 77 | 78 | ret = [] 79 | for m in matches: 80 | track = results[m[0]] 81 | track['tracking_id'] = self.tracks[m[1]]['tracking_id'] 82 | track['age'] = 1 83 | track['active'] = 1 84 | track['prev_ct'] = self.tracks[m[1]]['ct'] 85 | track['v'] = [x - y for x,y in zip(track['ct'], 
track['prev_ct'])] 86 | ret.append(track) 87 | 88 | if self.opt.public_det and len(unmatched_dets) > 0: 89 | # Public detection: only create tracks from provided detections 90 | pub_dets = np.array([d['ct'] for d in public_det], np.float32) 91 | dist3 = ((dets.reshape(-1, 1, 2) - pub_dets.reshape(1, -1, 2)) ** 2).sum( 92 | axis=2) 93 | matched_dets = [d for d in range(dets.shape[0]) \ 94 | if not (d in unmatched_dets)] 95 | dist3[matched_dets] = 1e18 96 | for j in range(len(pub_dets)): 97 | i = dist3[:, j].argmin() 98 | if dist3[i, j] < item_size[i]: 99 | dist3[i, :] = 1e18 100 | track = results[i] 101 | if track['score'] > self.opt.new_thresh: 102 | self.id_count += 1 103 | track['tracking_id'] = self.id_count 104 | track['age'] = 1 105 | track['active'] = 1 106 | ret.append(track) 107 | else: 108 | # Private detection: create tracks for all un-matched detections 109 | for i in unmatched_dets: 110 | track = results[i] 111 | if track['score'] > self.opt.new_thresh: 112 | self.id_count += 1 113 | track['tracking_id'] = self.id_count 114 | track['age'] = 1 115 | track['active'] = 1 116 | track['prev_ct'] = track['ct'] 117 | track['v'] = [0, 0] 118 | ret.append(track) 119 | 120 | # Never used 121 | for i in unmatched_tracks: 122 | track = self.tracks[i] 123 | if track['age'] < self.opt.max_age: 124 | track['age'] += 1 125 | track['active'] = 1 # 0 126 | bbox = track['bbox'] 127 | ct = track['ct'] 128 | # v = track['v'] 129 | v = [0, 0] 130 | track['bbox'] = [ 131 | bbox[0] + v[0], bbox[1] + v[1], 132 | bbox[2] + v[0], bbox[3] + v[1]] 133 | track['ct'] = [ct[0] + v[0], ct[1] + v[1]] 134 | ret.append(track) 135 | self.tracks = ret 136 | return ret 137 | 138 | def greedy_assignment(dist, v_dist=None): 139 | matched_indices = [] 140 | if dist.shape[1] == 0: 141 | return np.array(matched_indices, np.int32).reshape(-1, 2) 142 | for i in range(dist.shape[0]): 143 | j = dist[i].argmin() 144 | if dist[i][j] < 1e16: 145 | dist[:, j] = 1e18 146 | matched_indices.append([i, j]) 147 | return np.array(matched_indices, np.int32).reshape(-1, 2) 148 | -------------------------------------------------------------------------------- /src/lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | 7 | class AverageMeter(object): 8 | """Computes and stores the average and current value""" 9 | def __init__(self): 10 | self.reset() 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | if self.count > 0: 23 | self.avg = self.sum / self.count -------------------------------------------------------------------------------- /src/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '../lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/tools/annot_bbox.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import cv2 5 | import argparse 6 | import numpy as 
np 7 | image_ext = ['jpg', 'jpeg', 'png', 'webp'] 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--image_path', default='') 11 | parser.add_argument('--save_path', default='') 12 | MAX_CACHE = 20 13 | CAT_NAMES = ['cat'] 14 | 15 | def _sort_expt(pts): 16 | t, l, b, r = 0, 0, 0, 0 17 | for i in range(4): 18 | if pts[i][0] < pts[l][0]: 19 | l = i 20 | if pts[i][1] < pts[t][1]: 21 | t = i 22 | if pts[i][0] > pts[r][0]: 23 | r = i 24 | if pts[i][1] > pts[b][1]: 25 | b = i 26 | ret = [pts[t], pts[l], pts[b], pts[r]] 27 | return ret 28 | 29 | def _expt2bbox(expt): 30 | expt = np.array(expt, dtype=np.int32) 31 | bbox = [int(expt[:, 0].min()), int(expt[:, 1].min()), 32 | int(expt[:, 0].max()), int(expt[:, 1].max())] 33 | return bbox 34 | 35 | def save_txt(txt_name, pts_cls): 36 | ret = [] 37 | for i in range(len(pts_cls)): 38 | ret.append(np.array(pts_cls[i][:4], dtype=np.int32).reshape(8).tolist() \ 39 | + [pts_cls[i][4]]) 40 | np.savetxt(txt_name, np.array(ret, dtype=np.int32), fmt='%d') 41 | 42 | def click(event, x, y, flags, param): 43 | global expt_cls, bboxes, pts 44 | if event == cv2.EVENT_LBUTTONDOWN: 45 | pts.append([x, y]) 46 | cv2.circle(img, (x, y), 5, (255, 0, 255), -1) 47 | if len(pts) == 4: 48 | expt = _sort_expt(pts) 49 | bbox = _expt2bbox(expt) 50 | expt_cls.append(expt + [cls]) 51 | cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), 52 | (255, 0, 255), 2, cv2.LINE_AA) 53 | pts = [] 54 | 55 | if __name__ == '__main__': 56 | cat_info = [] 57 | for i, cat in enumerate(CAT_NAMES): 58 | cat_info.append({'name': cat, 'id': i + 1}) 59 | 60 | args = parser.parse_args() 61 | if args.save_path == '': 62 | args.save_path = os.path.join(args.image_path, '..', 'click_annotation') 63 | if not os.path.exists(args.save_path): 64 | os.mkdir(args.save_path) 65 | 66 | ann_path = os.path.join(args.save_path, 'annotations.json') 67 | if os.path.exists(ann_path): 68 | anns = json.load(open(ann_path, 'r')) 69 | else: 70 | anns = {'annotations': [], 'images': [], 'categories': cat_info} 71 | 72 | assert os.path.exists(args.image_path) 73 | ls = os.listdir(args.image_path) 74 | image_names = [] 75 | for file_name in sorted(ls): 76 | ext = file_name[file_name.rfind('.') + 1:].lower() 77 | if (ext in image_ext): 78 | image_names.append(file_name) 79 | 80 | i = 0 81 | cls = 1 82 | cached = 0 83 | while i < len(image_names): 84 | image_name = image_names[i] 85 | txt_name = os.path.join( 86 | args.save_path, image_name[:image_name.rfind('.')] + '.txt') 87 | if os.path.exists(txt_name) or image_name in anns: 88 | i = i + 1 89 | continue 90 | image_path = os.path.join(args.image_path, image_name) 91 | img = cv2.imread(image_path) 92 | cv2.namedWindow(image_name) 93 | cv2.setMouseCallback(image_name, click) 94 | expt_cls, pts = [], [] 95 | while True: 96 | finished = False 97 | cv2.imshow(image_name, img) 98 | key = cv2.waitKey(1) 99 | if key == 100: 100 | i = i + 1 101 | save_txt(txt_name, expt_cls) 102 | image_id = len(anns['images']) 103 | image_info = {'file_name': image_name, 'id': image_id} 104 | anns['images'].append(image_info) 105 | for ann in expt_cls: 106 | ann_id = len(anns['annotations']) 107 | ann_dict = {'image_id': image_id, 'id': ann_id, 'categoty_id': ann[4], 108 | 'bbox': _expt2bbox(ann[:4]), 'extreme_points': ann[:4]} 109 | anns['annotations'].append(ann_dict) 110 | cached = cached + 1 111 | print('saved to ', txt_name) 112 | if cached > MAX_CACHE: 113 | print('Saving json', ann_path) 114 | json.dump(anns, open(ann_path, 'w')) 115 | cached = 0 116 | break 117 | 
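# Key bindings for the annotation loop: 100 ('d') saves the current image's boxes and advances
# to the next image, 97 ('a') steps back one image, and 27 (Esc) dumps annotations.json and exits.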
elif key == 97: 118 | i = i - 1 119 | break 120 | elif key == 27: 121 | json.dump(anns, open(ann_path, 'w')) 122 | sys.exit(0) 123 | cv2.destroyAllWindows() 124 | -------------------------------------------------------------------------------- /src/tools/convert_crowdhuman_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import cv2 5 | 6 | DATA_PATH = '../../data/crowdhuman/' 7 | OUT_PATH = DATA_PATH + 'annotations/' 8 | SPLITS = ['val', 'train'] 9 | DEBUG = False 10 | 11 | def load_func(fpath): 12 | print('fpath', fpath) 13 | assert os.path.exists(fpath) 14 | with open(fpath,'r') as fid: 15 | lines = fid.readlines() 16 | records =[json.loads(line.strip('\n')) for line in lines] 17 | return records 18 | 19 | if __name__ == '__main__': 20 | if not os.path.exists(OUT_PATH): 21 | os.mkdir(OUT_PATH) 22 | for split in SPLITS: 23 | data_path = DATA_PATH + split 24 | out_path = OUT_PATH + '{}.json'.format(split) 25 | out = {'images': [], 'annotations': [], 26 | 'categories': [{'id': 1, 'name': 'person'}]} 27 | ann_path = DATA_PATH + '/annotation_{}.odgt'.format(split) 28 | anns_data = load_func(ann_path) 29 | image_cnt = 0 30 | ann_cnt = 0 31 | video_cnt = 0 32 | for ann_data in anns_data: 33 | image_cnt += 1 34 | image_info = {'file_name': '{}.jpg'.format(ann_data['ID']), 35 | 'id': image_cnt} 36 | out['images'].append(image_info) 37 | if split != 'test': 38 | anns = ann_data['gtboxes'] 39 | for i in range(len(anns)): 40 | ann_cnt += 1 41 | ann = {'id': ann_cnt, 42 | 'category_id': 1, 43 | 'image_id': image_cnt, 44 | 'bbox_vis': anns[i]['vbox'], 45 | 'bbox': anns[i]['fbox'], 46 | 'iscrowd': 1 if 'extra' in anns[i] and \ 47 | 'ignore' in anns[i]['extra'] and \ 48 | anns[i]['extra']['ignore'] == 1 else 0} 49 | out['annotations'].append(ann) 50 | print('loaded {} for {} images and {} samples'.format( 51 | split, len(out['images']), len(out['annotations']))) 52 | json.dump(out, open(out_path, 'w')) 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /src/tools/convert_kitti_to_tao.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | DATA_PATH = '../../data/kitti_tracking/' 5 | SPLITS = ['tracking_val_half'] 6 | 7 | 8 | def get_cats_for_vid(anns, vid_id, image2vid): 9 | all_categories = set([]) 10 | for cat in anns['categories']: 11 | all_categories.add(cat['id']) 12 | 13 | positive = set([]) 14 | for ann in anns['annotations']: 15 | ann_vid_id = image2vid[ann['image_id']] 16 | if ann_vid_id == vid_id: 17 | category = ann['category_id'] 18 | positive.add(category) 19 | 20 | return positive, all_categories - positive 21 | 22 | 23 | def get_image2video_map(anns): 24 | mapping = {} 25 | for img in anns['images']: 26 | mapping[img['id']] = img['video_id'] 27 | img['frame_index'] = img['frame_id'] - 1 28 | 29 | return mapping, anns 30 | 31 | 32 | def unique_track_ids(anns, image2vid): 33 | unique_tracks = {} 34 | track_counter = 0 35 | for ann in anns['annotations']: 36 | orig_track_id = ann['track_id'] 37 | image_id = ann['image_id'] 38 | ann['area'] = ann['bbox'][2] * ann['bbox'][3] 39 | video_id = image2vid[image_id] 40 | vid_track_pair = f"{video_id}_{orig_track_id}" 41 | if vid_track_pair not in unique_tracks: 42 | unique_tracks[vid_track_pair] = track_counter 43 | track_counter += 1 44 | 45 | tracks = [] 46 | processed_tracks = set([]) 47 | for ann in anns['annotations']: 48 | orig_track_id =
ann['track_id'] 49 | image_id = ann['image_id'] 50 | video_id = image2vid[image_id] 51 | vid_track_pair = f"{video_id}_{orig_track_id}" 52 | ann['track_id'] = unique_tracks[vid_track_pair] 53 | if ann['track_id'] not in processed_tracks: 54 | track = {'id': ann['track_id'], 'category_id': ann['category_id'], 'video_id': video_id} 55 | processed_tracks.add(ann['track_id']) 56 | tracks.append(track) 57 | 58 | anns['tracks'] = tracks 59 | 60 | return anns 61 | 62 | 63 | 64 | if __name__ == '__main__': 65 | ann_dir = DATA_PATH + '/annotations/' 66 | 67 | for split in SPLITS: 68 | print("Processing split %s" % split) 69 | anns = json.load(open(ann_dir + split + ".json")) 70 | image2vid, anns = get_image2video_map(anns) 71 | for vid in anns['videos']: 72 | print("Processing video %s" % vid['file_name']) 73 | vid['not_exhaustive_category_ids'] = [] 74 | positives, negatives = get_cats_for_vid(anns, vid['id'], image2vid) 75 | vid['neg_category_ids'] = list(negatives) 76 | 77 | anns = unique_track_ids(anns, image2vid) 78 | 79 | json.dump(anns, open(ann_dir + split + "_tao.json", 'w')) -------------------------------------------------------------------------------- /src/tools/convert_mot_det_to_results.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import os 4 | from collections import defaultdict 5 | split = 'val_half' 6 | 7 | DET_PATH = '../../data/mot17/train/' 8 | ANN_PATH = '../../data/mot17/annotations/{}.json'.format(split) 9 | OUT_DIR = '../../data/mot17/results/' 10 | OUT_PATH = OUT_DIR + '{}_det.json'.format(split) 11 | 12 | if __name__ == '__main__': 13 | if not os.path.exists(OUT_DIR): 14 | os.mkdir(OUT_DIR) 15 | seqs = [s for s in os.listdir(DET_PATH)] 16 | data = json.load(open(ANN_PATH, 'r')) 17 | images = data['images'] 18 | image_to_anns = defaultdict(list) 19 | for seq in sorted(seqs): 20 | print('seq', seq) 21 | seq_path = '{}/{}/'.format(DET_PATH, seq) 22 | if not os.path.exists(seq_path + 'det/det_val_half.txt'): 23 | continue 24 | if split == 'val_half': 25 | ann_path = seq_path + 'det/det_val_half.txt' 26 | train_ann_path = seq_path + 'det/det_train_half.txt' 27 | train_anns = np.loadtxt(train_ann_path, dtype=np.float32, delimiter=',') 28 | frame_base = int(train_anns[:, 0].max()) 29 | else: 30 | ann_path = seq_path + 'det/det.txt' 31 | frame_base = 0 32 | # if not IS_THIRD_PARTY: 33 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') 34 | for i in range(len(anns)): 35 | frame_id = int(anns[i][0]) 36 | file_name = '{}/img1/{:06d}.jpg'.format(seq, frame_id + frame_base) 37 | bbox = (anns[i][2:6]).tolist() 38 | score = 1 # float(anns[i][8]) 39 | image_to_anns[file_name].append(bbox + [score]) 40 | 41 | results = {} 42 | for image_info in images: 43 | image_id = image_info['id'] 44 | file_name = image_info['file_name'] 45 | dets = image_to_anns[file_name] 46 | results[image_id] = [] 47 | for det in dets: 48 | bbox = [float(det[0]), float(det[1]), \ 49 | float(det[0] + det[2]), float(det[1] + det[3])] 50 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 51 | results[image_id].append( 52 | {'bbox': bbox, 'score': float(det[4]), 'class': 1, 'ct': ct}) 53 | out_path = OUT_PATH 54 | json.dump(results, open(out_path, 'w')) 55 | -------------------------------------------------------------------------------- /src/tools/convert_mot_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import cv2 
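# Expects the MOT17 layout produced by get_mot_17.sh: per-sequence folders with frames under
# seq/img1/, ground truth in seq/gt/gt.txt and public detections in seq/det/det.txt. The script
# writes COCO-style JSON (images, annotations with track_id, videos) per split, and for the
# *_half splits it also writes gt_<split>.txt / det_<split>.txt restricted to that half of each
# sequence.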
5 | 6 | DATA_PATH = '../../data/mot17/' 7 | OUT_PATH = DATA_PATH + 'annotations/' 8 | SPLITS = ['train_half', 'val_half', 'train', 'test'] 9 | CREATE_SPLITTED_ANN = True 10 | CREATE_SPLITTED_DET = True 11 | 12 | if __name__ == '__main__': 13 | for split in SPLITS: 14 | if split == 'test': 15 | HALF_VIDEO = False 16 | else: 17 | HALF_VIDEO = True 18 | data_path = DATA_PATH + (split if not HALF_VIDEO else 'train') 19 | out_path = OUT_PATH + '{}.json'.format(split) 20 | out = {'images': [], 'annotations': [], 21 | 'categories': [{'id': 1, 'name': 'pedestrain'}], 22 | 'videos': []} 23 | seqs = os.listdir(data_path) 24 | image_cnt = 0 25 | ann_cnt = 0 26 | video_cnt = 0 27 | for seq in sorted(seqs): 28 | if '.DS_Store' in seq: 29 | continue 30 | if 'mot17' in DATA_PATH and (split != 'test' and not ('FRCNN' in seq)): 31 | continue 32 | video_cnt += 1 33 | out['videos'].append({ 34 | 'id': video_cnt, 35 | 'file_name': seq}) 36 | seq_path = '{}/{}/'.format(data_path, seq) 37 | img_path = seq_path + 'img1/' 38 | ann_path = seq_path + 'gt/gt.txt' 39 | images = os.listdir(img_path) 40 | num_images = len([image for image in images if 'jpg' in image]) 41 | if HALF_VIDEO and ('half' in split): 42 | image_range = [0, num_images // 2] if 'train' in split else \ 43 | [num_images // 2 + 1, num_images - 1] 44 | else: 45 | image_range = [0, num_images - 1] 46 | for i in range(num_images): 47 | if (i < image_range[0] or i > image_range[1]): 48 | continue 49 | image_info = {'file_name': '{}/img1/{:06d}.jpg'.format(seq, i + 1), 50 | 'id': image_cnt + i + 1, 51 | 'frame_id': i + 1 - image_range[0], 52 | 'prev_image_id': image_cnt + i if i > 0 else -1, 53 | 'next_image_id': \ 54 | image_cnt + i + 2 if i < num_images - 1 else -1, 55 | 'video_id': video_cnt} 56 | out['images'].append(image_info) 57 | print('{}: {} images'.format(seq, num_images)) 58 | if split != 'test': 59 | det_path = seq_path + 'det/det.txt' 60 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') 61 | dets = np.loadtxt(det_path, dtype=np.float32, delimiter=',') 62 | if CREATE_SPLITTED_ANN and ('half' in split): 63 | anns_out = np.array([anns[i] for i in range(anns.shape[0]) if \ 64 | int(anns[i][0]) - 1 >= image_range[0] and \ 65 | int(anns[i][0]) - 1 <= image_range[1]], np.float32) 66 | anns_out[:, 0] -= image_range[0] 67 | gt_out = seq_path + '/gt/gt_{}.txt'.format(split) 68 | fout = open(gt_out, 'w') 69 | for o in anns_out: 70 | fout.write( 71 | '{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:.6f}\n'.format( 72 | int(o[0]),int(o[1]),int(o[2]),int(o[3]),int(o[4]),int(o[5]), 73 | int(o[6]),int(o[7]),o[8])) 74 | fout.close() 75 | if CREATE_SPLITTED_DET and ('half' in split): 76 | dets_out = np.array([dets[i] for i in range(dets.shape[0]) if \ 77 | int(dets[i][0]) - 1 >= image_range[0] and \ 78 | int(dets[i][0]) - 1 <= image_range[1]], np.float32) 79 | dets_out[:, 0] -= image_range[0] 80 | det_out = seq_path + '/det/det_{}.txt'.format(split) 81 | dout = open(det_out, 'w') 82 | for o in dets_out: 83 | dout.write( 84 | '{:d},{:d},{:.1f},{:.1f},{:.1f},{:.1f},{:.6f}\n'.format( 85 | int(o[0]),int(o[1]),float(o[2]),float(o[3]),float(o[4]),float(o[5]), 86 | float(o[6]))) 87 | dout.close() 88 | 89 | print(' {} ann images'.format(int(anns[:, 0].max()))) 90 | for i in range(anns.shape[0]): 91 | frame_id = int(anns[i][0]) 92 | if (frame_id - 1 < image_range[0] or frame_id - 1 > image_range[1]): 93 | continue 94 | track_id = int(anns[i][1]) 95 | cat_id = int(anns[i][7]) 96 | ann_cnt += 1 97 | if not ('15' in DATA_PATH): 98 | if not (float(anns[i][8]) >= 
0.25): 99 | continue 100 | if not (int(anns[i][6]) == 1): 101 | continue 102 | if (int(anns[i][7]) in [3, 4, 5, 6, 9, 10, 11]): # Non-person 103 | continue 104 | if (int(anns[i][7]) in [2, 7, 8, 12]): # Ignored person 105 | category_id = -1 106 | else: 107 | category_id = 1 108 | else: 109 | category_id = 1 110 | ann = {'id': ann_cnt, 111 | 'category_id': category_id, 112 | 'image_id': image_cnt + frame_id, 113 | 'track_id': track_id, 114 | 'bbox': anns[i][2:6].tolist(), 115 | 'conf': float(anns[i][6])} 116 | out['annotations'].append(ann) 117 | image_cnt += num_images 118 | print('loaded {} for {} images and {} samples'.format( 119 | split, len(out['images']), len(out['annotations']))) 120 | json.dump(out, open(out_path, 'w')) 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000154 2 | 0001 empty 000000 000447 3 | 0002 empty 000000 000233 4 | 0003 empty 000000 000144 5 | 0004 empty 000000 000314 6 | 0005 empty 000000 000297 7 | 0006 empty 000000 000270 8 | 0007 empty 000000 000800 9 | 0008 empty 000000 000390 10 | 0009 empty 000000 000803 11 | 0010 empty 000000 000294 12 | 0011 empty 000000 000373 13 | 0012 empty 000000 000078 14 | 0013 empty 000000 000340 15 | 0014 empty 000000 000106 16 | 0015 empty 000000 000376 17 | 0016 empty 000000 000209 18 | 0017 empty 000000 000145 19 | 0018 empty 000000 000339 20 | 0019 empty 000000 001059 21 | 0020 empty 000000 000837 22 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap.test: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000465 2 | 0001 empty 000000 000147 3 | 0002 empty 000000 000243 4 | 0003 empty 000000 000257 5 | 0004 empty 000000 000421 6 | 0005 empty 000000 000809 7 | 0006 empty 000000 000114 8 | 0007 empty 000000 000215 9 | 0008 empty 000000 000165 10 | 0009 empty 000000 000349 11 | 0010 empty 000000 001176 12 | 0011 empty 000000 000774 13 | 0012 empty 000000 000694 14 | 0013 empty 000000 000152 15 | 0014 empty 000000 000850 16 | 0015 empty 000000 000701 17 | 0016 empty 000000 000510 18 | 0017 empty 000000 000305 19 | 0018 empty 000000 000180 20 | 0019 empty 000000 000404 21 | 0020 empty 000000 000173 22 | 0021 empty 000000 000203 23 | 0022 empty 000000 000436 24 | 0023 empty 000000 000430 25 | 0024 empty 000000 000316 26 | 0025 empty 000000 000176 27 | 0026 empty 000000 000170 28 | 0027 empty 000000 000085 29 | 0028 empty 000000 000175 30 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap.training: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000154 2 | 0001 empty 000000 000447 3 | 0002 empty 000000 000233 4 | 0003 empty 000000 000144 5 | 0004 empty 000000 000314 6 | 0005 empty 000000 000297 7 | 0006 empty 000000 000270 8 | 0007 empty 000000 000800 9 | 0008 empty 000000 000390 10 | 0009 empty 000000 000803 11 | 0010 empty 000000 000294 12 | 0011 empty 000000 000373 13 | 0012 empty 000000 000078 14 | 0013 empty 000000 000340 15 | 0014 empty 000000 000106 16 | 0015 empty 000000 000376 17 | 0016 empty 000000 000209 18 | 0017 empty 000000 000145 19 | 0018 empty 000000 000339 20 | 0019 empty 000000 001059 21 | 
0020 empty 000000 000837 22 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_trackingtrain_1-2.seqmap: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000154 2 | 0001 empty 000000 000447 3 | 0002 empty 000000 000233 4 | 0003 empty 000000 000144 5 | 0004 empty 000000 000314 6 | 0005 empty 000000 000297 7 | 0006 empty 000000 000270 8 | 0007 empty 000000 000800 9 | 0008 empty 000000 000390 10 | 0009 empty 000000 000803 11 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_trackingtrain_2-2.seqmap: -------------------------------------------------------------------------------- 1 | 0010 empty 000000 000294 2 | 0011 empty 000000 000373 3 | 0012 empty 000000 000078 4 | 0013 empty 000000 000340 5 | 0014 empty 000000 000106 6 | 0015 empty 000000 000376 7 | 0016 empty 000000 000209 8 | 0017 empty 000000 000145 9 | 0018 empty 000000 000339 10 | 0019 empty 000000 001059 11 | 0020 empty 000000 000837 12 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_trackingval_half.seqmap: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000077 2 | 0001 empty 000000 000224 3 | 0002 empty 000000 000117 4 | 0003 empty 000000 000072 5 | 0004 empty 000000 000157 6 | 0005 empty 000000 000149 7 | 0006 empty 000000 000135 8 | 0007 empty 000000 000400 9 | 0008 empty 000000 000195 10 | 0009 empty 000000 000402 11 | 0010 empty 000000 000147 12 | 0011 empty 000000 000187 13 | 0012 empty 000000 000039 14 | 0013 empty 000000 000170 15 | 0014 empty 000000 000053 16 | 0015 empty 000000 000188 17 | 0016 empty 000000 000105 18 | 0017 empty 000000 000073 19 | 0018 empty 000000 000170 20 | 0019 empty 000000 000530 21 | 0020 empty 000000 000419 22 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/mailpy.py: -------------------------------------------------------------------------------- 1 | class Mail: 2 | """ Dummy class to print messages without sending e-mails""" 3 | def __init__(self,mailaddress): 4 | pass 5 | def msg(self,msg): 6 | print(msg) 7 | def finalize(self,success,benchmark,sha_key,mailaddress=None): 8 | if success: 9 | print("Results for %s (benchmark: %s) sucessfully created" % (benchmark,sha_key)) 10 | else: 11 | print("Creating results for %s (benchmark: %s) failed" % (benchmark,sha_key)) 12 | 13 | -------------------------------------------------------------------------------- /src/tools/eval_motchallenge.py: -------------------------------------------------------------------------------- 1 | """py-motmetrics - metrics for multiple object tracker (MOT) benchmarking. 2 | Christoph Heindl, 2017 3 | https://github.com/cheind/py-motmetrics 4 | Modified by Xingyi Zhou 5 | """ 6 | 7 | import argparse 8 | import glob 9 | import os 10 | import logging 11 | import motmetrics as mm 12 | import pandas as pd 13 | from collections import OrderedDict 14 | from pathlib import Path 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description=""" 18 | Compute metrics for trackers using MOTChallenge ground-truth data. 19 | Files 20 | ----- 21 | All file content, ground truth and test files, have to comply with the 22 | format described in 23 | Milan, Anton, et al. 
24 | "Mot16: A benchmark for multi-object tracking." 25 | arXiv preprint arXiv:1603.00831 (2016). 26 | https://motchallenge.net/ 27 | Structure 28 | --------- 29 | Layout for ground truth data 30 | //gt/gt.txt 31 | //gt/gt.txt 32 | ... 33 | Layout for test data 34 | /.txt 35 | /.txt 36 | ... 37 | Sequences of ground truth and test will be matched according to the `` 38 | string.""", formatter_class=argparse.RawTextHelpFormatter) 39 | 40 | parser.add_argument('groundtruths', type=str, help='Directory containing ground truth files.') 41 | parser.add_argument('tests', type=str, help='Directory containing tracker result files') 42 | parser.add_argument('--gt_type', type=str, default='') 43 | parser.add_argument('--eval_official', action='store_true') 44 | parser.add_argument('--loglevel', type=str, help='Log level', default='info') 45 | parser.add_argument('--fmt', type=str, help='Data format', default='mot15-2D') 46 | parser.add_argument('--solver', type=str, help='LAP solver to use') 47 | return parser.parse_args() 48 | 49 | def compare_dataframes(gts, ts): 50 | accs = [] 51 | names = [] 52 | for k, tsacc in ts.items(): 53 | if k in gts: 54 | logging.info('Comparing {}...'.format(k)) 55 | accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5)) 56 | names.append(k) 57 | else: 58 | logging.warning('No ground truth for {}, skipping.'.format(k)) 59 | 60 | return accs, names 61 | 62 | if __name__ == '__main__': 63 | 64 | args = parse_args() 65 | 66 | loglevel = getattr(logging, args.loglevel.upper(), None) 67 | if not isinstance(loglevel, int): 68 | raise ValueError('Invalid log level: {} '.format(args.loglevel)) 69 | logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s - %(message)s', datefmt='%I:%M:%S') 70 | 71 | if args.solver: 72 | mm.lap.default_solver = args.solver 73 | 74 | gt_type = args.gt_type 75 | print('gt_type', gt_type) 76 | gtfiles = glob.glob( 77 | os.path.join(args.groundtruths, '*/gt/gt{}.txt'.format(gt_type))) 78 | print('gt_files', gtfiles) 79 | tsfiles = [f for f in glob.glob(os.path.join(args.tests, '*.txt')) if not os.path.basename(f).startswith('eval')] 80 | 81 | logging.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) 82 | logging.info('Available LAP solvers {}'.format(mm.lap.available_solvers)) 83 | logging.info('Default LAP solver \'{}\''.format(mm.lap.default_solver)) 84 | logging.info('Loading files.') 85 | 86 | gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt=args.fmt, min_confidence=1)) for f in gtfiles]) 87 | ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt=args.fmt)) for f in tsfiles]) 88 | 89 | mh = mm.metrics.create() 90 | accs, names = compare_dataframes(gt, ts) 91 | 92 | logging.info('Running metrics') 93 | metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', \ 94 | 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', \ 95 | 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects'] 96 | summary = mh.compute_many( 97 | accs, names=names, 98 | metrics=metrics, generate_overall=True) 99 | # summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True) 100 | # print(mm.io.render_summary( 101 | # summary, formatters=mh.formatters, 102 | # namemap=mm.io.motchallenge_metric_names)) 103 | div_dict = { 104 | 'num_objects': ['num_false_positives', 'num_misses', 105 | 'num_switches', 'num_fragmentations'], 106 | 'num_unique_objects': ['mostly_tracked', 
'partially_tracked', 107 | 'mostly_lost']} 108 | for divisor in div_dict: 109 | for divided in div_dict[divisor]: 110 | summary[divided] = (summary[divided] / summary[divisor]) 111 | fmt = mh.formatters 112 | change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches', 113 | 'num_fragmentations', 'mostly_tracked', 'partially_tracked', 114 | 'mostly_lost'] 115 | for k in change_fmt_list: 116 | fmt[k] = fmt['mota'] 117 | print(mm.io.render_summary( 118 | summary, formatters=fmt, 119 | namemap=mm.io.motchallenge_metric_names)) 120 | if args.eval_official: 121 | metrics = mm.metrics.motchallenge_metrics + ['num_objects'] 122 | summary = mh.compute_many( 123 | accs, names=names, 124 | metrics=metrics, generate_overall=True) 125 | print(mm.io.render_summary( 126 | summary, formatters=mh.formatters, 127 | namemap=mm.io.motchallenge_metric_names)) 128 | logging.info('Completed') 129 | -------------------------------------------------------------------------------- /src/tools/get_mot_17.sh: -------------------------------------------------------------------------------- 1 | mkdir ../../data/mot17 2 | cd ../../data/mot17 3 | wget https://motchallenge.net/data/MOT17.zip 4 | unzip MOT17.zip 5 | rm MOT17.zip 6 | mkdir annotations 7 | mv MOT17/train . 8 | mv MOT17/test . 9 | rm -rf MOT17 10 | cd ../../src/tools/ 11 | python convert_mot_to_coco.py 12 | python interp_mot.py 13 | python convert_mot_det_to_results.py -------------------------------------------------------------------------------- /src/tools/interp_mot.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import defaultdict 3 | import pycocotools.coco as coco 4 | import copy 5 | 6 | SPLITS = ['train_half', 'train'] 7 | 8 | def interpolate(track, start_ann, start_ind, end_ann, end_ind, max_id, frames): 9 | print('interpolating from %d to %d' % (start_ind, end_ind)) 10 | start_box = start_ann['bbox'] 11 | end_box = end_ann['bbox'] 12 | len_occl = end_ind - start_ind 13 | x_step = (end_box[0] - start_box[0]) / len_occl 14 | y_step = (end_box[1] - start_box[1]) / len_occl 15 | step = 1 16 | for i in range(start_ind + 1, end_ind): 17 | new_ann = copy.deepcopy(start_ann) 18 | new_ann['bbox'][0] += step * x_step 19 | new_ann['bbox'][1] += step * y_step 20 | new_ann['occlusion'] = 0.01 21 | new_ann['id'] = max_id 22 | new_ann['image_id'] = frames[i]['id'] 23 | max_id += 1 24 | track[i] = new_ann 25 | step += 1 26 | 27 | return max_id 28 | 29 | def process_video(frames, dataset, max_id): 30 | tracks = {} 31 | for i, frame in enumerate(frames): 32 | invis_count = 0 33 | occl_count = 0 34 | ann_ids = dataset.getAnnIds(imgIds=[frame['id']]) 35 | anns = dataset.loadAnns(ids=ann_ids) 36 | for ann in anns: 37 | track_id = ann['track_id'] 38 | ann['occlusion'] = 1 39 | if track_id not in tracks: 40 | tracks[track_id] = [None] * len(frames) 41 | tracks[track_id][i] = ann 42 | 43 | for track_id in tracks.keys(): 44 | track = tracks[track_id] 45 | last_seen = None 46 | start_ind = None 47 | in_occl = False 48 | for i, ann in enumerate(track): 49 | if ann is not None and in_occl: 50 | max_id = interpolate(track, last_seen, start_ind, ann, i, max_id, frames) 51 | in_occl = False 52 | 53 | if ann is not None: 54 | last_seen = ann 55 | start_ind = i 56 | if ann is None and last_seen is not None: 57 | in_occl = True 58 | 59 | annotations = [] 60 | for track_id in tracks.keys(): 61 | track = tracks[track_id] 62 | for i, ann in enumerate(track): 63 | if ann is not None: 64 | 
annotations.append(ann) 65 | 66 | return annotations, max_id 67 | 68 | 69 | if __name__ == '__main__': 70 | for split in SPLITS: 71 | data = json.load(open('../../data/mot17/annotations/%s.json' % split)) 72 | coco_anns = coco.COCO('../../data/mot17/annotations/%s.json' % split) 73 | 74 | max_id = -1 75 | for ann in data['annotations']: 76 | if ann['id'] > max_id: 77 | max_id = ann['id'] 78 | 79 | max_id += 1 80 | 81 | video_to_images = defaultdict(list) 82 | video_to_image_map = {} 83 | for image in coco_anns.dataset['images']: 84 | video_to_images[image['video_id']].append(image) 85 | 86 | for vid_id in video_to_images.keys(): 87 | images = video_to_images[vid_id] 88 | images.sort(key=lambda x: x['frame_id']) 89 | video_to_images[vid_id] = images 90 | 91 | annotations = [] 92 | for vid_id in video_to_images.keys(): 93 | annotations_vid, max_id = process_video(video_to_images[vid_id], coco_anns, max_id) 94 | annotations.extend(annotations_vid) 95 | 96 | data['annotations'] = annotations 97 | 98 | json.dump(data, open('../../data/mot17/annotations/%s_interp.json' % split, 'w')) -------------------------------------------------------------------------------- /src/tools/remove_optimizers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | IN_PATH = '../../centertrack_models/' 4 | OUT_PATH = '../../models/' 5 | REMOVE_KEYS = ['base.fc'] 6 | 7 | if __name__ == '__main__': 8 | models = sorted(os.listdir(IN_PATH)) 9 | for model in models: 10 | model_path = IN_PATH + model 11 | print(model) 12 | data = torch.load(model_path) 13 | state_dict = data['state_dict'] 14 | keys = state_dict.keys() 15 | delete_keys = [] 16 | for k in keys: 17 | should_delete = False 18 | for remove_key in REMOVE_KEYS: 19 | if remove_key in k: 20 | should_delete = True 21 | if should_delete: 22 | delete_keys.append(k) 23 | for k in delete_keys: 24 | print('delete ', k) 25 | del state_dict[k] 26 | out_data = {'epoch': data['epoch'], 'state_dict': state_dict} 27 | torch.save(out_data, OUT_PATH + model) 28 | -------------------------------------------------------------------------------- /src/tools/vis_tracking_kitti.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import glob 5 | import sys 6 | from collections import defaultdict 7 | from pathlib import Path 8 | 9 | DATA_PATH = '../../data/kitti_tracking/' 10 | IMG_PATH = DATA_PATH + 'data_tracking_image_2/testing/image_02/' 11 | SAVE_VIDEO = False 12 | IS_GT = False 13 | 14 | cats = ['Pedestrian', 'Car', 'Cyclist'] 15 | cat_ids = {cat: i for i, cat in enumerate(cats)} 16 | COLORS = [(255, 0, 255), (122, 122, 255), (255, 0, 0)] 17 | 18 | def draw_bbox(img, bboxes, c=(255, 0, 255)): 19 | for bbox in bboxes: 20 | color = COLORS[int(bbox[5])] 21 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 22 | (int(bbox[2]), int(bbox[3])), 23 | color, 2, lineType=cv2.LINE_AA) 24 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 25 | txt = '{}'.format(int(bbox[4])) 26 | cv2.putText(img, txt, (int(ct[0]), int(ct[1])), 27 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, 28 | color, thickness=1, lineType=cv2.LINE_AA) 29 | 30 | if __name__ == '__main__': 31 | seqs = os.listdir(IMG_PATH) 32 | if SAVE_VIDEO: 33 | save_path = sys.argv[1][:sys.argv[1].rfind('/res')] + '/video' 34 | if not os.path.exists(save_path): 35 | os.mkdir(save_path) 36 | print('save_video_path', save_path) 37 | for seq in sorted(seqs): 38 | print('seq', seq) 39 | if 
'.DS_Store' in seq: 40 | continue 41 | # if SAVE_VIDEO: 42 | # fourcc = cv2.VideoWriter_fourcc(*'XVID') 43 | # video = cv2.VideoWriter( 44 | # '{}/{}.avi'.format(save_path, seq),fourcc, 10.0, (1024, 750)) 45 | 46 | 47 | preds = {} 48 | for K in range(1, len(sys.argv)): 49 | pred_path = sys.argv[K] + '/{}.txt'.format(seq) 50 | pred_file = open(pred_path, 'r') 51 | preds[K] = defaultdict(list) 52 | for line in pred_file: 53 | tmp = line[:-1].split(' ') 54 | frame_id = int(tmp[0]) 55 | track_id = int(tmp[1]) 56 | cat_id = cat_ids[tmp[2]] 57 | bbox = [float(tmp[6]), float(tmp[7]), float(tmp[8]), float(tmp[9])] 58 | score = float(tmp[17]) 59 | preds[K][frame_id].append(bbox + [track_id, cat_id, score]) 60 | 61 | images_path = '{}/{}/'.format(IMG_PATH, seq) 62 | images = os.listdir(images_path) 63 | num_images = len([image for image in images if 'png' in image]) 64 | 65 | for i in range(num_images): 66 | frame_id = i 67 | file_path = '{}/{:06d}.png'.format(images_path, i) 68 | img = cv2.imread(file_path) 69 | for K in range(1, len(sys.argv)): 70 | img_pred = img.copy() 71 | draw_bbox(img_pred, preds[K][frame_id]) 72 | cv2.imshow('pred{}'.format(K), img_pred) 73 | cv2.waitKey() 74 | # if SAVE_VIDEO: 75 | # video.write(img_pred) 76 | # if SAVE_VIDEO: 77 | # video.release() 78 | -------------------------------------------------------------------------------- /src/tools/vis_tracking_mot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import glob 5 | import sys 6 | from collections import defaultdict 7 | from pathlib import Path 8 | 9 | GT_PATH = '../../data/mot17/test/' 10 | IMG_PATH = GT_PATH 11 | SAVE_VIDEO = True 12 | RESIZE = 2 13 | IS_GT = False 14 | 15 | def draw_bbox(img, bboxes, c=(255, 0, 255)): 16 | for bbox in bboxes: 17 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 18 | (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])), 19 | c, 2, lineType=cv2.LINE_AA) 20 | ct = [bbox[0] + bbox[2] / 2, bbox[1] + bbox[3] / 2] 21 | txt = '{}'.format(bbox[4]) 22 | cv2.putText(img, txt, (int(ct[0]), int(ct[1])), 23 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, 24 | (255, 122, 255), thickness=1, lineType=cv2.LINE_AA) 25 | 26 | if __name__ == '__main__': 27 | seqs = os.listdir(GT_PATH) 28 | if SAVE_VIDEO: 29 | save_path = sys.argv[1][:sys.argv[1].rfind('/res')] + '/video' 30 | if not os.path.exists(save_path): 31 | os.mkdir(save_path) 32 | print('save_video_path', save_path) 33 | for seq in sorted(seqs): 34 | print('seq', seq) 35 | # if len(sys.argv) > 2 and not sys.argv[2] in seq: 36 | # continue 37 | if '.DS_Store' in seq: 38 | continue 39 | # if SAVE_VIDEO: 40 | # fourcc = cv2.VideoWriter_fourcc(*'XVID') 41 | # video = cv2.VideoWriter( 42 | # '{}/{}.avi'.format(save_path, seq),fourcc, 10.0, (1024, 750)) 43 | seq_path = '{}/{}/'.format(GT_PATH, seq) 44 | if IS_GT: 45 | ann_path = seq_path + 'gt/gt.txt' 46 | else: 47 | ann_path = seq_path + 'det/det.txt' 48 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') 49 | print('anns shape', anns.shape) 50 | image_to_anns = defaultdict(list) 51 | for i in range(anns.shape[0]): 52 | if (not IS_GT) or (int(anns[i][6]) == 1 and float(anns[i][8]) >= 0.25): 53 | frame_id = int(anns[i][0]) 54 | track_id = int(anns[i][1]) 55 | bbox = (anns[i][2:6] / RESIZE).tolist() 56 | image_to_anns[frame_id].append(bbox + [track_id]) 57 | 58 | image_to_preds = {} 59 | for K in range(1, len(sys.argv)): 60 | image_to_preds[K] = defaultdict(list) 61 | pred_path = sys.argv[K] + '/{}.txt'.format(seq) 
62 | try: 63 | preds = np.loadtxt(pred_path, dtype=np.float32, delimiter=',') 64 | except: 65 | preds = np.loadtxt(pred_path, dtype=np.float32, delimiter=' ') 66 | for i in range(preds.shape[0]): 67 | frame_id = int(preds[i][0]) 68 | track_id = int(preds[i][1]) 69 | bbox = (preds[i][2:6] / RESIZE).tolist() 70 | image_to_preds[K][frame_id].append(bbox + [track_id]) 71 | 72 | img_path = seq_path + 'img1/' 73 | images = os.listdir(img_path) 74 | num_images = len([image for image in images if 'jpg' in image]) 75 | 76 | for i in range(num_images): 77 | frame_id = i + 1 78 | file_name = '{}/img1/{:06d}.jpg'.format(seq, i + 1) 79 | file_path = IMG_PATH + file_name 80 | img = cv2.imread(file_path) 81 | if RESIZE != 1: 82 | img = cv2.resize(img, (img.shape[1] // RESIZE, img.shape[0] // RESIZE)) 83 | for K in range(1, len(sys.argv)): 84 | img_pred = img.copy() 85 | draw_bbox(img_pred, image_to_preds[K][frame_id]) 86 | cv2.imshow('pred{}'.format(K), img_pred) 87 | draw_bbox(img, image_to_anns[frame_id]) 88 | cv2.imshow('gt', img) 89 | cv2.waitKey() 90 | # if SAVE_VIDEO: 91 | # video.write(img_pred) 92 | # if SAVE_VIDEO: 93 | # video.release() 94 | -------------------------------------------------------------------------------- /tao/.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | tao.egg-info 3 | .ipynb_checkpoints 4 | cache 5 | .vscode 6 | tao/data/s3_cache 7 | .mypy_cache 8 | debug/ 9 | _internal_links.yaml 10 | _pull_internal_changes.py 11 | __pycache__ 12 | -------------------------------------------------------------------------------- /tao/LICENSE: -------------------------------------------------------------------------------- 1 | NOTE: This license applies to the code in this repository. 2 | 3 | MIT License 4 | 5 | Copyright (c) 2020 TAO Dataset 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | -------------------------------------------------------------------------------- /tao/README.md: -------------------------------------------------------------------------------- 1 | # TAO: A Large-Scale Benchmark for Tracking Any Object 2 | 3 | [[Paper](https://arxiv.org/abs/2005.10356)] [[Website](http://taodataset.org)] 4 | 5 | [Achal Dave](http://www.achaldave.com/), [Tarasha Khurana](http://www.cs.cmu.edu/~tkhurana/), [Pavel Tokmakov](https://pvtokmakov.github.io/home/), [Cordelia Schmid](https://thoth.inrialpes.fr/~schmid/), [Deva Ramanan](http://www.cs.cmu.edu/~deva/) 6 | 7 | ## Latest updates 8 | 9 | - \[2020.07.10\]: The ECCV challenge is now live at the 10 | [MOTChallenge website](https://motchallenge.net/results/ECCV_2020_TAO_Challenge/)! 11 | See [here](docs/challenge.md) for more details. 12 | - \[2020.07.02\]: TAO was accepted to ECCV '20 as a spotlight presentation! 13 | - \[2020.02.20\]: We will be hosting a workshop and challenge at ECCV'20. See [here](http://taodataset.org/workshop/) for details. 14 | 15 | ## Setup 16 | 17 | 1. Clone this repo 18 | ``` 19 | git clone https://github.com/TAO-Dataset/tao 20 | ``` 21 | 1. Install TAO toolkit: 22 | ``` 23 | pip install git+https://github.com/TAO-Dataset/tao 24 | ``` 25 | 26 | ## Download dataset 27 | 28 | See [download instructions](./docs/download.md). 29 | 30 | ## Challenge 31 | 32 | We will be hosting a challenge at our 33 | [ECCV '20 workshop](taodataset.org/workshop/). See [here](docs/challenge.md) for details. 34 | 35 | ## Evaluation 36 | 37 | See [evaluation information](./docs/evaluation.md). Contains information on submitting to the challenge server. 38 | 39 | ## Run baseline trackers 40 | 41 | See [tracker instructions](./docs/trackers.md). 42 | 43 | ## Questions? 44 | 45 | Please see the [faqs](./docs/faqs.md) to check if we've anticipated your 46 | question. If not, for questions about TAO usage or the challenge, please use 47 | this Google Group: https://groups.google.com/forum/#!forum/tao-dataset/ 48 | 49 | For bug reports regarding the toolkit, annotations, or image download, please 50 | file an issue in this repository. 51 | 52 | -------------------------------------------------------------------------------- /tao/docs/challenge.md: -------------------------------------------------------------------------------- 1 | # TAO ECCV'20 Multi-Object Tracking Challenge 2 | 3 | We are excited to host a challenge on TAO as part of our 4 | [ECCV workshop](http://taodataset.org/workshop/). 5 | The challenge is hosted on the [motchallenge.net](https://motchallenge.net/) website: 6 | [link](https://motchallenge.net/results/ECCV_2020_TAO_Challenge/). 7 | 8 | ## Important Dates 9 | 10 | - July 10: Challenge released! 11 | - August 16: Challenge closes, winners contacted to prepare presentation for ECCV workshop. 12 | - August 23: ECCV workshop date. Challenge results announced, along with 13 | presentations by challenge submission authors. 14 | 15 | ## Prizes 16 | 17 | We will have the following prizes for the winning entries! 18 | 19 | - First place: $1,500 cash prize, presentation at ECCV workshop. 20 | - Second place: $500 cash prize, presentation at ECCV workshop. 21 | - Honorable mention(s): $250 cash prize, presentation at ECCV workshop. 22 | 23 | ## Protocol 24 | 25 | - **Evaluation data**: The ECCV '20 challenge evaluates multi-object tracking 26 | on the TAO test set. 
27 | 28 | - **Training data**: We do not impose any restrictions on the training data used for 29 | submissions, except that the TAO test videos may not be used for training in any way. 30 | This explicitly precludes, for example, unsupervised training on the TAO test set. 31 | However, the TAO validation videos may be used for training in a supervised or 32 | unsupervised manner. 33 | We encourage training on the LVIS v0.5 dataset, which provides 34 | ample detection training data for categories evaluated in TAO. 35 | 36 | - **WARNING**: The TAO test set contains sequences from existing datasets, which 37 | must be excluded from training. These sequences can be seen from the test 38 | json. In particular, a number of LaSOT training sequences are present in the TAO 39 | test set. 40 | 41 | - For submission instructions, see [evaluation.md](evaluation.md). 42 | 43 | 44 | ## FAQs 45 | 46 | Please see [faqs.md](./faqs.md). 47 | -------------------------------------------------------------------------------- /tao/docs/detector_train.md: -------------------------------------------------------------------------------- 1 | # Training your own detectors 2 | 3 | To train your own detectors, follow the steps below: 4 | 5 | 1. Download the LVIS v0.5 annotations and (LVIS v0.5 + COCO) training 6 | annotations from 7 | [here](https://drive.google.com/file/d/1rPSSIVSer7pweyJS-uqAfIF59uZVJ0Nx/view), 8 | and extract them to `./data/detectron_datasets/lvis-coco`. 9 | 10 | 1. Setup [detectron2](https://github.com/facebookresearch/detectron2). 11 | 12 | 1. Download the COCO `train2017` and `val2017` datasets, and link them to: 13 | 14 | ``` 15 | ./data/detectron_datasets/lvis-coco/train2017 16 | ./data/detectron_datasets/lvis-coco/val2017 17 | ``` 18 | 19 | 1. Use the provided `./scripts/detectors/detectron2_train_net.py` script to 20 | train your detector. 21 | 22 | ``` 23 | python scripts/detectors/detectron2_train_net.py \ 24 | --num-gpus 8 \ 25 | --config-file ./data/configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml \ 26 | DATASETS.TRAIN "('lvis_v0.5_coco_2017_train', )" \ 27 | OUTPUT_DIR /path/to/output-dir 28 | ``` 29 | 30 | This script was tested with detectron2 commit id 31 | fd87af71eebc660dde2f50e4693869bb04f66015. 32 | 33 | -------------------------------------------------------------------------------- /tao/docs/download.md: -------------------------------------------------------------------------------- 1 | # Download TAO 2 | 3 | TAO contains videos from 7 datasets: ArgoVerse, AVA, BDD-100k, Charades, HACS, 4 | LaSOT, and YFCC-100M. 5 | 6 | 1. Download TAO train, val and test sets from the MOTChallenge 7 | [download page](https://motchallenge.net/tao_download.php). 8 | 9 | 1. Uncompress each downloaded file in a single directory, which we will refer to as 10 | `$TAO_DIR`. 11 |
The directory should have the following structure:

12 | 13 | ```bash 14 | └── frames 15 | └── train 16 | ├── ArgoVerse 17 | ├── BDD 18 | ├── Charades 19 | ├── HACS 20 | ├── LaSOT 21 | └── YFCC100M 22 | ``` 23 |

24 | 25 | 1. Download annotations: 26 | 27 | ```bash 28 | python scripts/download/download_annotations.py $TAO_DIR --split train 29 | ``` 30 | 31 | 1. Verify that the dataset was downloaded correctly: 32 | 33 | ```bash 34 | python scripts/download/verify.py $TAO_ROOT --split train 35 | ``` 36 | ## Request video deletion 37 | 38 | If you would like to request a video be deleted from TAO (e.g., because you are 39 | featured in the video or you own the rights), please email me at 40 | achald@cs.cmu.edu. 41 | 42 | -------------------------------------------------------------------------------- /tao/docs/download_hacs_alt.md: -------------------------------------------------------------------------------- 1 | Download and extract from YouTube. 2 | 3 | ``` 4 | python scripts/download/download_hacs.py $TAO_ROOT --split train 5 | ``` 6 | 7 | You can ignore YoutubeDL errors that are printed by this script (e.g., Video not 8 | available). Videos that could not be downloaded will be collected in 9 | `$TAO_ROOT/hacs_missing/missing.txt`. You can request the original HACS videos 10 | by filling out these forms: https://forms.gle/hZD612H5TXDQDozv9 11 | -------------------------------------------------------------------------------- /tao/docs/evaluation.md: -------------------------------------------------------------------------------- 1 | # Evaluating Trackers 2 | 3 | ## Results format 4 | 5 | The TAO toolkit expects results in the same format as COCO, but with additional 6 | `track_id` and `video_id` fields. Specifically, `results.json` should have the 7 | following format: 8 | 9 | ``` 10 | [{ 11 | "image_id" : int, 12 | "category_id" : int, 13 | "bbox" : [x,y,width,height], 14 | "score" : float, 15 | "track_id": int, 16 | "video_id": int 17 | }] 18 | ``` 19 | 20 | 21 | ## Evaluation (toolkit) 22 | 23 | The TAO toolkit provides code for evaluating tracker results. 24 | 25 | ```python 26 | import logging 27 | from tao.toolkit.tao import TaoEval 28 | 29 | # TAO uses logging to print results. Make sure logging is set to show INFO 30 | # messages, or you won't see any evaluation results. 31 | logging.setLevel(logging.INFO) 32 | tao_eval = TaoEval('/path/to/annotations.json', '/path/to/results.json') 33 | tao_eval.run() 34 | tao_eval.print_results() 35 | ``` 36 | 37 | ## Evaluation (command-line) 38 | 39 | TAO also comes with a higher-level `evaluate.py` script which incorporates 40 | various additional features for evaluation. 41 | 42 | In all the examples below, let - 43 | - `$ANNOTATIONS` be the `/path/to/annotations.json` 44 | - `$RESULTS` be the `/path/to/results.json` 45 | - `$OUTPUT_DIR` be the `/path/to/output/logdir`. 46 | 47 | We demonstrate some features below; for more, take a look at the config 48 | description in [`./tao/utils/evaluation.py`](/tao/utils/evaluation.py). 49 | 50 | - Simple evaluation, with logging to an output directory 51 | 52 | ```bash 53 | python scripts/evaluate.py \ 54 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 55 | ``` 56 | 57 | -
Classification oracle

58 | 59 | ```bash 60 | python scripts/evaluation/evaluate.py \ 61 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 62 | --config-updates ORACLE.TYPE class 63 | ``` 64 |

65 | 66 | -
Track oracle (for linking detections)

67 | 68 | ```bash 69 | python scripts/evaluation/evaluate.py \ 70 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 71 | --config-updates ORACLE.TYPE track 72 | ``` 73 |

74 | 75 | -
Evaluate MOTA

76 | 77 | ```bash 78 | python scripts/evaluation/evaluate.py \ 79 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 80 | --config-updates MOTA.ENABLED True 81 | ``` 82 |

83 | 84 | -
Evaluate at (3D) IoU threshold of 0.9

85 | 86 | ```bash 87 | python scripts/evaluation/evaluate.py \ 88 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 89 | --config-updates EVAL_IOUS "[0.9]" 90 | ``` 91 |

92 | 93 | -
Evaluate at multiple (3D) IoU thresholds

94 | 95 | ```bash 96 | python scripts/evaluation/evaluate.py \ 97 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 98 | --config-updates \ 99 | EVAL_IOUS "[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]" 100 | ``` 101 |

102 | 103 | -
Category-agnostic evaluation

104 | 105 | ```bash 106 | python scripts/evaluation/evaluate.py \ 107 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 108 | --config-updates CATEGORY_AGNOSTIC True 109 | ``` 110 |

111 | 112 | -
Report evaluation by source dataset

113 | 114 | ```bash 115 | python scripts/evaluation/evaluate.py \ 116 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 117 | --config-updates EVAL_BY_DATASET True 118 | ``` 119 |

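All of the `evaluate.py` variants above consume a `results.json` in the format described at the top of this page. As a rough, unofficial sketch (the `tracker_output` tuples and file paths below are made-up placeholders), the following shows one way to assemble such a file from per-frame tracker output and score it with the `TaoEval` API shown earlier:

```python
import json
import logging

from tao.toolkit.tao import TaoEval

# Hypothetical per-frame tracker output:
# (image_id, video_id, track_id, category_id, x, y, w, h, score),
# where the ids refer to the TAO annotations json being evaluated against.
tracker_output = [
    (12345, 17, 3, 95, 100.0, 150.0, 80.0, 60.0, 0.87),
]

results = [
    {
        'image_id': image_id,
        'category_id': category_id,
        'bbox': [x, y, w, h],  # [x, y, width, height], as in COCO
        'score': score,
        'track_id': track_id,
        'video_id': video_id,
    }
    for image_id, video_id, track_id, category_id, x, y, w, h, score in tracker_output
]

with open('results.json', 'w') as f:
    json.dump(results, f)

# Score the results; INFO-level logging is needed to see the printed metrics.
logging.getLogger().setLevel(logging.INFO)
tao_eval = TaoEval('/path/to/annotations.json', 'results.json')
tao_eval.run()
tao_eval.print_results()
```

The same `results.json` can then be passed to `scripts/evaluation/evaluate.py` as in the examples above.
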
120 | 121 | ## Evaluation (challenge server) 122 | 123 | For local evaluation, evaluate with steps above on the released validation 124 | set. When submitting test set results to the 125 | [challenge server](https://motchallenge.net/login/), follow same format for 126 | json files as mentioned above. 127 | 128 | The server requires you to submit train, validation and test set results. 129 | We request you to submit these three json files for facilitating progress in 130 | the tracking community. However, if absolutely necessary, submit empty json 131 | files for train and validation. Create a .zip archive that deflates into the 132 | following files 133 | 134 | ```bash 135 | ./TAO_test.json 136 | ./TAO_train.json 137 | ./TAO_val.json 138 | ``` 139 | -------------------------------------------------------------------------------- /tao/docs/faqs.md: -------------------------------------------------------------------------------- 1 | # Frequently asked questions 2 | 3 | 1. Why does the training set only contain 216 LVIS categories? 4 | 5 | TAO contains a total of 482 LVIS categories. However, not all categories 6 | are present in the train, val, and test sets. Instead, we encourage researchers to 7 | train detectors on the LVIS v0.5 dataset, which contains a superset of 8 | the 482 categories, and trackers on existing single-object tracking datasets. 9 | TAO is primarily a benchmark dataset, but we provide a small set of training videos 10 | for tuning trackers. 11 | 12 | 1. Why do the LVIS v1 dataset categories not match with the TAO categories? 13 | 14 | Tao was constructed to be aligned with the LVIS v0.5 dataset. The LVIS v1 update 15 | changes the category names and ids in the LVIS dataset. We are looking into updating 16 | TAO to use the LVIS v1 categories. For now, you may either train on the LVIS v0.5 17 | dataset, or construct your own mapping from LVIS v1 categories to TAO categories 18 | using the 'synset' field. 19 | 20 | 1. Is there any restriction on which data I can train on? 21 | 22 | The only restriction is that you may not train on videos in the TAO test set. 23 | You can see a list of videos in the TAO test set from the test set json file 24 | shared with the annotations. In particular, a number of LaSOT training videos 25 | are in the TAO test set, and must not be used for training. 26 | 27 | Apart from this, there are currently no restrictions on training datasets. 28 | 29 | 1. Are only LVIS categories evaluated in TAO? 30 | 31 | Currently (as of July 2020), we are focusing on the LVIS categories within TAO. 32 | The ECCV challenge will only evaluate on these categories. We intend to formalize 33 | a protocol for evaluation on the non-LVIS categories later this year. 34 | 35 | 1. Is there a single-object tracking track in the ECCV '20 challenge? 36 | 37 | Currently, there is no single-object / user-initialized tracking track in 38 | the challenge. We are looking into ways to host a challenge for user-initialized 39 | tracking on held out data (e.g., by asking researchers to submit code which we run 40 | locally on the held out test set). If you have any suggestions or 41 | feedback, please contact us! 
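For the LVIS v1 → TAO category mapping suggested in the second answer above, a minimal sketch could look like the following; the file names are placeholders, and it assumes both annotation files expose a COCO-style `categories` list whose entries carry a `synset` field:

```python
import json

# Placeholder paths: any LVIS v1 annotation file and any TAO annotation file.
with open('lvis_v1_train.json', 'r') as f:
    lvis_v1_categories = json.load(f)['categories']
with open('tao_train.json', 'r') as f:
    tao_categories = json.load(f)['categories']

tao_id_by_synset = {cat['synset']: cat['id'] for cat in tao_categories}

# Keep only LVIS v1 categories whose synset also appears in TAO;
# the remaining categories have no TAO counterpart and are skipped here.
lvis_v1_to_tao = {
    cat['id']: tao_id_by_synset[cat['synset']]
    for cat in lvis_v1_categories
    if cat['synset'] in tao_id_by_synset
}

print(f'Mapped {len(lvis_v1_to_tao)} of {len(lvis_v1_categories)} LVIS v1 categories.')
```

How to handle LVIS v1 categories without a matching synset is left to the user; this sketch simply drops them.
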
42 | -------------------------------------------------------------------------------- /tao/docs/manual_download.md: -------------------------------------------------------------------------------- 1 | These are alternative instructions that mimic the helper script in 2 | [scripts/download/download_helper.py](/scripts/download/download_helper.py), 3 | in case the helper script causes issues. Please read 4 | [./download.md](./download.md) first. 5 | 6 | 1. Download TAO annotations to $TAO_DIR 7 | 8 | ``` 9 | wget 'https://github.com/TAO-Dataset/annotations/archive/v1.0.tar.gz' 10 | tar xzvf v1.0.tar.gz 11 | mv annotations-v1.0 annotations 12 | ``` 13 | 14 | 1. Extract frames from BDD, Charades, HACS and YFCC-100M. 15 | 16 | ``` 17 | python scripts/download/extract_frames.py $TAO_ROOT --split train 18 | ``` 19 |
After this, your directory should have the following structure:

20 | 21 | ``` 22 | ├── frames 23 | │ └── train 24 | │ ├── ArgoVerse 25 | │ ├── BDD 26 | │ ├── Charades 27 | │ ├── HACS 28 | │ ├── LaSOT 29 | │ └── YFCC100M 30 | └── videos 31 | └── train 32 | ├── BDD 33 | ├── Charades 34 | ├── HACS 35 | └── YFCC100M 36 | ``` 37 |

38 | 39 | 1. Download and extract frames from AVA: 40 | 41 | ``` 42 | python scripts/download/download_ava.py $TAO_ROOT --split train 43 | ``` 44 | 45 | 1. Finally, you can verify that you have downloaded TAO. 46 | 47 |
Expected directory structure

48 | 49 | ``` 50 | ├── frames 51 | │ └── train 52 | │ ├── ArgoVerse 53 | │ ├── AVA 54 | │ ├── BDD 55 | │ ├── Charades 56 | │ ├── HACS 57 | │ ├── LaSOT 58 | │ └── YFCC100M 59 | └── videos 60 | └── train 61 | ├── BDD 62 | ├── Charades 63 | └── YFCC100M 64 | ``` 65 |

66 | 67 | You can run the following command to check that TAO was properly extracted: 68 | 69 | ``` 70 | python scripts/download/verify.py $TAO_ROOT --split train 71 | ``` 72 | -------------------------------------------------------------------------------- /tao/docs/trackers.md: -------------------------------------------------------------------------------- 1 | # Running trackers on TAO 2 | 3 | ## SORT 4 | 5 | Here, we will reproduce a simpler variant of the SORT result presented in TAO. 6 | Specifically, we will reproduce the following row from Table 13 in our 7 | supplementary material. 8 | 9 | | NMS Thresh | Det / image | Det score | `max_age` | `min_hits` | `min_iou` | Track mAP | 10 | | ---------- | ----------- | --------- | --------- | ---------- | --------- | --------- | 11 | | 0.5 | 300 | 0.0005 | 100 | 1 | 0.1 | 11.3 | 12 | 13 | ### Run detectors 14 | 15 | 1. Download and decompress the detection model and config from [here](https://drive.google.com/file/d/13BdXSQDqK0t-LrF2CrwJtT9lFc48u83H/view?usp=sharing) or [here](https://cdn3.vision.in.tum.de/~tao/baselines/detector-r101-fpn-1x-lvis-coco.zip) to 16 | `$DETECTRON_MODEL`. 17 | 18 | If you would like to re-train the detector, please see [this doc](./detector_train.md). 19 | 20 | 1. Setup and install 21 | [detectron2](https://github.com/facebookresearch/detectron2) 22 | 1. Run the detector on TAO: 23 | 24 | ``` 25 | python scripts/detectors/detectron2_infer.py \ 26 | --gpus 0 1 2 3 \ 27 | --root $TAO_ROOT/train \ 28 | --output /path/to/detectron2/output/train \ 29 | --config $DETECTRON_MODEL/config.yaml \ 30 | --opts MODEL.WEIGHTS $DETECTRON_MODEL/model_final.pth 31 | ``` 32 | 33 | On a machine with 4 2080TIs, the above took about 8 hours to run on the 34 | train set. 35 | 36 | ### Run [SORT](https://github.com/abewley/sort) 37 | 38 | ``` 39 | python scripts/trackers/sort/track.py \ 40 | --detections-dir /path/to/detectron2/output/train \ 41 | --annotations $TAO_ROOT/annotations/train.json \ 42 | --output-dir /path/to/sort/output/train \ 43 | --workers 8 44 | ``` 45 | 46 | On our machine, the above took about 11 hours to run on the train set. 47 | 48 | ### Evaluate 49 | 50 | ``` 51 | python scripts/evaluation/evaluate.py \ 52 | $TAO_ROOT/annotations/train.json \ 53 | /path/to/sort/output/train/results.json 54 | ``` 55 | 56 | This should report an AP of 11.3. 57 | -------------------------------------------------------------------------------- /tao/scripts/detectors/detectron2_infer.py: -------------------------------------------------------------------------------- 1 | # Modified from detectron2/demo/demo.py 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import argparse 5 | import logging 6 | import os 7 | import pickle 8 | from pathlib import Path 9 | 10 | import numpy as np 11 | import torch 12 | from detectron2.config import get_cfg 13 | from detectron2.data.detection_utils import read_image 14 | from detectron2.engine.defaults import DefaultPredictor 15 | from pycocotools import mask 16 | from script_utils.common import common_setup 17 | from tqdm import tqdm 18 | 19 | from tao.utils.parallel.fixed_gpu_pool import FixedGpuPool 20 | 21 | 22 | def init_model(init_args, context): 23 | os.environ['CUDA_VISIBLE_DEVICES'] = str(context['gpu']) 24 | context['predictor'] = DefaultPredictor(init_args['config']) 25 | 26 | 27 | def infer(kwargs, context): 28 | predictor = context['predictor'] 29 | image_path = kwargs['image_path'] 30 | output_path = kwargs['output_path'] 31 | img = read_image(str(image_path), format="BGR") 32 | 33 | predictions = predictor(img) 34 | predictions = predictions["instances"].get_fields() 35 | boxes_decoded = predictions["pred_boxes"].tensor.cpu().numpy().tolist() 36 | scores_decoded = predictions["scores"].cpu().numpy().tolist() 37 | classes_decoded = predictions["pred_classes"].cpu().numpy().tolist() 38 | masks_decoded = None 39 | if args.save_masks: 40 | masks_decoded = predictions["pred_masks"].cpu().numpy().astype(np.bool) 41 | save(boxes_decoded, scores_decoded, classes_decoded, masks_decoded, 42 | output_path) 43 | 44 | 45 | def save(boxes_decoded, scores_decoded, classes_decoded, masks_decoded, 46 | results_path): 47 | predictions_decoded = {} 48 | predictions_decoded["instances"] = { 49 | "pred_boxes": boxes_decoded, 50 | "scores": scores_decoded, 51 | "pred_classes": classes_decoded, 52 | } 53 | if masks_decoded is not None: 54 | rles = mask.encode( 55 | np.array(masks_decoded.transpose((1, 2, 0)), 56 | order='F', 57 | dtype=np.uint8)) 58 | for rle in rles: 59 | rle["counts"] = rle["counts"].decode("utf-8") 60 | predictions_decoded['instances']['pred_masks'] = rles 61 | with open(results_path, 'wb') as f: 62 | pickle.dump(predictions_decoded, f) 63 | 64 | 65 | def setup_cfg(args): 66 | # load config from file and command-line arguments 67 | cfg = get_cfg() 68 | cfg.merge_from_file(args.config_file) 69 | cfg.merge_from_list(args.opts) 70 | if not args.save_masks: 71 | cfg.MODEL.MASK_ON = False 72 | cfg.freeze() 73 | return cfg 74 | 75 | 76 | def get_parser(): 77 | parser = argparse.ArgumentParser(description="Detectron2 Demo") 78 | parser.add_argument("--root", required=True, type=Path) 79 | parser.add_argument("--output", 80 | required=True, 81 | type=Path, 82 | help="Directory to save output pickles.") 83 | parser.add_argument("--config-file", 84 | required=True, 85 | type=Path, 86 | help="path to config file") 87 | parser.add_argument('--gpus', default=[0], nargs='+', type=int) 88 | parser.add_argument( 89 | "--opts", 90 | help="Modify model config options using the command-line", 91 | default=[], 92 | nargs=argparse.REMAINDER) 93 | parser.add_argument( 94 | '--save-masks', default=False, action='store_true') 95 | return parser 96 | 97 | 98 | if __name__ == "__main__": 99 | args = get_parser().parse_args() 100 | Path(args.output).mkdir(exist_ok=True, parents=True) 101 | common_setup(__file__, args.output, args) 102 | # Prevent detectron from flooding terminal with messages. 
103 | logging.getLogger('detectron2.checkpoint.c2_model_loading').setLevel( 104 | logging.WARNING) 105 | logging.getLogger('fvcore.common.checkpoint').setLevel( 106 | logging.WARNING) 107 | logger = logging.root 108 | 109 | cfg = setup_cfg(args) 110 | 111 | threads_per_worker = 4 112 | torch.set_num_threads(threads_per_worker) 113 | os.environ['OMP_NUM_THREADS'] = str(threads_per_worker) 114 | 115 | all_files = args.root.rglob('*.jpg') 116 | 117 | # Arguments to init_model() 118 | init_args = {'config': cfg} 119 | 120 | # Tasks to pass to infer() 121 | infer_tasks = [] 122 | for path in tqdm(all_files, 123 | mininterval=1, 124 | dynamic_ncols=True, 125 | desc='Collecting frames'): 126 | relative = path.relative_to(args.root) 127 | output_pkl = (args.output / relative).with_suffix('.pkl') 128 | if output_pkl.exists(): 129 | continue 130 | output_pkl.parent.mkdir(exist_ok=True, parents=True) 131 | infer_tasks.append({'image_path': path, 'output_path': output_pkl}) 132 | 133 | if len(args.gpus) == 1: 134 | context = {'gpu': args.gpus[0]} 135 | init_model(init_args, context) 136 | for task in tqdm(infer_tasks, 137 | mininterval=1, 138 | desc='Running detector', 139 | dynamic_ncols=True): 140 | infer(task, context) 141 | else: 142 | pool = FixedGpuPool( 143 | args.gpus, initializer=init_model, initargs=init_args) 144 | list( 145 | tqdm(pool.imap_unordered(infer, infer_tasks), 146 | total=len(infer_tasks), 147 | mininterval=10, 148 | desc='Running detector', 149 | dynamic_ncols=True)) 150 | -------------------------------------------------------------------------------- /tao/scripts/detectors/merge_coco_with_lvis.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import itertools 3 | import json 4 | import logging 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | from pycocotools.coco import COCO 9 | import pycocotools.mask as mask_util 10 | from script_utils.common import common_setup 11 | from tqdm import tqdm 12 | 13 | 14 | ROOT = Path(__file__).resolve().parent.parent.parent 15 | 16 | 17 | def main(): 18 | # Use first line of file docstring as description if it exists. 
19 | parser = argparse.ArgumentParser( 20 | description=__doc__.split('\n')[0] if __doc__ else '', 21 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 22 | parser.add_argument('--lvis', type=Path, required=True) 23 | parser.add_argument('--coco', type=Path, required=True) 24 | parser.add_argument('--mapping', 25 | type=Path, 26 | default=ROOT / 'data/lvis_coco_to_synset.json') 27 | parser.add_argument('--output-json', 28 | type=Path, 29 | required=True) 30 | parser.add_argument( 31 | '--iou-thresh', 32 | default=0.7, 33 | type=float, 34 | help=('If a COCO annotation overlaps with an LVIS annotations with ' 35 | 'IoU over this threshold, we use only the LVIS annotation.')) 36 | 37 | args = parser.parse_args() 38 | args.output_json.parent.mkdir(exist_ok=True, parents=True) 39 | common_setup(args.output_json.name + '.log', args.output_json.parent, args) 40 | 41 | coco = COCO(args.coco) 42 | lvis = COCO(args.lvis) 43 | 44 | synset_to_lvis_id = {x['synset']: x['id'] for x in lvis.cats.values()} 45 | coco_to_lvis_category = {} 46 | with open(args.mapping, 'r') as f: 47 | name_mapping = json.load(f) 48 | for category in coco.cats.values(): 49 | mapped = name_mapping[category['name']] 50 | assert mapped['coco_cat_id'] == category['id'] 51 | synset = mapped['synset'] 52 | if synset not in synset_to_lvis_id: 53 | logging.debug( 54 | f'Found no LVIS category for "{category["name"]}" from COCO') 55 | continue 56 | coco_to_lvis_category[category['id']] = synset_to_lvis_id[synset] 57 | 58 | for image_id, image in coco.imgs.items(): 59 | if image_id in lvis.imgs: 60 | coco_name = coco.imgs[image_id]['file_name'] 61 | lvis_name = lvis.imgs[image_id]['file_name'] 62 | assert coco_name in lvis_name 63 | else: 64 | logging.info( 65 | f'Image {image_id} in COCO, but not annotated in LVIS') 66 | 67 | lvis_highest_id = max(x['id'] for x in lvis.anns.values()) 68 | ann_id_generator = itertools.count(lvis_highest_id + 1) 69 | new_annotations = [] 70 | for image_id, lvis_anns in tqdm(lvis.imgToAnns.items()): 71 | if image_id not in coco.imgToAnns: 72 | logging.info( 73 | f'Image {image_id} in LVIS, but not annotated in COCO') 74 | continue 75 | 76 | coco_anns = coco.imgToAnns[image_id] 77 | # Compute IoU between coco_anns and lvis_anns 78 | # Shape (num_coco_anns, num_lvis_anns) 79 | mask_iou = mask_util.iou([coco.annToRLE(x) for x in coco_anns], 80 | [lvis.annToRLE(x) for x in lvis_anns], 81 | pyiscrowd=np.zeros(len(lvis_anns))) 82 | does_overlap = mask_iou.max(axis=1) > args.iou_thresh 83 | to_add = [] 84 | for i, ann in enumerate(coco_anns): 85 | if does_overlap[i]: 86 | continue 87 | if ann['category_id'] not in coco_to_lvis_category: 88 | continue 89 | ann['category_id'] = coco_to_lvis_category[ann['category_id']] 90 | ann['id'] = next(ann_id_generator) 91 | to_add.append(ann) 92 | new_annotations.extend(to_add) 93 | 94 | with open(args.lvis, 'r') as f: 95 | merged = json.load(f) 96 | merged['annotations'].extend(new_annotations) 97 | with open(args.output_json, 'w') as f: 98 | json.dump(merged, f) 99 | 100 | 101 | if __name__ == "__main__": 102 | main() 103 | -------------------------------------------------------------------------------- /tao/scripts/download/download_annotations.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import urllib.error 3 | import urllib.request 4 | from pathlib import Path 5 | 6 | import subprocess 7 | 8 | ANNOTATIONS_TAR_GZ = 'https://github.com/TAO-Dataset/annotations/archive/v1.1.tar.gz' 9 | 10 | 11 | def 
banner_log(msg): 12 | banner = '#' * len(msg) 13 | print(f'\n{banner}\n{msg}\n{banner}') 14 | 15 | 16 | def log_and_run(cmd, *args, **kwargs): 17 | print(f'Running command:\n{" ".join(cmd)}') 18 | subprocess.run(cmd, *args, **kwargs) 19 | 20 | 21 | def main(): 22 | # Use first line of file docstring as description if it exists. 23 | parser = argparse.ArgumentParser( 24 | description=__doc__.split('\n')[0] if __doc__ else '', 25 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 26 | parser.add_argument('tao_root', type=Path) 27 | parser.add_argument('--split', 28 | required=True, 29 | choices=['train', 'val', 'test']) 30 | 31 | args = parser.parse_args() 32 | 33 | assert args.tao_root.exists(), ( 34 | f'TAO_ROOT does not exist at {args.tao_root}') 35 | 36 | annotations_dir = args.tao_root / 'annotations' 37 | if annotations_dir.exists(): 38 | print(f'Annotations directory already exists; skipping.') 39 | else: 40 | annotations_compressed = args.tao_root / 'annotations.tar.gz' 41 | if not annotations_compressed.exists(): 42 | banner_log('Downloading annotations') 43 | try: 44 | urllib.request.urlretrieve(ANNOTATIONS_TAR_GZ, 45 | annotations_compressed) 46 | except urllib.error.HTTPError as e: 47 | if e.code == 404: 48 | print(f'Unable to download annotations.tar.gz. Please ' 49 | f'download it manually from\n' 50 | f'{ANNOTATIONS_TAR_GZ}\n' 51 | f'and save it to {args.tao_root}.') 52 | return 53 | raise 54 | banner_log('Extracting annotations') 55 | log_and_run([ 56 | 'tar', 'xzvf', 57 | str(annotations_compressed), '-C', 58 | str(args.tao_root) 59 | ]) 60 | (args.tao_root / 'annotations-1.1').rename(annotations_dir) 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /tao/scripts/download/download_cfg.yaml: -------------------------------------------------------------------------------- 1 | TAO_ANNOTATIONS: 2 | TRAIN: /data/achald/track_dataset/annotations/scale/4-18/tao-format/train_federated_lvis.json 3 | VAL: /data/achald/track_dataset/annotations/scale/4-18/tao-format/validation_federated_lvis.json 4 | CHECKSUMS: 5 | VERIFY: True 6 | PATH: /data/achald/track_dataset/annotations/scale/4-18/tao-format/with_test_unfederated/checksums.json 7 | AVA: 8 | MOVIES: 9 | # Contains symlinks to /data/all/AVA/data 10 | DIR: /scratch/achald/tao/release/ava/ 11 | LASOT: 12 | DATASET_ROOT: /ssd1/achald/lasot 13 | CREATE_SYMLINKS: True 14 | CHARADES: 15 | VIDEOS_DIR: /data/all/Charades/Charades_v1/videos 16 | BDD: 17 | VIDEOS_DIR: /data/achald/track_dataset/bdd/val/videos/val_00/ -------------------------------------------------------------------------------- /tao/scripts/download/download_helper.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import urllib.error 3 | import urllib.request 4 | from pathlib import Path 5 | 6 | import subprocess 7 | 8 | # ANNOTATIONS_TAR_GZ = 'https://github.com/TAO-Dataset/annotations/archive/v1.0.tar.gz' 9 | # Temporary URL while in beta. 10 | ANNOTATIONS_TAR_GZ = 'https://achal-public.s3.amazonaws.com/release-beta/annotations/annotations.tar.gz' 11 | 12 | 13 | def banner_log(msg): 14 | banner = '#' * len(msg) 15 | print(f'\n{banner}\n{msg}\n{banner}') 16 | 17 | 18 | def log_and_run(cmd, *args, **kwargs): 19 | print(f'Running command:\n{" ".join(cmd)}') 20 | subprocess.run(cmd, *args, **kwargs) 21 | 22 | 23 | def main(): 24 | # Use first line of file docstring as description if it exists. 
25 | parser = argparse.ArgumentParser( 26 | description=__doc__.split('\n')[0] if __doc__ else '', 27 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 28 | parser.add_argument('tao_root', type=Path) 29 | parser.add_argument('--split', 30 | required=True, 31 | choices=['train', 'val', 'test']) 32 | 33 | args = parser.parse_args() 34 | 35 | assert args.tao_root.exists(), ( 36 | f'TAO_ROOT does not exist at {args.tao_root}') 37 | 38 | annotations_dir = args.tao_root / 'annotations' 39 | if annotations_dir.exists(): 40 | print(f'Annotations directory already exists; skipping.') 41 | else: 42 | annotations_compressed = args.tao_root / 'annotations.tar.gz' 43 | if not annotations_compressed.exists(): 44 | banner_log('Downloading annotations') 45 | try: 46 | urllib.request.urlretrieve(ANNOTATIONS_TAR_GZ, 47 | annotations_compressed) 48 | except urllib.error.HTTPError as e: 49 | if e.code == 404: 50 | print(f'Unable to download annotations.tar.gz. Please ' 51 | f'download it manually from\n' 52 | f'{ANNOTATIONS_TAR_GZ}\n' 53 | f'and save it to {args.tao_root}.') 54 | return 55 | raise 56 | banner_log('Extracting annotations') 57 | log_and_run([ 58 | 'tar', 'xzvf', 59 | str(annotations_compressed), '-C', 60 | str(args.tao_root) 61 | ]) 62 | (args.tao_root / 'annotations-1.0').rename(annotations_dir) 63 | 64 | banner_log("Extracting BDD, Charades, HACS, and YFCC frames") 65 | log_and_run([ 66 | 'python', 'scripts/download/extract_frames.py', 67 | str(args.tao_root), '--split', args.split 68 | ]) 69 | 70 | banner_log("Downloading AVA videos") 71 | log_and_run([ 72 | 'python', 'scripts/download/download_ava.py', 73 | str(args.tao_root), '--split', args.split 74 | ]) 75 | 76 | banner_log("Verifying TAO frames") 77 | log_and_run([ 78 | 'python', 'scripts/download/verify.py', 79 | str(args.tao_root), '--split', args.split 80 | ]) 81 | 82 | 83 | if __name__ == "__main__": 84 | main() 85 | -------------------------------------------------------------------------------- /tao/scripts/download/extract_frames.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | from collections import defaultdict 5 | from pathlib import Path 6 | 7 | from script_utils.common import common_setup 8 | 9 | from tao.utils.download import ( 10 | are_tao_frames_dumped, dump_tao_frames, remove_non_tao_frames) 11 | 12 | 13 | def main(): 14 | # Use first line of file docstring as description if it exists. 
15 | parser = argparse.ArgumentParser( 16 | description=__doc__.split('\n')[0] if __doc__ else '', 17 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 18 | parser.add_argument('root', type=Path) 19 | parser.add_argument('--split', 20 | required=True, 21 | choices=['train', 'val', 'test']) 22 | parser.add_argument('--sources', 23 | default=['BDD', 'HACS', 'Charades', 'YFCC100M'], 24 | choices=['BDD', 'HACS', 'Charades', 'YFCC100M']) 25 | parser.add_argument('--workers', default=8, type=int) 26 | 27 | args = parser.parse_args() 28 | log_dir = args.root / 'logs' 29 | log_dir.mkdir(exist_ok=True, parents=True) 30 | common_setup(__file__, log_dir, args) 31 | 32 | ann_path = args.root / f'annotations/{args.split}.json' 33 | with open(ann_path, 'r') as f: 34 | tao = json.load(f) 35 | 36 | checksums_path = ( 37 | args.root / f'annotations/checksums/{args.split}_checksums.json') 38 | with open(checksums_path, 'r') as f: 39 | checksums = json.load(f) 40 | 41 | videos_by_dataset = defaultdict(list) 42 | for video in tao['videos']: 43 | videos_by_dataset[video['metadata']['dataset']].append(video) 44 | 45 | videos_dir = args.root / 'videos' 46 | frames_dir = args.root / 'frames' 47 | for dataset in args.sources: 48 | # Collect list of videos 49 | ext = '.mov' if dataset == 'BDD' else '.mp4' 50 | videos = videos_by_dataset[dataset] 51 | video_paths = [ 52 | videos_dir / f"{video['name']}{ext}" for video in videos 53 | ] 54 | output_frame_dirs = [frames_dir / video['name'] for video in videos] 55 | 56 | # List of (video, video path, frame directory) tuples 57 | to_dump = [] 58 | for video, video_path, frame_dir in zip(videos, video_paths, 59 | output_frame_dirs): 60 | if not video_path.exists(): 61 | raise ValueError(f'Could not find video at {video_path}') 62 | video_checksums = checksums[video['name']] 63 | if frame_dir.exists() and are_tao_frames_dumped( 64 | frame_dir, video_checksums, warn=False): 65 | continue 66 | to_dump.append((video, video_path, frame_dir)) 67 | 68 | # Dump frames from each video 69 | logging.info(f'{dataset}: Extracting frames') 70 | dump_tao_frames([x[1] for x in to_dump], [x[2] for x in to_dump], 71 | workers=args.workers) 72 | 73 | to_dump = [] 74 | for video, video_path, frame_dir in zip(videos, video_paths, 75 | output_frame_dirs): 76 | video_checksums = checksums[video['name']] 77 | # Remove frames not used for TAO. 78 | remove_non_tao_frames(frame_dir, set(video_checksums.keys())) 79 | # Compare checksums for frames 80 | assert are_tao_frames_dumped(frame_dir, video_checksums), ( 81 | f'Not all TAO frames for {video["name"]} were extracted.') 82 | 83 | logging.info( 84 | f'{dataset}: Removing non-TAO frames, verifying extraction') 85 | logging.info(f'{dataset}: Successfully extracted!') 86 | 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /tao/scripts/download/gen_checksums.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from collections import defaultdict 4 | from hashlib import md5 5 | from pathlib import Path 6 | 7 | from tqdm import tqdm 8 | from script_utils.common import common_setup 9 | 10 | from tao.utils import fs 11 | 12 | 13 | def main(): 14 | # Use first line of file docstring as description if it exists. 
15 | parser = argparse.ArgumentParser( 16 | description=__doc__.split('\n')[0] if __doc__ else '', 17 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 18 | parser.add_argument('--frames-dir', type=Path, required=True) 19 | parser.add_argument('--output-json', type=Path, required=True) 20 | parser.add_argument('--tao-annotations', type=Path, required=True) 21 | 22 | args = parser.parse_args() 23 | output_dir = args.output_json.parent 24 | output_dir.mkdir(exist_ok=True, parents=True) 25 | common_setup(args.output_json.name, output_dir, args) 26 | 27 | with open(args.tao_annotations, 'r') as f: 28 | tao = json.load(f) 29 | videos = [x['name'] for x in tao['videos']] 30 | 31 | labeled_frames = defaultdict(set) 32 | for frame in tao['images']: 33 | video, frame_name = frame['file_name'].rsplit('/', 1) 34 | labeled_frames[video].add(frame_name) 35 | 36 | # videos = videos[:10] 37 | hashes = {} 38 | for video in tqdm(videos): 39 | frames = fs.glob_ext(args.frames_dir / video, ('.jpg', '.jpeg')) 40 | hashes[video] = {} 41 | for i, frame in tqdm(enumerate(frames)): 42 | if frame.name in labeled_frames[video]: 43 | with open(frame, 'rb') as f: 44 | hashes[video][frame.name] = md5(f.read()).hexdigest() 45 | else: 46 | hashes[video][frame.name] = '' 47 | if all(x == '' for x in hashes[video].values()): 48 | raise ValueError(f'Did not find any labeled frames for {video}') 49 | 50 | with open(args.output_json, 'w') as f: 51 | json.dump(hashes, f) 52 | 53 | 54 | if __name__ == "__main__": 55 | main() 56 | -------------------------------------------------------------------------------- /tao/scripts/download/meta/ava_file_names_test_v2.1.txt: -------------------------------------------------------------------------------- 1 | --205wugM18.mkv 2 | -APF0-L14kw.mkv 3 | -FLn0aeA6EU.mkv 4 | 0OLtK6SeTwo.mp4 5 | 1R7n8B8KkZE.mkv 6 | 1XZZnWMP4CU.mkv 7 | 2eTGj8zPykM.mkv 8 | 3-ivkPTSTSw.mp4 9 | 30Qkf0pq-PY.mkv 10 | 55R6Ng9w65o.mkv 11 | 6IebItD0ETQ.mkv 12 | 72MzYjWz_7g.mkv 13 | 7QstV153hbA.mkv 14 | 7SGCpWCNN84.mp4 15 | 7oY-kE-goOA.mkv 16 | 8FYx0LtfPTE.mkv 17 | 8oL0i5WorkE.mp4 18 | 9f8r96-it6c.mkv 19 | A8SUe2Yqn60.mkv 20 | A9WSiEDeu0I.mkv 21 | AwlY-zteegM.mkv 22 | BD3zaLKhkV4.mkv 23 | BLDTynQwGRI.mkv 24 | BU98nWUtT5E.mkv 25 | BV1VreCWZ64.mkv 26 | BnIFkfDhJ2w.mkv 27 | DaUzhc9_6io.mp4 28 | E-6ruyZFfZs.mkv 29 | E-fqjlYMFhE.mp4 30 | EHMP5-9KUdI.mp4 31 | EO1gLAoEZRA.mp4 32 | FONjBIXaM-0.mp4 33 | G0gDuIVKiXg.mkv 34 | GElolK2jG50.mkv 35 | GQxKfbvL3mg.mkv 36 | Gsm_ZBStr0s.mp4 37 | HPd4eMvs1Kg.mp4 38 | HeKz7BELAQc.mkv 39 | HtXWX0LnifY.mp4 40 | IC5M1EhJNfI.webm 41 | IIyYHprTP58.webm 42 | Ic0LMbDyc9Y.mkv 43 | JiBiCiK9HjY.mp4 44 | K-tICG1ek-E.mp4 45 | Ke8b1_yiUVQ.mkv 46 | KkAf75yOKqs.mkv 47 | KrMSZUQJlNM.mkv 48 | LO964EmiVfo.mkv 49 | Mz0FKktvMLY.mkv 50 | NUwem2aZa0Y.mp4 51 | O5y8zKl9X2E.mp4 52 | O8xkUcUJPNo.mkv 53 | OEUMcSba9t0.mp4 54 | OL_Wwo5W1Zs.mp4 55 | OQxN4ksema0.mkv 56 | P5EhajqkqPw.mkv 57 | QTf_v67C5KI.mp4 58 | Qes4a8HuyEc.mkv 59 | RCNuAys0Hsg.mkv 60 | RW-H3fN_79I.mp4 61 | Scg5LeZszCc.mkv 62 | Sntyb4omSfU.mkv 63 | SoNhz0WJZsI.mkv 64 | Uw7387tc9PU.mp4 65 | V6RX59GT-3k.mkv 66 | VNZ8JDb8sks.mkv 67 | ViY7CR2TSO8.mkv 68 | W8TFzEy0gp0.mkv 69 | WMFTBgYWJS8.mkv 70 | Wgytpy6TeUA.mp4 71 | WhkON_S-pQc.mp4 72 | XOe9GeojzCs.mp4 73 | YAAUPjq-L-Q.mp4 74 | Z0FEElATNjk.mkv 75 | Z42lnoj2n08.mkv 76 | ZS2C28fDC9U.mp4 77 | ZbeMNLwASVo.mkv 78 | ZsgPK0XGYoM.mp4 79 | Zu4iQJrlpo0.mkv 80 | _kbrVsCaaPo.mp4 81 | _vy57h5Oeys.mkv 82 | aDfOtlsdoWw.mkv 83 | bNP8Q_8u89A.webm 84 | bUVls-bf0jM.mkv 85 | bzGQK5lH-RA.mkv 86 | c5mlhcFYYZs.mp4 87 | 
cYt6NaQgcEk.mp4 88 | cqkChR44vkA.mkv 89 | fT_WjgJ_-r0.mkv 90 | gEI9qBdVt5I.mp4 91 | h7Atb503JwY.webm 92 | hgmK4Epb02E.mkv 93 | i9cuy3teV0w.mkv 94 | ipBRBABLSAk.mkv 95 | jKKXDh4lYd0.mkv 96 | kW5WyJ1QNpM.mkv 97 | keUOiCcHtoQ.mkv 98 | kvFlbTK812w.mkv 99 | l8_Mk3-sZsQ.mkv 100 | nAg_NVzLoAY.mkv 101 | nRzhjXMIXt4.mkv 102 | o-ZcbjLBtls.mkv 103 | ohn_RxyaCy4.mp4 104 | pSE4Dlork1Y.mp4 105 | pSdPmmJ3-ng.mp4 106 | rJibAAUEMDY.mkv 107 | rRL0Ce8e-RY.mkv 108 | rTCch_5JlkA.mp4 109 | s2z5UASlrP8.mkv 110 | sV3zZROy0uc.mkv 111 | tDF-BqFfF78.mkv 112 | tj-VmrMYtUI.mp4 113 | u97DLHpcw7c.mkv 114 | vL7N_xRJKJU.mp4 115 | vsMgg4snZzM.mkv 116 | w-jIrlwuv2Y.mkv 117 | wamBSoyRtbs.mkv 118 | woC9Vfbn74I.mkv 119 | xH1WLtZ8csM.mp4 120 | xJpDPrwLJh4.mkv 121 | xT2ogY6xEsI.mp4 122 | xYUx0drhUNk.mkv 123 | xauSNGP5yA0.mkv 124 | xdDTWBRWPLQ.mkv 125 | y4lBI_gFnqI.mkv 126 | y5o8w0FRj98.mkv 127 | yQdi5Ke4dNY.mkv 128 | yRRZkwtJCwU.mkv 129 | z5lg_3abT-s.mkv 130 | zm78XnWN7MU.mkv 131 | zvxnOrzTg0M.mp4 132 | -------------------------------------------------------------------------------- /tao/scripts/download/meta/ava_file_names_trainval_v2.1.txt: -------------------------------------------------------------------------------- 1 | _-Z6wFjXtGQ.mkv 2 | _145Aa_xkuE.mp4 3 | _7oWZq_s_Sk.mkv 4 | _a9SWtcaNj8.mkv 5 | _Ca3gOdOHxU.mp4 6 | _dBTTYDRdRQ.webm 7 | _eBah6c5kyA.mkv 8 | _ithRWANKB0.mp4 9 | _mAfwH6i90E.mkv 10 | -5KQ66BBWC4.mkv 11 | -FaXLcSFjUI.mp4 12 | -IELREHX_js.mp4 13 | -OyDO1g74vc.mp4 14 | -XpUuIgyUHE.mp4 15 | -ZFgsrolSxo.mkv 16 | 053oq2xB3oU.mkv 17 | 0f39OWEqJ24.mp4 18 | 0wBYFahr3uI.mp4 19 | 1j20qq1JyX4.mp4 20 | 1ReZIMmD_8E.mp4 21 | 26V9UzqSguo.mp4 22 | 2bxKkUgcqpk.mp4 23 | 2DUITARAsWQ.mp4 24 | 2E_e8JlvTlg.mkv 25 | 2FIHxnZKg6A.webm 26 | 2fwni_Kjf2M.mkv 27 | 2KpThOF_QmE.mkv 28 | 2PpxiG0WU18.mkv 29 | 2qQs3Y9OJX0.mkv 30 | 3_VjIRdXVdM.mkv 31 | 32HR3MnDZ8g.mp4 32 | 3IOE-Q3UWdA.mp4 33 | 4gVsDd8PV9U.mp4 34 | 4k-rTF3oZKw.mp4 35 | 4Y5qi1gD2Sw.mkv 36 | 4ZpjKfu6Cl8.mkv 37 | 55Ihr6uVIDA.mkv 38 | 5BDj0ow5hnA.mp4 39 | 5LrOQEt_XVM.mp4 40 | 5milLu-6bWI.mp4 41 | 5MxjqHfkWFI.mkv 42 | 5YPjcdLbs5g.mkv 43 | 6d5u6FHvz7Q.mkv 44 | 7g37N3eoQ9s.mkv 45 | 7nHkh4sP5Ks.mkv 46 | 7T5G0CmwTPo.mkv 47 | 7YpF6DntOYw.mkv 48 | 8aMv-ZGD4ic.mkv 49 | 8JSxLhDMGtE.mkv 50 | 8nO5FFbIAog.webm 51 | 8VZEwOCQ8bc.mkv 52 | 914yZXz-iRs.mkv 53 | 9bK05eBt1GM.mp4 54 | 9eAOr_ttXp0.mkv 55 | 9F2voT6QWvQ.mkv 56 | 9HOMUW7QNFc.mkv 57 | 9IF8uTRrWAM.mkv 58 | 9mLYmkonWZQ.mkv 59 | 9QbzS8bZXFE.mkv 60 | 9Rcxr3IEX4E.mkv 61 | 9tyiDEYiWiA.mkv 62 | 9Y_l9NsnYE0.mp4 63 | aDEYi1OG0vU.mkv 64 | Ag-pXiLrd48.mp4 65 | aMYcLyh9OhU.mkv 66 | AN07xQokfiE.mp4 67 | aRbLw-dU2XY.mp4 68 | ax3q-RkVIt4.mp4 69 | ayAMdYfJJLk.mkv 70 | AYebXQ8eUkM.mkv 71 | b-YoBU0XT90.mp4 72 | B1MAUxpKaV8.mkv 73 | b50s4AlOOKY.mkv 74 | b5pRYl_djbs.mp4 75 | bAVXp1oGjHA.mkv 76 | BCiuXAuCKAU.mp4 77 | bePts02nIY8.mkv 78 | bhlFavrh7WU.mkv 79 | bSZiZ4rOC7c.mkv 80 | BXCh3r-pPAM.mkv 81 | BY3sZmvUp-0.mp4 82 | C25wkwAMB-w.mkv 83 | C3qk4yAMANk.mkv 84 | c9pEMjPT16M.webm 85 | cc4y-yYm5Ao.mkv 86 | CG98XdYsgrA.mkv 87 | cKA-qeZuH_w.mkv 88 | cLiJgvrDlWw.mp4 89 | CMCPhm2L400.mkv 90 | covMYDBa5dk.mp4 91 | CrlfWnsS7ac.mkv 92 | cWYJHb25EVs.mp4 93 | CZ2NP8UsPuE.mkv 94 | D-BJTU6NxZ8.mkv 95 | D8Vhxbho1fY.mp4 96 | Db19rWN5BGo.mkv 97 | dgLApPvmfBE.mkv 98 | Di1MG6auDYo.mkv 99 | dMH8L7mqCNI.mkv 100 | E2jecoyAx1M.mkv 101 | E7JcKooKVsM.mp4 102 | eA55_shhKko.mkv 103 | Ecivp8t3MdY.mkv 104 | Ekwy7wzLfjc.mkv 105 | er7eeiJB6dI.mkv 106 | F3dPH6Xqf5M.mp4 107 | fD6VkIRlIRI.mkv 108 | Feu1_8NazPE.mp4 109 | fGgnNCbXZ20.mp4 110 | fNcxxBjEOgw.mkv 111 | fpprSy6AzKk.mkv 112 | 
fZs-yXm-uUs.mp4 113 | g1wyIcLPbq0.mp4 114 | G4qq1MRXCiY.mkv 115 | G5Yr20A5z_Q.mkv 116 | GBXK_SyfisM.mkv 117 | Gfdg_GcaNe8.mkv 118 | gjasEUDkbuc.mkv 119 | gjdgj04FzR0.mp4 120 | GozLjpMNADg.mkv 121 | gqmmpoO1JrY.mkv 122 | Gt61_Yekkgc.mp4 123 | Gvp-cj3bmIY.webm 124 | hbYvDvJrpNk.mp4 125 | hHgg9WI8dTk.mkv 126 | Hi8QeP_VPu0.mkv 127 | HJzgJ9ZjvJk.mkv 128 | HKjR70GCRPE.mp4 129 | Hscyg0vLKc8.mp4 130 | HTYT2vF-j_w.mkv 131 | HV0H6oc4Kvs.mkv 132 | HVAmkvLrthQ.mkv 133 | HymKCzQJbB8.mkv 134 | I8j6Xq2B5ys.mp4 135 | Ie35yEssHko.mkv 136 | IKdBLciu_-A.mp4 137 | iSlDMboCSao.mkv 138 | IuPC-z-M9u8.mkv 139 | IzvOYVMltkI.mp4 140 | J1jDc2rTJlg.mkv 141 | j35JnR0Q7Es.mp4 142 | J4bt4y9ShTA.mkv 143 | j5jmjhGBW44.mkv 144 | jBs_XYHI7gM.mkv 145 | jE0S8gYWftE.webm 146 | jgAwJ0RqmYg.mp4 147 | jI0HIlSsa3s.mkv 148 | JNb4nWexD0I.mkv 149 | jqZpiHlJUig.mkv 150 | K_SpqDJnlps.mkv 151 | kAsz-76DTDE.mkv 152 | Kb1fduj-jdY.mp4 153 | KHHgQ_Pe4cI.mkv 154 | KIy2a-nejxg.mp4 155 | kLDpP9QEVBs.mp4 156 | kMy-6RtoOVU.mkv 157 | kplbKz3_fZk.mkv 158 | Ksd1JQFHYWA.mp4 159 | KVq6If6ozMY.mkv 160 | KWoSGtglCms.mkv 161 | l-jxh8gpxuY.mkv 162 | l2XO3tQk8lI.mkv 163 | lDmLcWWBp1E.mkv 164 | Lg1jOu8cUBM.mkv 165 | LIavUJVrXaI.mkv 166 | LrDT25hmApw.mkv 167 | lT1zdTL-3SM.mkv 168 | lWXhqIAvarw.mkv 169 | M6cgEs9JgDo.mkv 170 | Ma2hgTmveKQ.mkv 171 | mfsbYdLx9wE.mkv 172 | miB-wo2PfLI.mkv 173 | mkcDANJjDcM.mkv 174 | N0Dt9i9IUNg.mkv 175 | N1K2bEZLL_A.mkv 176 | N5UD8FGzDek.mkv 177 | N7baJsMszJ0.mkv 178 | NEQ7Wpf-EtI.mkv 179 | nlinqZPgvVk.mkv 180 | NO2esmws190.mkv 181 | O_NYCUhZ9zw.mp4 182 | o4xQ-BEa3Ss.mkv 183 | O5m_0Yay4EU.mkv 184 | oD_wxyTHJ2I.mp4 185 | OfMdakd4bHI.mkv 186 | OGNnUvJq9RI.mkv 187 | oifTDWZvOhY.mkv 188 | oITFHwzfw_k.mkv 189 | om_83F5VwTQ.mp4 190 | oq_bufAhyl8.mkv 191 | Ov0za6Xb1LM.mkv 192 | oWhvucAskhk.mkv 193 | P60OxWahxBQ.mkv 194 | P90hF2S1JzA.mkv 195 | PcFEhUKhN6g.mkv 196 | pGP_oIdKmRY.mkv 197 | phrYEKv0rmw.mkv 198 | phVLLTMzmKk.mkv 199 | pieVIsGmLsc.mkv 200 | piYxcrMxVPw.mkv 201 | plkJ45_-pMk.mp4 202 | PmElx9ZVByw.mp4 203 | PNZQ2UJfyQE.mp4 204 | QaIMUi-elFo.mkv 205 | qBUu7cy-5Iw.mp4 206 | QCLQYnt3aMo.webm 207 | QD3L10bUnBo.mkv 208 | QJzocCGLdHU.mp4 209 | QMwT7DFA5O4.mkv 210 | QotkBTEePI8.mkv 211 | qpoWHELxL-4.mp4 212 | qrkff49p4E4.mp4 213 | qsTqtWVVSLM.mkv 214 | QTmwhrVal1g.mkv 215 | qx2vAO5ofmo.mp4 216 | r2llOyS-BmE.mkv 217 | rCb9-U4TArw.mp4 218 | rFgb2ECMcrY.mkv 219 | ri4P2enZT9o.mkv 220 | Riu4ZKk4YdQ.webm 221 | rJKeqfTlAeY.mkv 222 | rk8Xm0EAOWs.mkv 223 | Rm518TUhbRY.mkv 224 | rUYsoIIE37A.mp4 225 | rXFlJbXyZyc.mkv 226 | S0tkhGJjwLA.mkv 227 | sADELCyj10I.mkv 228 | SCh-ZImnyyk.mp4 229 | SHBMiL5f_3Q.mkv 230 | skiZueh4lfY.mkv 231 | sNQJfYvhcPk.mp4 232 | sUVhd0YTKgw.mkv 233 | T-Fc9ctuNVI.mkv 234 | t0V4drbYDnc.mkv 235 | t1LXrJOvPDg.mkv 236 | T26G6_AjJZ4.mkv 237 | TcB0IFBwk-k.mkv 238 | TCmNvNLRWrc.mkv 239 | tEoJW9ycmSY.mkv 240 | TEQ9sAj-DPo.mp4 241 | tghXjom3120.mkv 242 | tjqCzVjojCo.mkv 243 | TM5MPJIq1Is.mkv 244 | tNpZtigMc4g.mkv 245 | tt0t_a1EDCE.mkv 246 | TzaVHtLXOzY.mkv 247 | U_WzY2k8IBM.mkv 248 | u1ltv6r14KQ.mkv 249 | UgZFdrNT6W0.mkv 250 | uNT6HrrnqPU.webm 251 | UOfuzrwkclM.mkv 252 | UOyyTUX5Vo4.mkv 253 | uq_HBsvP548.mkv 254 | UrsCy6qIGoo.mkv 255 | UsLnxI_zGpY.mkv 256 | uwW0ejeosmk.mkv 257 | uzPI7FcF79U.mkv 258 | v0L-WkMO3s4.mp4 259 | vBbjA4tWCPg.mp4 260 | vfjywN5CN0Y.mkv 261 | Vmef_8MY46w.mkv 262 | VRlpH1MbWUw.mp4 263 | VsYPP2I0aUQ.mkv 264 | wEAeql4z1O0.mp4 265 | wfEOx36N4jA.mp4 266 | WKqbLbU68wU.mkv 267 | WlgxRNCHQzw.mkv 268 | wogRuPNBUi8.mp4 269 | wONG7Vh87B4.mkv 270 | WSPvfxtqisg.mkv 271 | WVde9pyaHg4.mkv 272 | x-6CtPWVi6E.mkv 273 | 
X5wWhZ2r9kc.mp4 274 | xeGWXqSvC-8.webm 275 | XF87VL5T0aA.mkv 276 | XglAvHaEtHY.mp4 277 | xJmRNZVDDCY.mkv 278 | xmqSaQPzL1E.mkv 279 | xO4ABy2iOQA.mp4 280 | xp67EC-Hvwk.mkv 281 | XpGRS72ghag.mkv 282 | XV_FF3WC7kA.mkv 283 | y7ncweROe9U.mkv 284 | yMtGmGa8KZ0.mkv 285 | yn9WN9lsHRE.mkv 286 | yo-Kg2YxlZs.mkv 287 | yqImJuC5UzI.mp4 288 | Ytga8ciKWJc.mkv 289 | yvgCGJ6vfkY.mkv 290 | YYWdB7h1INo.mkv 291 | z-fsLpGHq6o.mkv 292 | Z1YV6wB037M.mkv 293 | z3kgrh0L_80.mkv 294 | zC5Fh2tTS1U.mp4 295 | zG7mx8KiavA.mp4 296 | zlVkeKC6Ha8.mp4 297 | ZosVdkY76FU.mkv 298 | zR725veL-DI.mkv 299 | ZxQn8HVmXsY.mkv 300 | -------------------------------------------------------------------------------- /tao/scripts/download/verify.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | from collections import defaultdict 5 | from pathlib import Path 6 | 7 | from script_utils.common import common_setup 8 | from tqdm import tqdm 9 | 10 | from tao.utils.download import are_tao_frames_dumped 11 | 12 | 13 | def main(): 14 | # Use first line of file docstring as description if it exists. 15 | parser = argparse.ArgumentParser( 16 | description=__doc__.split('\n')[0] if __doc__ else '', 17 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 18 | parser.add_argument('root', type=Path) 19 | parser.add_argument('--split', 20 | required=True, 21 | choices=['train', 'val', 'test']) 22 | 23 | args = parser.parse_args() 24 | log_dir = args.root / 'logs' 25 | log_dir.mkdir(exist_ok=True, parents=True) 26 | common_setup(__file__, log_dir, args) 27 | 28 | ann_path = args.root / f'annotations/{args.split}.json' 29 | with open(ann_path, 'r') as f: 30 | tao = json.load(f) 31 | 32 | checksums_path = ( 33 | args.root / f'annotations/checksums/{args.split}_checksums.json') 34 | with open(checksums_path, 'r') as f: 35 | checksums = json.load(f) 36 | 37 | videos_by_dataset = defaultdict(list) 38 | for video in tao['videos']: 39 | videos_by_dataset[video['metadata']['dataset']].append(video) 40 | 41 | status = {} 42 | for dataset, videos in sorted(videos_by_dataset.items()): 43 | status[dataset] = True 44 | for video in tqdm(videos, desc=f'Verifying {dataset}'): 45 | name = video['name'] 46 | frame_dir = args.root / 'frames' / name 47 | if not are_tao_frames_dumped( 48 | frame_dir, checksums[name], warn=True, allow_extra=False): 49 | logging.warning( 50 | f'Frames for {name} are not extracted properly. 
' 51 | f'Skipping rest of dataset.') 52 | status[dataset] = False 53 | break 54 | 55 | success = [] 56 | for dataset in sorted([d for d, v in status.items() if v]): 57 | success.append(f'{dataset: <12}: Verified ✓✓✓') 58 | 59 | failure = [] 60 | for dataset in sorted([d for d, v in status.items() if not v]): 61 | failure.append(f'{dataset: <12}: FAILED 𐄂𐄂𐄂') 62 | 63 | if success: 64 | logging.info('Success!\n' + ('\n'.join(success))) 65 | if failure: 66 | logging.warning('Some datasets were not properly extracted!\n' + 67 | ('\n'.join(failure))) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /tao/scripts/evaluation/configs/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/tao/scripts/evaluation/configs/default.yaml -------------------------------------------------------------------------------- /tao/scripts/evaluation/evaluate.py: -------------------------------------------------------------------------------- 1 | """Evaluate tao results (helper script).""" 2 | 3 | import argparse 4 | import logging 5 | import numpy as np 6 | from pathlib import Path 7 | 8 | from script_utils.common import common_setup 9 | from tao.utils.evaluation import get_cfg_defaults, evaluate, log_eval 10 | from tao.utils.yacs_util import merge_from_file_with_base 11 | 12 | 13 | CONFIG_DIR = Path(__file__).resolve().parent / 'configs' 14 | 15 | 16 | def main(): 17 | # Use first line of file docstring as description if it exists. 18 | parser = argparse.ArgumentParser( 19 | description=__doc__.split('\n')[0] if __doc__ else '', 20 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 21 | parser.add_argument('annotations', type=Path) 22 | parser.add_argument('predictions', type=Path) 23 | parser.add_argument('--output-dir', type=Path) 24 | parser.add_argument('--config', 25 | type=Path, 26 | default=CONFIG_DIR / 'default.yaml') 27 | parser.add_argument('--config-updates', nargs='*') 28 | 29 | args = parser.parse_args() 30 | 31 | if args.output_dir: 32 | tensorboard_dir = args.output_dir / 'tensorboard' 33 | if tensorboard_dir.exists(): 34 | raise ValueError( 35 | 'Tensorboard dir already exists, not evaluating.') 36 | args.output_dir.mkdir(exist_ok=True, parents=True) 37 | log_path = common_setup(__file__, args.output_dir, args).name 38 | else: 39 | logging.getLogger().setLevel(logging.INFO) 40 | logging.basicConfig(format='%(asctime)s.%(msecs).03d: %(message)s', 41 | datefmt='%H:%M:%S') 42 | logging.info('Args:\n%s', vars(args)) 43 | log_path = None 44 | 45 | cfg = get_cfg_defaults() 46 | merge_from_file_with_base(cfg, args.config) 47 | if args.config_updates: 48 | cfg.merge_from_list(args.config_updates) 49 | cfg.freeze() 50 | 51 | if args.output_dir: 52 | with open(args.output_dir / 'config.yaml', 'w') as f: 53 | f.write(cfg.dump()) 54 | 55 | tao_eval = evaluate(args.annotations, args.predictions, cfg) 56 | area_index = tao_eval['tao_eval'].params.area_rng_lbl.index('all') 57 | time_index = tao_eval['tao_eval'].params.time_rng_lbl.index('all') 58 | category_aps = np.mean( 59 | tao_eval['tao_eval'].eval['precision'][:, :, :, area_index, time_index], 60 | axis=(0, 1)) 61 | for i, cat in enumerate(tao_eval['tao_eval'].params.cat_ids): 62 | print(f'category_id: {cat}, ap:{category_aps[i]}') 63 | log_eval(tao_eval, cfg, output_dir=args.output_dir, log_path=log_path) 64 | 65 | 66 | if 
__name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /tao/scripts/trackers/sort/README.md: -------------------------------------------------------------------------------- 1 | SORT 2 | ===== 3 | 4 | A simple online and realtime tracking algorithm for 2D multiple object tracking in video sequences. 5 | See an example [video here](https://motchallenge.net/movies/ETH-Linthescher-SORT.mp4). 6 | 7 | By Alex Bewley 8 | 9 | ### Introduction 10 | 11 | SORT is a barebones implementation of a visual multiple object tracking framework based on rudimentary data association and state estimation techniques. It is designed for online tracking applications where only past and current frames are available and the method produces object identities on the fly. While this minimalistic tracker doesn't handle occlusion or re-entering objects its purpose is to serve as a baseline and testbed for the development of future trackers. 12 | 13 | SORT was initially described in an [arXiv tech report](http://arxiv.org/abs/1602.00763). At the time of the initial publication, SORT was ranked the best *open source* multiple object tracker on the [MOT benchmark](https://motchallenge.net/results/2D_MOT_2015/). 14 | 15 | This code has been tested on Mac OSX 10.10, and Ubuntu 14.04, with Python 2.7 (anaconda). 16 | 17 | **Note:** A significant proportion of SORT's accuracy is attributed to the detections. 18 | For your convenience, this repo also contains *Faster* RCNN detections for the MOT benchmark sequences in the [benchmark format](https://motchallenge.net/instructions/). To run the detector yourself please see the original [*Faster* RCNN project](https://github.com/ShaoqingRen/faster_rcnn) or the python reimplementation of [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn) by Ross Girshick. 19 | 20 | **Also see:** 21 | A new and improved version of SORT with a Deep Association Metric implemented in tensorflow is available at [https://github.com/nwojke/deep_sort](https://github.com/nwojke/deep_sort) . 22 | 23 | ### License 24 | 25 | SORT is released under the GPL License (refer to the LICENSE file for details) to promote the open use of the tracker and future improvements. If you require a permissive license contact Alex (alex@bewley.ai). 26 | 27 | ### Citing SORT 28 | 29 | If you find this repo useful in your research, please consider citing: 30 | 31 | @inproceedings{Bewley2016_sort, 32 | author={Bewley, Alex and Ge, Zongyuan and Ott, Lionel and Ramos, Fabio and Upcroft, Ben}, 33 | booktitle={2016 IEEE International Conference on Image Processing (ICIP)}, 34 | title={Simple online and realtime tracking}, 35 | year={2016}, 36 | pages={3464-3468}, 37 | keywords={Benchmark testing;Complexity theory;Detectors;Kalman filters;Target tracking;Visualization;Computer Vision;Data Association;Detection;Multiple Object Tracking}, 38 | doi={10.1109/ICIP.2016.7533003} 39 | } 40 | 41 | 42 | ### Dependencies: 43 | 44 | This code makes use of the following packages: 45 | 1. [`scikit-learn`](http://scikit-learn.org/stable/) 46 | 0. [`scikit-image`](http://scikit-image.org/download) 47 | 0. [`FilterPy`](https://github.com/rlabbe/filterpy) 48 | 49 | To install required dependencies run: 50 | ``` 51 | $ pip install -r requirements.txt 52 | ``` 53 | 54 | 55 | ### Demo: 56 | 57 | To run the tracker with the provided detections: 58 | 59 | ``` 60 | $ cd path/to/sort 61 | $ python sort.py 62 | ``` 63 | 64 | To display the results you need to: 65 | 66 | 0. 
Download the [2D MOT 2015 benchmark dataset](https://motchallenge.net/data/2D_MOT_2015/#download) 67 | 0. Create a symbolic link to the dataset 68 | ``` 69 | $ ln -s /path/to/MOT2015_challenge/data/2DMOT2015 mot_benchmark 70 | ``` 71 | 0. Run the demo with the ```--display``` flag 72 | ``` 73 | $ python sort.py --display 74 | ``` 75 | 76 | 77 | ### Main Results 78 | 79 | Using the [MOT challenge devkit](https://motchallenge.net/devkit/) the method produces the following results (as described in the paper). 80 | 81 | Sequence | Rcll | Prcn | FAR | GT MT PT ML| FP FN IDs FM| MOTA MOTP MOTAL 82 | --------------- |:----:|:----:|:----:|:-------------:|:-------------------:|:------------------: 83 | TUD-Campus | 68.5 | 94.3 | 0.21 | 8 6 2 0| 15 113 6 9| 62.7 73.7 64.1 84 | ETH-Sunnyday | 77.5 | 81.9 | 0.90 | 30 11 16 3| 319 418 22 54| 59.1 74.4 60.3 85 | ETH-Pedcross2 | 51.9 | 90.8 | 0.39 | 133 17 60 56| 330 3014 77 103| 45.4 74.8 46.6 86 | ADL-Rundle-8 | 44.3 | 75.8 | 1.47 | 28 6 16 6| 959 3781 103 211| 28.6 71.1 30.1 87 | Venice-2 | 42.5 | 64.8 | 2.75 | 26 7 9 10| 1650 4109 57 106| 18.6 73.4 19.3 88 | KITTI-17 | 67.1 | 92.3 | 0.26 | 9 1 8 0| 38 225 9 16| 60.2 72.3 61.3 89 | *Overall* | 49.5 | 77.5 | 1.24 | 234 48 111 75| 3311 11660 274 499| 34.0 73.3 35.1 90 | 91 | 92 | ### Using SORT in your own project 93 | 94 | Below is the gist of how to instantiate and update SORT. See the ['__main__'](https://github.com/abewley/sort/blob/master/sort.py#L239) section of [sort.py](https://github.com/abewley/sort/blob/master/sort.py#L239) for a complete example. 95 | 96 | from sort import * 97 | 98 | #create instance of SORT 99 | mot_tracker = Sort() 100 | 101 | # get detections 102 | ... 103 | 104 | # update SORT 105 | track_bbs_ids = mot_tracker.update(detections) 106 | 107 | # track_bbs_ids is a np array where each row contains a valid bounding box and track_id (last column) 108 | ... 
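A slightly more concrete sketch (not from the original README: the box coordinates are invented, and the `[x1, y1, x2, y2, score]` detection format is taken from the `sort_with_detection_id.py` docstring in this repo):

    import numpy as np
    from sort import Sort

    mot_tracker = Sort()
    detections = np.array([[10., 20., 50., 80., 0.9],
                           [200., 220., 260., 300., 0.8]])
    tracks = mot_tracker.update(detections)
    # each row of `tracks` is [x1, y1, x2, y2, track_id]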
109 | 110 | 111 | -------------------------------------------------------------------------------- /tao/scripts/trackers/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/tao/scripts/trackers/sort/__init__.py -------------------------------------------------------------------------------- /tao/scripts/trackers/sort/create_json_for_eval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import random 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | from natsort import natsorted 9 | from script_utils.common import common_setup 10 | from tqdm import tqdm 11 | 12 | 13 | def create_json(track_result, groundtruth, output_dir): 14 | # Image without extension -> image id 15 | image_stem_to_info = { 16 | x['file_name'].rsplit('.', 1)[0]: x for x in groundtruth['images'] 17 | } 18 | valid_videos = {x['name'] for x in groundtruth['videos']} 19 | 20 | all_annotations = [] 21 | found_predictions = {} 22 | for video in tqdm(valid_videos): 23 | video_npz = track_result / f'{video}.npz' 24 | if not video_npz.exists(): 25 | logging.error(f'Could not find video {video} at {video_npz}') 26 | continue 27 | video_result = np.load(video_npz) 28 | frame_names = [x for x in video_result.keys() if x != 'field_order'] 29 | video_found = {} 30 | for frame in natsorted(frame_names): 31 | # (x0, y0, x1, y1, class, score, box_index, track_id) 32 | frame_name = f'{video}/{frame}' 33 | if frame_name not in image_stem_to_info: 34 | continue 35 | video_found[frame_name] = True 36 | image_info = image_stem_to_info[frame_name] 37 | all_annotations.extend([{ 38 | # (x1, y1) -> (w, h) 39 | 'image_id': image_info['id'], 40 | 'video_id': image_info['video_id'], 41 | 'track_id': int(x[7]), 42 | 'bbox': [x[0], x[1], x[2] - x[0], x[3] - x[1]], 43 | 'category_id': x[4], 44 | 'score': x[5], 45 | } for x in video_result[frame]]) 46 | if not video_found: 47 | raise ValueError(f'Found no valid predictions for video {video}') 48 | found_predictions.update(video_found) 49 | if not found_predictions: 50 | raise ValueError('Found no valid predictions!') 51 | 52 | with_predictions = set(found_predictions.keys()) 53 | with_labels = set(image_stem_to_info.keys()) 54 | if with_predictions != with_labels: 55 | missing_videos = { 56 | x.rsplit('/', 1)[0] 57 | for x in with_labels - with_predictions 58 | } 59 | logging.warn( 60 | f'{len(with_labels - with_predictions)} images from ' 61 | f'{len(missing_videos)} videos did not have predictions!') 62 | 63 | with open(output_dir / 'results.json', 'w') as f: 64 | json.dump(all_annotations, f) 65 | 66 | 67 | def main(): 68 | # Use first line of file docstring as description if it exists. 
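    # Example invocation (hypothetical paths):
    #   python create_json_for_eval.py \
    #       --track-result outputs/sort/tracks \
    #       --annotations-json data/tao/annotations/val.json \
    #       --output-dir outputs/sort/json
    # --track-result is expected to hold one <video>.npz per TAO video, where
    # each frame key maps to rows of (x0, y0, x1, y1, class, score, box_index,
    # track_id); these are converted into a single COCO-style results.json.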
69 | parser = argparse.ArgumentParser( 70 | description=__doc__.split('\n')[0] if __doc__ else '', 71 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 72 | parser.add_argument('--track-result', required=True, type=Path) 73 | parser.add_argument('--annotations-json', 74 | type=Path, 75 | help='Annotations json') 76 | parser.add_argument('--output-dir', required=True, type=Path) 77 | 78 | args = parser.parse_args() 79 | args.output_dir.mkdir(exist_ok=True, parents=True) 80 | common_setup(__file__, args.output_dir, args) 81 | 82 | with open(args.annotations_json, 'r') as f: 83 | groundtruth = json.load(f) 84 | 85 | create_json(args.track_result, groundtruth, args.output_dir) 86 | 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /tao/scripts/trackers/sort/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | filterpy~=1.4.1 3 | numba~=0.38.1 4 | scikit-image~=0.14.0 5 | scikit-learn~=0.19.1 6 | lap~=0.4.0 7 | -------------------------------------------------------------------------------- /tao/scripts/trackers/sort/sort_with_detection_id.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sort import associate_detections_to_trackers, KalmanBoxTracker 4 | 5 | 6 | class SortWithDetectionId(object): 7 | def __init__(self, max_age=1, min_hits=3, iou_threshold=0.3): 8 | """ 9 | Sets key parameters for SORT 10 | """ 11 | self.max_age = max_age 12 | self.min_hits = min_hits 13 | self.trackers = [] 14 | self.frame_count = 0 15 | self.iou_threshold = iou_threshold 16 | 17 | def update(self, dets): 18 | """ 19 | Args: 20 | dets (np.array): Shape (num_boxes, 5), where each row contains 21 | [x1, y1, x2, y2, score] 22 | 23 | Retruns: 24 | tracks (np.array): Shape (num_boxes, 6), where each row contains 25 | [x1, y1, x2, y2, detection_index, track_id] 26 | """ 27 | self.frame_count += 1 28 | # get predicted locations from existing trackers. 
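        # Each existing KalmanBoxTracker predicts its box for the current
        # frame; predictions that come back as NaN are collected in `to_del`
        # and dropped before detections are associated to tracks below.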
29 | trks = np.zeros((len(self.trackers), 5)) 30 | to_del = [] 31 | ret = [] 32 | for t, trk in enumerate(trks): 33 | pos = self.trackers[t].predict()[0] 34 | trk[:] = [pos[0], pos[1], pos[2], pos[3], 0] 35 | if (np.any(np.isnan(pos))): 36 | to_del.append(t) 37 | 38 | trks = np.ma.compress_rows(np.ma.masked_invalid(trks)) 39 | for t in reversed(to_del): 40 | self.trackers.pop(t) 41 | matched, unmatched_dets, unmatched_trks = ( 42 | associate_detections_to_trackers(dets, trks, self.iou_threshold)) 43 | 44 | # update matched trackers with assigned detections 45 | track_to_det_index = {t: d for d, t in matched} 46 | # matched[i, 0] is matched to matched[i, 1] 47 | for t, trk in enumerate(self.trackers): 48 | if (t not in unmatched_trks): 49 | d = track_to_det_index[t] 50 | trk.update(dets[d, :]) 51 | 52 | # create and initialise new trackers for unmatched detections 53 | for i in unmatched_dets: 54 | trk = KalmanBoxTracker(dets[i, :]) 55 | self.trackers.append(trk) 56 | track_to_det_index[len(self.trackers) - 1] = i 57 | i = len(self.trackers) 58 | for t, trk in reversed(list(enumerate(self.trackers))): 59 | d = trk.get_state()[0] 60 | det_id = track_to_det_index.get(t, -1) 61 | if ((trk.time_since_update < 1) 62 | and (trk.hit_streak >= self.min_hits 63 | or self.frame_count <= self.min_hits)): 64 | ret.append( 65 | np.concatenate((d, [det_id, trk.id + 1])).reshape( 66 | 1, -1)) # +1 as MOT benchmark requires positive 67 | i -= 1 68 | # remove dead tracklet 69 | if (trk.time_since_update > self.max_age): 70 | self.trackers.pop(i) 71 | if (len(ret) > 0): 72 | return np.concatenate(ret) 73 | return np.empty((0, 6)) 74 | -------------------------------------------------------------------------------- /tao/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Note: To use the 'upload' functionality of this file, you must: 5 | # $ pipenv install twine --dev 6 | 7 | import io 8 | import os 9 | import sys 10 | from shutil import rmtree 11 | 12 | from setuptools import find_packages, setup, Command 13 | 14 | # Package meta-data. 15 | NAME = 'tao' 16 | DESCRIPTION = 'Track Any Object' 17 | URL = 'http://taodataset.org' 18 | EMAIL = 'achald@cs.cmu.edu' 19 | AUTHOR = 'Achal Dave' 20 | REQUIRES_PYTHON = '>=3.6.0' 21 | VERSION = '0.1.0' 22 | 23 | # What packages are required for this module to be executed? 24 | REQUIRED = [ 25 | 'script_utils @ git+https://github.com/achalddave/python-script-utils.git@v0.0.2#egg=script_utils', 26 | 'moviepy~=0.2', 'scipy', 'natsort', 'tqdm', 'yacs', 'boto3', 'youtube_dl', 27 | 'numba' 28 | # 'requests', 'maya', 'records', 29 | ] 30 | 31 | # What packages are optional? 32 | EXTRAS = { 33 | # 'fancy feature': ['django'], 34 | } 35 | 36 | # The rest you shouldn't have to touch too much :) 37 | # ------------------------------------------------ 38 | # Except, perhaps the License and Trove Classifiers! 39 | # If you do change the License, remember to change the Trove Classifier for that! 40 | 41 | here = os.path.abspath(os.path.dirname(__file__)) 42 | 43 | # Import the README and use it as the long-description. 44 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file! 45 | try: 46 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 47 | long_description = '\n' + f.read() 48 | except FileNotFoundError: 49 | long_description = DESCRIPTION 50 | 51 | # Load the package's __version__.py module as a dictionary. 
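# Since VERSION is set above, the __version__.py fallback below is skipped and
# about['__version__'] is taken directly from VERSION.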
52 | about = {} 53 | if not VERSION: 54 | project_slug = NAME.lower().replace("-", "_").replace(" ", "_") 55 | with open(os.path.join(here, project_slug, '__version__.py')) as f: 56 | exec(f.read(), about) 57 | else: 58 | about['__version__'] = VERSION 59 | 60 | 61 | class UploadCommand(Command): 62 | """Support setup.py upload.""" 63 | 64 | description = 'Build and publish the package.' 65 | user_options = [] 66 | 67 | @staticmethod 68 | def status(s): 69 | """Prints things in bold.""" 70 | print('\033[1m{0}\033[0m'.format(s)) 71 | 72 | def initialize_options(self): 73 | pass 74 | 75 | def finalize_options(self): 76 | pass 77 | 78 | def run(self): 79 | try: 80 | self.status('Removing previous builds…') 81 | rmtree(os.path.join(here, 'dist')) 82 | except OSError: 83 | pass 84 | 85 | self.status('Building Source and Wheel (universal) distribution…') 86 | os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) 87 | 88 | self.status('Uploading the package to PyPI via Twine…') 89 | os.system('twine upload dist/*') 90 | 91 | self.status('Pushing git tags…') 92 | os.system('git tag v{0}'.format(about['__version__'])) 93 | os.system('git push --tags') 94 | 95 | sys.exit() 96 | 97 | 98 | # Where the magic happens: 99 | setup( 100 | name=NAME, 101 | version=about['__version__'], 102 | description=DESCRIPTION, 103 | long_description=long_description, 104 | long_description_content_type='text/markdown', 105 | author=AUTHOR, 106 | author_email=EMAIL, 107 | python_requires=REQUIRES_PYTHON, 108 | url=URL, 109 | packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), 110 | # If your package is a single module, use this instead of 'packages': 111 | # py_modules=['tao'], 112 | 113 | # entry_points={ 114 | # 'console_scripts': ['mycli=mymodule:cli'], 115 | # }, 116 | install_requires=REQUIRED, 117 | extras_require=EXTRAS, 118 | include_package_data=True, 119 | license='MIT', 120 | classifiers=[ 121 | # Trove classifiers 122 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 123 | 'License :: OSI Approved :: MIT License', 124 | 'Programming Language :: Python', 125 | 'Programming Language :: Python :: 3', 126 | 'Programming Language :: Python :: 3.6', 127 | 'Programming Language :: Python :: Implementation :: CPython', 128 | 'Programming Language :: Python :: Implementation :: PyPy' 129 | ], 130 | # $ setup.py publish support. 
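    # Registering UploadCommand (defined above) under 'upload' lets
    # `python setup.py upload` build the sdist/wheel, push it to PyPI via
    # twine, and tag the release in git.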
131 | cmdclass={ 132 | 'upload': UploadCommand, 133 | }, 134 | ) 135 | -------------------------------------------------------------------------------- /tao/tao/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/tao/tao/__init__.py -------------------------------------------------------------------------------- /tao/tao/toolkit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/tao/tao/toolkit/__init__.py -------------------------------------------------------------------------------- /tao/tao/toolkit/tao/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from .tao import Tao 3 | from .results import TaoResults 4 | from .eval import TaoEval 5 | 6 | logging.basicConfig( 7 | format="[%(asctime)s] %(name)s %(levelname)s: %(message)s", 8 | datefmt="%m/%d %H:%M:%S", 9 | level=logging.WARN, 10 | ) 11 | 12 | __all__ = ["Tao", "TaoResults", "TaoEval"] 13 | -------------------------------------------------------------------------------- /tao/tao/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/tao/tao/utils/__init__.py -------------------------------------------------------------------------------- /tao/tao/utils/detectron2/datasets.py: -------------------------------------------------------------------------------- 1 | from detectron2.data.datasets import register_coco_instances 2 | 3 | 4 | def register_datasets(): 5 | register_coco_instances( 6 | "lvis_v0.5_coco_2017_train", {}, 7 | "data/detectron_datasets/lvis-coco/lvis-0.5_coco2017_train.json", 8 | "data/detectron_datasets/lvis-coco/train2017") 9 | -------------------------------------------------------------------------------- /tao/tao/utils/download.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from hashlib import md5 3 | from multiprocessing import Pool 4 | from pathlib import Path 5 | 6 | from tqdm import tqdm 7 | 8 | from tao.utils.video import dump_frames 9 | 10 | 11 | def dump_frames_star(task): 12 | return dump_frames(*task) 13 | 14 | 15 | def dump_tao_frames(videos, 16 | output_dirs, 17 | workers, 18 | tqdm_desc='Converting to frames'): 19 | fps = None 20 | extension = '.jpg' 21 | jpeg_qscale = 2 22 | 23 | for output_dir in output_dirs: 24 | Path(output_dir).mkdir(exist_ok=True, parents=True) 25 | 26 | dump_frames_tasks = [] 27 | for video_path, output_dir in zip(videos, output_dirs): 28 | dump_frames_tasks.append( 29 | (video_path, output_dir, fps, extension, jpeg_qscale)) 30 | 31 | # dump_frames code logs when, e.g., the expected number of frames does not 32 | # match the number of dumped frames. But these logs can have false 33 | # positives that are confusing, so we check that frames are correctly 34 | # dumped ourselves separately based on frames in TAO annotations. 
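    # Temporarily raise the root logger to ERROR while frames are dumped; the
    # original level is restored once the worker pool finishes.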
35 | _log_level = logging.root.level 36 | logging.root.setLevel(logging.ERROR) 37 | if workers > 1: 38 | pool = Pool(workers) 39 | try: 40 | list( 41 | tqdm(pool.imap_unordered(dump_frames_star, dump_frames_tasks), 42 | total=len(dump_frames_tasks), 43 | leave=False, 44 | desc=tqdm_desc)) 45 | except KeyboardInterrupt: 46 | print('Parent received control-c, exiting.') 47 | pool.terminate() 48 | else: 49 | for task in tqdm(dump_frames_tasks): 50 | dump_frames_star(task) 51 | logging.root.setLevel(_log_level) 52 | 53 | 54 | def frame_checksums_diff(frames_dir, checksums, early_exit=False): 55 | missing = [] 56 | mismatch = [] 57 | 58 | checksums = {k.replace('.jpeg', '.jpg'): v for k, v in checksums.items()} 59 | extra = [x for x in frames_dir.rglob('.jpg') if x.name not in checksums] 60 | 61 | for frame, cksum in checksums.items(): 62 | path = frames_dir / frame 63 | if not path.exists(): 64 | missing.append(path) 65 | if early_exit: 66 | break 67 | if cksum: 68 | with open(path, 'rb') as f: 69 | md5_digest = md5(f.read()).hexdigest() 70 | if md5_digest != cksum: 71 | # path, seen, expected 72 | mismatch.append((path, md5_digest, cksum)) 73 | if early_exit: 74 | break 75 | return missing, mismatch, extra 76 | 77 | 78 | def are_tao_frames_dumped(frames_dir, checksums, warn=True, allow_extra=True): 79 | missing, mismatch, extra = frame_checksums_diff(frames_dir, 80 | checksums, 81 | early_exit=True) 82 | if allow_extra: 83 | extra = [] 84 | if warn and extra: 85 | logging.warning(f'Unexpected frame at {extra[0]}!') 86 | if warn and missing: 87 | logging.warning(f'Could not find frame at {missing[0]}!') 88 | if warn and mismatch: 89 | path, seen, expected = mismatch[0] 90 | logging.warning( 91 | f'Checksum for {path} did not match! ' 92 | f'Expected: {expected}, saw: {seen}') 93 | return not mismatch and not missing and not extra 94 | 95 | 96 | def remove_non_tao_frames(frames_dir, keep_frames): 97 | frames = {x.split('.')[0] for x in keep_frames} 98 | extracted_frames = list(frames_dir.glob('*.jpg')) 99 | to_remove = [x for x in extracted_frames if x.stem not in frames] 100 | assert len(to_remove) != len(extracted_frames) 101 | for frame in to_remove: 102 | frame.unlink() 103 | -------------------------------------------------------------------------------- /tao/tao/utils/fs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm'] 5 | VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mkv', '.mov'] 6 | 7 | 8 | def dir_path(path): 9 | """Wrapper around Path that ensures this directory is created.""" 10 | if not isinstance(path, Path): 11 | path = Path(path) 12 | path.mkdir(exist_ok=True, parents=True) 13 | return path 14 | 15 | 16 | def file_path(path): 17 | """Wrapper around Path that ensures parent directories are created. 18 | 19 | x = mkdir_parents(dir / video_with_dir_prefix) 20 | is short-hand for 21 | x = Path(dir / video_with_dir_prefix) 22 | x.parent.mkdir(exist_ok=True, parents=True) 23 | """ 24 | if not isinstance(path, Path): 25 | path = Path(path) 26 | path.resolve().parent.mkdir(exist_ok=True, parents=True) 27 | return path 28 | 29 | 30 | def glob_ext(path, extensions, recursive=False): 31 | if not isinstance(path, Path): 32 | path = Path(path) 33 | if recursive: 34 | # Handle one level of symlinks. 
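        # rglob from `path` itself would not descend into children that are
        # symlinked directories, so list the immediate children first and then
        # recurse within each child directory separately.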
35 | path_children = list(path.glob('*')) 36 | all_files = list(path_children) 37 | for x in path_children: 38 | if x.is_dir(): 39 | all_files += x.rglob('*') 40 | else: 41 | all_files = path.glob('*') 42 | return [ 43 | x for x in all_files if any(x.name.endswith(y) for y in extensions) 44 | ] 45 | 46 | 47 | def find_file_extensions(folder, stem, possible_extensions): 48 | if not isinstance(folder, Path): 49 | folder = Path(folder) 50 | for ext in possible_extensions: 51 | if ext[0] != '.': 52 | ext = f'.{ext}' 53 | path = folder / f'{stem}{ext}' 54 | if path.exists(): 55 | return path 56 | return None 57 | 58 | 59 | def is_image_file(filename): 60 | """Checks if a file is an image. 61 | 62 | Args: 63 | filename (string): path to a file 64 | Returns: 65 | bool: True if the filename ends with a known image extension 66 | """ 67 | filename_lower = filename.lower() 68 | return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS) 69 | 70 | 71 | def simple_table(rows): 72 | lengths = [ 73 | max(len(row[i]) for row in rows) + 1 for i in range(len(rows[0])) 74 | ] 75 | row_format = ' '.join(('{:<%s}' % length) for length in lengths[:-1]) 76 | row_format += ' {}' # The last column can maintain its length. 77 | 78 | output = '' 79 | for i, row in enumerate(rows): 80 | if i > 0: 81 | output += '\n' 82 | output += row_format.format(*row) 83 | return output 84 | 85 | 86 | def parse_bool(arg): 87 | """Parse string to boolean. 88 | Using type=bool in argparse does not do the right thing. E.g. 89 | '--bool_flag False' will parse as True. See 90 | 91 | 92 | Usage: 93 | parser.add_argument( '--choice', type=parse_bool) 94 | """ 95 | if arg == 'True': 96 | return True 97 | elif arg == 'False': 98 | return False 99 | else: 100 | raise argparse.ArgumentTypeError("Expected 'True' or 'False'.") 101 | -------------------------------------------------------------------------------- /tao/tao/utils/misc.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import pickle 4 | 5 | from pathlib import Path 6 | from scipy.io import loadmat 7 | from tqdm import tqdm 8 | 9 | from tao.utils import misc 10 | 11 | 12 | def parse_bool(arg): 13 | """Parse string to boolean. 14 | 15 | Using type=bool in argparse does not do the right thing. E.g. 16 | '--bool_flag False' will parse as True. 
See 17 | 18 | """ 19 | if arg == 'True': 20 | return True 21 | elif arg == 'False': 22 | return False 23 | else: 24 | raise argparse.ArgumentTypeError("Expected 'True' or 'False'.") 25 | 26 | 27 | def load_detection_mat(mat): 28 | dictionary = {} 29 | f = loadmat(mat)['x'] 30 | result = {} 31 | # Assume mat files are of the format (x0, y0, x1, y1, label, score) 32 | if f.shape[1] == 6: 33 | result['pred_boxes'] = [[x[0], x[1], x[2], x[3]] for x in f[:, :4]] 34 | result['scores'] = [x for x in f[:, 5]] 35 | result['pred_classes'] = [x for x in f[:, 4]] 36 | elif f.shape[1] > 6: 37 | # Assume mat files are of the format 38 | # (x0, y0, x1, y1, label1_score, label2_score, ..., labeln_score) 39 | result['pred_boxes'] = [[x[0], x[1], x[2], x[3]] for x in f[:, :4]] 40 | result['scores'] = [] 41 | result['pred_classes'] = [] 42 | for box in f: 43 | label = box[4:].argmax() 44 | result['pred_classes'].append(label) 45 | result['scores'].append(box[label+4]) 46 | dictionary['instances'] = result 47 | return dictionary 48 | 49 | 50 | def load_detection_dir_as_results(root, 51 | annotations, 52 | detections_format='pickle', 53 | include_masks=False, 54 | score_threshold=None, 55 | max_dets_per_image=None, 56 | show_progress=False): 57 | """Load detections from dir as a results.json dict.""" 58 | if not isinstance(root, Path): 59 | root = Path(root) 60 | ext = { 61 | 'pickle': '.pickle', 62 | 'pkl': '.pkl', 63 | 'mat': '.mat' 64 | }[detections_format] 65 | bbox_annotations = [] 66 | if include_masks: 67 | segmentation_annotations = [] 68 | 69 | for image in tqdm(annotations['images'], 70 | desc='Collecting annotations', 71 | disable=not show_progress): 72 | path = (root / f'{image["file_name"]}').with_suffix(ext) 73 | if not path.exists(): 74 | logging.warn(f'Could not find detections for image ' 75 | f'{image["file_name"]} at {path}; skipping...') 76 | continue 77 | if detections_format in ('pickle', 'pkl'): 78 | with open(path, 'rb') as f: 79 | detections = pickle.load(f) 80 | else: 81 | detections = misc.load_detection_mat(path) 82 | 83 | num_detections = len(detections['instances']['scores']) 84 | indices = sorted(range(num_detections), 85 | key=lambda i: detections['instances']['scores'][i], 86 | reverse=True) 87 | 88 | if max_dets_per_image is not None: 89 | indices = indices[:max_dets_per_image] 90 | 91 | for idx in indices: 92 | entry = detections['instances']['pred_boxes'][idx] 93 | x1 = entry[0] 94 | y1 = entry[1] 95 | x2 = entry[2] 96 | y2 = entry[3] 97 | bbox = [int(x1), int(y1), int(x2-x1), int(y2-y1)] 98 | 99 | category = int(detections['instances']['pred_classes'][idx] + 1) 100 | score = detections['instances']['scores'][idx] 101 | if score_threshold is not None and score < score_threshold: 102 | continue 103 | 104 | try: 105 | score = score.item() 106 | except AttributeError: 107 | pass 108 | 109 | bbox_annotations.append({ 110 | 'image_id': image['id'], 111 | 'category_id': category, 112 | 'bbox': bbox, 113 | 'score': score, 114 | }) 115 | if include_masks: 116 | segmentation_annotations.append({ 117 | 'image_id': image['id'], 118 | 'category_id': category, 119 | 'segmentation': detections['instances']['pred_masks'][idx], 120 | 'score': score 121 | }) 122 | if include_masks: 123 | return bbox_annotations, segmentation_annotations 124 | else: 125 | return bbox_annotations 126 | -------------------------------------------------------------------------------- /tao/tao/utils/parallel/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/tao/tao/utils/parallel/__init__.py -------------------------------------------------------------------------------- /tao/tao/utils/parallel/fixed_gpu_pool.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | from tao.utils.parallel.pool_context import PoolWithContext 3 | 4 | 5 | class FixedGpuPool: 6 | """Pool where each process is attached to a specific GPU. 7 | 8 | Usage: 9 | def init(args, context): 10 | context['init_return'] = 'init' 11 | def run(args, context): 12 | return (context['gpu'], context['init_return'], args) 13 | p = FixedGpuPool([0, 1, 2, 3], init, None) 14 | print(p.map(run, ['task1', 'task2', 'task3'])) 15 | # [(0, 'init', 'task1'), (1, 'init', 'task2'), (2, 'hi', 'task3')] 16 | # NOTE: GPUs may be in different order 17 | """ 18 | 19 | def __init__(self, gpus, initializer=None, initargs=None): 20 | gpu_queue = mp.Manager().Queue() 21 | for gpu in gpus: 22 | gpu_queue.put(gpu) 23 | self.pool = PoolWithContext( 24 | len(gpus), _FixedGpuPool_init, (gpu_queue, initializer, initargs)) 25 | 26 | def map(self, task_fn, tasks): 27 | return self.pool.map(_FixedGpuPool_run, 28 | ((task_fn, task) for task in tasks)) 29 | 30 | def imap_unordered(self, task_fn, tasks): 31 | return self.pool.imap_unordered(_FixedGpuPool_run, 32 | ((task_fn, task) for task in tasks)) 33 | 34 | def close(self): 35 | self.pool.close() 36 | 37 | 38 | def _FixedGpuPool_init(args, context): 39 | gpu_queue, initializer, initargs = args 40 | context['gpu'] = gpu_queue.get() 41 | initializer(initargs, context=context) 42 | 43 | 44 | def _FixedGpuPool_run(args, context): 45 | task_fn, task_args = args 46 | return task_fn(task_args, context=context) 47 | 48 | 49 | if __name__ == "__main__": 50 | def _test_gpu_init(args, context): 51 | context['init_return'] = 'init' 52 | 53 | def _test_gpu_run(args, context): 54 | return (context['gpu'], context['init_return'], args) 55 | 56 | p = FixedGpuPool([0, 1, 2, 3], _test_gpu_init, 'init arg') 57 | print(p.map(_test_gpu_run, ['task1', 'task2', 'task3'])) 58 | -------------------------------------------------------------------------------- /tao/tao/utils/parallel/pool_context.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | from collections.abc import Iterable 3 | 4 | 5 | _PoolWithContext_context = None 6 | 7 | 8 | def _PoolWithContext_init(initializer, init_args): 9 | global _PoolWithContext_context 10 | _PoolWithContext_context = {} 11 | if init_args is None: 12 | initializer(context=_PoolWithContext_context) 13 | else: 14 | initializer(init_args, context=_PoolWithContext_context) 15 | 16 | 17 | def _PoolWithContext_run(args): 18 | task_fn, task_args = args 19 | return task_fn(task_args, context=_PoolWithContext_context) 20 | 21 | 22 | class PoolWithContext: 23 | """Like multiprocessing.Pool, but pass output of initializer to map fn. 
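    The `context` dict is created once per worker process and stored in a
    module-level global, so whatever the initializer puts into it (e.g. a GPU
    id in FixedGpuPool) is handed back to every task that worker runs.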
24 | 25 | Usage: 26 | def init(context): 27 | context['init_return'] = 'init' 28 | def run(args, context): 29 | return (context['init_return'], args) 30 | p = PoolWithContext(4, init) 31 | print(p.map(run, ['task1', 'task2', 'task3'])) 32 | # [('init', 'task1'), ('init', 'task2'), ('init', 'task3')] 33 | # NOTE: GPUs may be in different order 34 | """ 35 | def __init__(self, num_workers, initializer, initargs=None): 36 | self.pool = mp.Pool( 37 | num_workers, 38 | initializer=_PoolWithContext_init, 39 | initargs=(initializer, initargs)) 40 | 41 | def map(self, task_fn, tasks): 42 | return self.pool.map(_PoolWithContext_run, 43 | ((task_fn, task) for task in tasks)) 44 | 45 | def close(self): 46 | self.pool.close() 47 | 48 | def imap_unordered(self, task_fn, tasks): 49 | return self.pool.imap_unordered(_PoolWithContext_run, 50 | ((task_fn, task) for task in tasks)) 51 | 52 | 53 | if __name__ == "__main__": 54 | def _test_init(context): 55 | context['init_return'] = 'hi' 56 | 57 | def _test_init_2(context): 58 | context['hello'] = 2 59 | 60 | def _test_run(args, context): 61 | return (args, context['init_return']) 62 | 63 | def _test_run_2(args, context): 64 | return (args, context) 65 | 66 | p = PoolWithContext(4, _test_init) 67 | p2 = PoolWithContext(4, _test_init_2) 68 | print(p.map(_test_run, ['task1', 'task2', 'task3'])) 69 | print(p2.map(_test_run_2, ['task1', 'task2', 'task3'])) 70 | -------------------------------------------------------------------------------- /tao/tao/utils/yacs_util.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import yaml 4 | from typing import Any, Dict 5 | 6 | from yacs.config import CfgNode 7 | from yacs.config import _valid_type, _VALID_TYPES 8 | 9 | 10 | BASE_KEY = "_BASE_" 11 | 12 | 13 | def _load_yaml_with_base(filename: str, allow_unsafe: bool = False) -> CfgNode: 14 | """ 15 | Just like `yaml.load(open(filename))`, but inherit attributes from its 16 | `_BASE_`. 17 | 18 | Modified from 19 | https://github.com/facebookresearch/fvcore/blob/99cb965c67e675dc3259cd490c1dd78ab03a55ff/fvcore/common/config.py 20 | 21 | Args: 22 | filename (str): the file name of the current config. Will be used to 23 | find the base config file. 24 | allow_unsafe (bool): whether to allow loading the config file with 25 | `yaml.unsafe_load`. 26 | Returns: 27 | (dict): the loaded yaml 28 | """ 29 | with open(filename, "r") as f: 30 | try: 31 | cfg = yaml.safe_load(f) 32 | except yaml.constructor.ConstructorError: 33 | if not allow_unsafe: 34 | raise 35 | logger = logging.getLogger(__name__) 36 | logger.warning( 37 | "Loading config {} with yaml.unsafe_load. Your machine may " 38 | "be at risk if the file contains malicious content.".format( 39 | filename 40 | ) 41 | ) 42 | f.close() 43 | with open(filename, "r") as f: 44 | cfg = yaml.unsafe_load(f) # pyre-ignore 45 | 46 | if cfg is None: 47 | return cfg 48 | 49 | # pyre-ignore 50 | def merge_a_into_b(a: Dict[Any, Any], b: Dict[Any, Any]) -> None: 51 | # merge dict a into dict b. values in a will overwrite b. 
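        # Dictionaries are merged key-by-key and recursively, so a child
        # config only needs to list the keys it overrides; everything else is
        # inherited from its _BASE_ file.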
52 | for k, v in a.items(): 53 | if isinstance(v, dict) and k in b: 54 | assert isinstance( 55 | b[k], dict), "Cannot inherit key '{}' from base!".format(k) 56 | merge_a_into_b(v, b[k]) 57 | else: 58 | b[k] = v 59 | 60 | if BASE_KEY in cfg: 61 | base_cfg_file = cfg[BASE_KEY] 62 | if base_cfg_file.startswith("~"): 63 | base_cfg_file = os.path.expanduser(base_cfg_file) 64 | if not any(map(base_cfg_file.startswith, 65 | ["/", "https://", "http://"])): 66 | # the path to base cfg is relative to the config file itself. 67 | base_cfg_file = os.path.join(os.path.dirname(filename), 68 | base_cfg_file) 69 | base_cfg = _load_yaml_with_base(base_cfg_file, 70 | allow_unsafe=allow_unsafe) 71 | del cfg[BASE_KEY] 72 | if base_cfg is None: 73 | return cfg 74 | 75 | merge_a_into_b(cfg, base_cfg) # pyre-ignore 76 | return base_cfg 77 | return cfg 78 | 79 | 80 | def merge_from_file_with_base(cfg, 81 | cfg_filename: str, 82 | allow_unsafe: bool = False) -> None: 83 | """ 84 | Merge configs from a given yaml file. 85 | 86 | Modified from 87 | https://github.com/facebookresearch/fvcore/blob/99cb965c67e675dc3259cd490c1dd78ab03a55ff/fvcore/common/config.py 88 | 89 | Args: 90 | cfg_filename: the file name of the yaml config. 91 | allow_unsafe: whether to allow loading the config file with 92 | `yaml.unsafe_load`. 93 | """ 94 | loaded_cfg = _load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe) 95 | loaded_cfg = type(cfg)(loaded_cfg) 96 | cfg.merge_from_other_cfg(loaded_cfg) 97 | 98 | 99 | def cfg_to_dict(cfg_node, key_list=[]): 100 | if not isinstance(cfg_node, CfgNode): 101 | assert _valid_type(cfg_node), ( 102 | "Key {} with value {} is not a valid type; valid types: {}".format( 103 | ".".join(key_list), type(cfg_node), _VALID_TYPES)) 104 | return cfg_node 105 | else: 106 | cfg_dict = dict(cfg_node) 107 | for k, v in cfg_dict.items(): 108 | cfg_dict[k] = cfg_to_dict(v, key_list + [k]) 109 | return cfg_dict 110 | 111 | 112 | def cfg_to_flat_dict(cfg_node, key_list=[]): 113 | if not isinstance(cfg_node, CfgNode): 114 | assert _valid_type(cfg_node), ( 115 | "Key {} with value {} is not a valid type; valid types: {}".format( 116 | ".".join(key_list), type(cfg_node), _VALID_TYPES)) 117 | return cfg_node 118 | else: 119 | cfg_dict_flat = {} 120 | for k, v in dict(cfg_node).items(): 121 | updated = cfg_to_dict(v, key_list + [k]) 122 | if isinstance(updated, dict): 123 | for k1, v1 in updated.items(): 124 | cfg_dict_flat['.'.join(key_list + [k, k1])] = v1 125 | else: 126 | cfg_dict_flat['.'.join(key_list + [k])] = updated 127 | return cfg_dict_flat 128 | --------------------------------------------------------------------------------
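A minimal sketch of exercising the `_BASE_` inheritance implemented in tao/tao/utils/yacs_util.py above; the file names and config keys are hypothetical, and (as yacs requires) the default `cfg` must already define every key that the YAML files set:

from pathlib import Path
from yacs.config import CfgNode

from tao.utils.yacs_util import merge_from_file_with_base

# A base config and a child config that inherits it and overrides one value.
Path('base.yaml').write_text('EVAL:\n  IOU_THRESH: 0.5\n  AREA_RNG: all\n')
Path('child.yaml').write_text('_BASE_: base.yaml\nEVAL:\n  IOU_THRESH: 0.75\n')

# Defaults; nested plain dicts are converted to CfgNode automatically.
cfg = CfgNode({'EVAL': {'IOU_THRESH': 0.0, 'AREA_RNG': ''}})
merge_from_file_with_base(cfg, 'child.yaml')
print(cfg.EVAL.IOU_THRESH, cfg.EVAL.AREA_RNG)  # -> 0.75 all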