├── LICENSE ├── Makefile ├── NOTICE ├── README.md ├── data └── .gitignore ├── docker └── Dockerfile ├── experiments ├── crowdhuman.sh ├── kitti_fulltrain.sh ├── kitti_half.sh ├── mot17_fulltrain.sh ├── mot17_half.sh ├── nuScenes_3Dtracking.sh └── pd.sh ├── readme ├── DATA.md ├── GETTING_STARTED.md ├── INSTALL.md └── method.png ├── src ├── _init_paths.py ├── lib │ ├── dataset │ │ ├── dataset_factory.py │ │ ├── datasets │ │ │ ├── crowdhuman.py │ │ │ ├── custom_dataset.py │ │ │ ├── kitti_tracking.py │ │ │ ├── mot.py │ │ │ ├── nuscenes_tracking.py │ │ │ └── pd_tracking.py │ │ ├── generic_dataset.py │ │ ├── joint_loader.py │ │ └── video_dataset.py │ ├── detector.py │ ├── external │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── nms.pyx │ │ └── setup.py │ ├── logger.py │ ├── model │ │ ├── ConvGRU.py │ │ ├── data_parallel.py │ │ ├── decode.py │ │ ├── losses.py │ │ ├── matcher.py │ │ ├── model.py │ │ ├── networks │ │ │ ├── backbones │ │ │ │ ├── dla.py │ │ │ │ ├── mobilenet.py │ │ │ │ └── resnet.py │ │ │ ├── base_model.py │ │ │ ├── dla.py │ │ │ ├── dlav0.py │ │ │ ├── generic_network.py │ │ │ ├── necks │ │ │ │ ├── dlaup.py │ │ │ │ └── msraup.py │ │ │ ├── resdcn.py │ │ │ └── resnet.py │ │ ├── scatter_gather.py │ │ └── utils.py │ ├── opts.py │ ├── trainer.py │ └── utils │ │ ├── __init__.py │ │ ├── ddd_utils.py │ │ ├── debugger.py │ │ ├── image.py │ │ ├── pose.py │ │ ├── post_process.py │ │ ├── tracker.py │ │ └── utils.py ├── main.py ├── test.py └── tools │ ├── _init_paths.py │ ├── annot_bbox.py │ ├── convert_crowdhuman_to_coco.py │ ├── convert_kitti_to_tao.py │ ├── convert_kittitrack_to_coco.py │ ├── convert_mot_det_to_results.py │ ├── convert_mot_to_coco.py │ ├── convert_nuScenes.py │ ├── eval_kitti_track │ ├── data │ │ └── tracking │ │ │ ├── evaluate_tracking.seqmap │ │ │ ├── evaluate_tracking.seqmap.test │ │ │ ├── evaluate_tracking.seqmap.training │ │ │ ├── evaluate_trackingtrain_1-2.seqmap │ │ │ ├── evaluate_trackingtrain_2-2.seqmap │ │ │ ├── evaluate_trackingval_half.seqmap │ │ │ ├── label_02 │ │ │ ├── 0000.txt │ │ │ ├── 0001.txt │ │ │ ├── 0002.txt │ │ │ ├── 0003.txt │ │ │ ├── 0004.txt │ │ │ ├── 0005.txt │ │ │ ├── 0006.txt │ │ │ ├── 0007.txt │ │ │ ├── 0008.txt │ │ │ ├── 0009.txt │ │ │ ├── 0010.txt │ │ │ ├── 0011.txt │ │ │ ├── 0012.txt │ │ │ ├── 0013.txt │ │ │ ├── 0014.txt │ │ │ ├── 0015.txt │ │ │ ├── 0016.txt │ │ │ ├── 0017.txt │ │ │ ├── 0018.txt │ │ │ ├── 0019.txt │ │ │ └── 0020.txt │ │ │ ├── label_02_train_half │ │ │ ├── 0000.txt │ │ │ ├── 0001.txt │ │ │ ├── 0002.txt │ │ │ ├── 0003.txt │ │ │ ├── 0004.txt │ │ │ ├── 0005.txt │ │ │ ├── 0006.txt │ │ │ ├── 0007.txt │ │ │ ├── 0008.txt │ │ │ ├── 0009.txt │ │ │ ├── 0010.txt │ │ │ ├── 0011.txt │ │ │ ├── 0012.txt │ │ │ ├── 0013.txt │ │ │ ├── 0014.txt │ │ │ ├── 0015.txt │ │ │ ├── 0016.txt │ │ │ ├── 0017.txt │ │ │ ├── 0018.txt │ │ │ ├── 0019.txt │ │ │ └── 0020.txt │ │ │ └── label_02_val_half │ │ │ ├── 0000.txt │ │ │ ├── 0001.txt │ │ │ ├── 0002.txt │ │ │ ├── 0003.txt │ │ │ ├── 0004.txt │ │ │ ├── 0005.txt │ │ │ ├── 0006.txt │ │ │ ├── 0007.txt │ │ │ ├── 0008.txt │ │ │ ├── 0009.txt │ │ │ ├── 0010.txt │ │ │ ├── 0011.txt │ │ │ ├── 0012.txt │ │ │ ├── 0013.txt │ │ │ ├── 0014.txt │ │ │ ├── 0015.txt │ │ │ ├── 0016.txt │ │ │ ├── 0017.txt │ │ │ ├── 0018.txt │ │ │ ├── 0019.txt │ │ │ └── 0020.txt │ ├── evaluate_tracking.py │ ├── mailpy.py │ └── munkres.py │ ├── eval_motchallenge.py │ ├── get_mot_17.sh │ ├── interp_mot.py │ ├── nuScenes_lib │ ├── export_kitti.py │ └── utils_kitti.py │ ├── remove_optimizers.py │ ├── vis_tracking_kitti.py │ └── 
vis_tracking_mot.py └── tao ├── .gitignore ├── LICENSE ├── README.md ├── docs ├── challenge.md ├── detector_train.md ├── download.md ├── download_hacs_alt.md ├── evaluation.md ├── faqs.md ├── manual_download.md └── trackers.md ├── scripts ├── detectors │ ├── detectron2_infer.py │ ├── detectron2_train_net.py │ └── merge_coco_with_lvis.py ├── download │ ├── download_annotations.py │ ├── download_ava.py │ ├── download_cfg.yaml │ ├── download_hacs.py │ ├── download_helper.py │ ├── extract_frames.py │ ├── gen_checksums.py │ ├── meta │ │ ├── ava_file_names_test_v2.1.txt │ │ └── ava_file_names_trainval_v2.1.txt │ └── verify.py ├── evaluation │ ├── configs │ │ └── default.yaml │ └── evaluate.py └── trackers │ └── sort │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── create_json_for_eval.py │ ├── requirements.txt │ ├── sort.py │ ├── sort_with_detection_id.py │ └── track.py ├── setup.py └── tao ├── __init__.py ├── toolkit ├── __init__.py └── tao │ ├── __init__.py │ ├── eval.py │ ├── results.py │ └── tao.py └── utils ├── __init__.py ├── detectron2 └── datasets.py ├── download.py ├── evaluation.py ├── fs.py ├── misc.py ├── parallel ├── __init__.py ├── fixed_gpu_pool.py └── pool_context.py ├── s3.py ├── video.py ├── yacs_util.py └── ytdl.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Pavel Tokmakov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Handy commands: 2 | # - `make docker-build`: builds DOCKER_IMAGE 3 | PROJECT ?= permatrack 4 | WORKSPACE ?= /workspace/$(PROJECT) 5 | DOCKER_IMAGE ?= ${PROJECT}:latest 6 | 7 | SHMSIZE ?= 444G 8 | DOCKER_OPTS := \ 9 | --name ${PROJECT} \ 10 | --rm -it \ 11 | --shm-size=${SHMSIZE} \ 12 | -e AWS_DEFAULT_REGION \ 13 | -e AWS_ACCESS_KEY_ID \ 14 | -e AWS_SECRET_ACCESS_KEY \ 15 | -e HOST_HOSTNAME= \ 16 | -e NCCL_DEBUG=VERSION \ 17 | -e DISPLAY=${DISPLAY} \ 18 | -e XAUTHORITY \ 19 | -e NVIDIA_DRIVER_CAPABILITIES=all \ 20 | -v ~/.aws:/root/.aws \ 21 | -v /root/.ssh:/root/.ssh \ 22 | -v ~/.cache:/root/.cache \ 23 | -v /data:/data \ 24 | -v /mnt/fsx/:/mnt/fsx \ 25 | -v /dev/null:/dev/raw1394 \ 26 | -v /tmp:/tmp \ 27 | -v /tmp/.X11-unix/X0:/tmp/.X11-unix/X0 \ 28 | -v /var/run/docker.sock:/var/run/docker.sock \ 29 | -v ${PWD}:${WORKSPACE} \ 30 | -w ${WORKSPACE} \ 31 | --privileged \ 32 | --ipc=host \ 33 | --network=host 34 | 35 | NGPUS=$(shell nvidia-smi -L | wc -l) 36 | 37 | 38 | .PHONY: all clean docker-build 39 | 40 | all: clean 41 | 42 | clean: 43 | find . -name "*.pyc" | xargs rm -f && \ 44 | find . -name "__pycache__" | xargs rm -rf 45 | 46 | docker-build: 47 | docker build \ 48 | -f docker/Dockerfile \ 49 | -t ${DOCKER_IMAGE} . 50 | 51 | docker-start-interactive: docker-build 52 | nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} bash 53 | 54 | docker-run: docker-build 55 | nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \ 56 | bash -c "${COMMAND}" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learning to Track with Object Permanence 2 | A video-based MOT approach capable of tracking through full occlusions: 3 | ![](readme/method.png) 4 | > [**Learning to Track with Object Permanence**](https://arxiv.org/pdf/2103.14258.pdf), 5 | > Pavel Tokmakov, Jie Li, Wolfram Burgard, Adrien Gaidon, 6 | > *arXiv technical report ([arXiv 2103.14258](https://arxiv.org/pdf/2103.14258.pdf))* 7 | 8 | 9 | @inproceedings{tokmakov2021learning, 10 | title={Learning to Track with Object Permanence}, 11 | author={Tokmakov, Pavel and Li, Jie and Burgard, Wolfram and Gaidon, Adrien}, 12 | booktitle={ICCV}, 13 | year={2021} 14 | } 15 | 16 | Check out our self-supervised extension published at ICML'22: 17 | > [**Object Permanence Emerges in a Random Walk along Memory**](https://arxiv.org/abs/2204.01784), 18 | > Pavel Tokmakov, Allan Jabri, Jie Li, Adrien Gaidon, 19 | > *arXiv technical report ([arXiv 2204.01784](https://arxiv.org/pdf/2204.01784.pdf))* 20 | 21 | 22 | @inproceedings{tokmakov2022object, 23 | title={Object Permanence Emerges in a Random Walk along Memory}, 24 | author={Tokmakov, Pavel and Jabri, Allan and Li, Jie and Gaidon, Adrien}, 25 | booktitle={ICML}, 26 | year={2022} 27 | } 28 | 29 | ## Abstract 30 | Tracking by detection, the dominant approach for online multi-object tracking, alternates between localization and association steps. As a result, it strongly depends on the quality of instantaneous observations, often failing when objects are not fully visible. In contrast, tracking in humans is underpinned by the notion of object permanence: once an object is recognized, we are aware of its physical existence and can approximately localize it even under full occlusions.
In this work, we introduce an end-to-end trainable approach for joint object detection and tracking that is capable of such reasoning. We build on top of the recent CenterTrack architecture, which takes pairs of frames as input, and extend it to videos of arbitrary length. To this end, we augment the model with a spatio-temporal, recurrent memory module, allowing it to reason about object locations and identities in the current frame using all the previous history. It is, however, not obvious how to train such an approach. We study this question on a new, large-scale, synthetic dataset for multi-object tracking, which provides ground truth annotations for invisible objects, and propose several approaches for supervising tracking behind occlusions. Our model, trained jointly on synthetic and real data, outperforms the state of the art on the KITTI and MOT17 datasets thanks to its robustness to occlusions. 31 | 32 | ## Installation 33 | 34 | Please refer to [INSTALL.md](readme/INSTALL.md) for installation instructions. 35 | 36 | ## Benchmark Evaluation and Training 37 | 38 | After [installation](readme/INSTALL.md), follow the instructions in [DATA.md](readme/DATA.md) to set up the datasets. Then check [GETTING_STARTED.md](readme/GETTING_STARTED.md) to reproduce the results in the paper. 39 | We provide scripts for all the experiments in the [experiments](experiments) folder. 40 | 41 | ## License 42 | 43 | PermaTrack is developed upon [CenterTrack](https://github.com/xingyizhou/CenterTrack). Both codebases are released under the MIT License. Some code in CenterTrack comes from third parties with different licenses; please check the CenterTrack repo for details. In addition, this repo uses [py-motmetrics](https://github.com/cheind/py-motmetrics) for MOT evaluation, [nuscenes-devkit](https://github.com/nutonomy/nuscenes-devkit) for nuScenes evaluation and preprocessing, and the [TAO codebase](https://github.com/TAO-Dataset/tao) for computing Track AP. The ConvGRU implementation is adapted from [this](https://github.com/happyjin/ConvGRU-pytorch) repo. See [NOTICE](NOTICE) for details. Please also note the license of each dataset; most of the datasets we used in this project are under non-commercial licenses. 44 | 45 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved.
2 | 3 | FROM nvidia/cuda:10.0-devel-ubuntu18.04 4 | 5 | ENV PROJECT=permatrack 6 | ENV PYTORCH_VERSION=1.4 7 | ENV TORCHVISION_VERSION=0.5.0 8 | ENV CUDNN_VERSION=7.6.5.32-1+cuda10.1 9 | ENV NCCL_VERSION=2.4.8-1+cuda10.1 10 | ENV TRT_VERSION=6.0.1.5 11 | ENV LC_ALL=C.UTF-8 12 | ENV LANG=C.UTF-8 13 | 14 | ARG python=3.6 15 | ENV PYTHON_VERSION=${python} 16 | ENV DEBIAN_FRONTEND=noninteractive 17 | 18 | # Set default shell to /bin/bash 19 | SHELL ["/bin/bash", "-cu"] 20 | 21 | RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \ 22 | build-essential \ 23 | cmake \ 24 | g++-4.8 \ 25 | git \ 26 | curl \ 27 | docker.io \ 28 | vim \ 29 | wget \ 30 | ca-certificates \ 31 | libcudnn7=${CUDNN_VERSION} \ 32 | libnccl2=${NCCL_VERSION} \ 33 | libnccl-dev=${NCCL_VERSION} \ 34 | libjpeg-dev \ 35 | libpng-dev \ 36 | python${PYTHON_VERSION} \ 37 | python${PYTHON_VERSION}-dev \ 38 | python3-tk \ 39 | librdmacm1 \ 40 | libibverbs1 \ 41 | libgtk2.0-dev \ 42 | unzip \ 43 | bzip2 \ 44 | htop \ 45 | gnuplot \ 46 | ffmpeg 47 | 48 | # Install OpenSSH for MPI to communicate between containers 49 | RUN apt-get install -y --no-install-recommends openssh-client openssh-server && \ 50 | mkdir -p /var/run/sshd 51 | 52 | RUN ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python 53 | 54 | RUN curl -O https://bootstrap.pypa.io/get-pip.py && \ 55 | python get-pip.py && \ 56 | rm get-pip.py 57 | 58 | # Install Pydata and other deps 59 | RUN pip install easydict scipy numpy pyquaternion matplotlib jupyter h5py \ 60 | awscli nuscenes-devkit tqdm progress path.py pyyaml opencv-python \ 61 | pycuda numba cython motmetrics scikit-learn==0.22.2 moviepy imageio yacs 62 | 63 | # Install PyTorch 64 | RUN pip install torch==${PYTORCH_VERSION} \ 65 | torchvision==${TORCHVISION_VERSION} && ldconfig 66 | 67 | RUN pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' 68 | 69 | RUN pip3 install git+https://github.com/achalddave/python-script-utils.git@v0.0.2#egg=script_utils 70 | 71 | # create project workspace dir 72 | RUN mkdir -p /workspace/experiments 73 | RUN mkdir -p /workspace/${PROJECT} 74 | WORKDIR /workspace/${PROJECT} 75 | 76 | # Copy project source last (to avoid cache busting) 77 | WORKDIR /workspace/${PROJECT} 78 | COPY . /workspace/${PROJECT} 79 | ENV PYTHONPATH="/workspace/${PROJECT}:$PYTHONPATH" -------------------------------------------------------------------------------- /experiments/crowdhuman.sh: -------------------------------------------------------------------------------- 1 | # Initial model pre-trained on PD: https://tri-ml-public.s3.amazonaws.com/github/permatrack/pd_17fr_21ep_vis.pth 2 | # Resulting model trained on CrowdHuman: https://tri-ml-public.s3.amazonaws.com/github/permatrack/crowdhuman.pth 3 | 4 | cd src 5 | # train 6 | python main.py tracking --exp_id crowdhuman --occlusion_thresh 0.15 --visibility_thresh 0.05 --dataset joint --dataset1 crowdhuman --dataset2 pd_tracking --dataset_version x --same_aug_pre --hm_disturb 0.0 --lost_disturb 0.0 --fp_disturb 0.0 --gpus 0,1,2,3,4,5,6,7 --batch_size 2 --load_model ../models/pd_17fr_21ep_vis.pth --val_intervals 100 --is_recurrent --gru_filter_size 7 --input_len 17 --pre_thresh 0.4 --hm_weight 0.5 --const_v_over_occl --sup_invis --invis_hm_weight 20 --use_occl_len --occl_len_mult 5 --visibility --num_iter 5000 --num_epochs 9 --lr_step 5 --ltrb_amodal --only_ped --reuse_hm 7 | cd .. 
8 | -------------------------------------------------------------------------------- /experiments/kitti_fulltrain.sh: -------------------------------------------------------------------------------- 1 | # Initial model pre-trained on PD: https://tri-ml-public.s3.amazonaws.com/github/permatrack/pd_17fr_21ep_vis.pth 2 | # Resulting model trained on KITTI full train: https://tri-ml-public.s3.amazonaws.com/github/permatrack/kitti_full.pth 3 | 4 | cd src 5 | # train 6 | python main.py tracking --exp_id kitti_fulltrain --occlusion_thresh 0.15 --visibility_thresh 0.05 --dataset joint --dataset1 kitti_tracking --dataset2 pd_tracking --dataset_version train --same_aug_pre --hm_disturb 0.0 --lost_disturb 0.0 --fp_disturb 0.0 --gpus 0,1,2,3,4,5,6,7 --batch_size 2 --load_model ../models/pd_17fr_21ep_vis.pth --val_intervals 1 --is_recurrent --gru_filter_size 7 --input_len 17 --pre_thresh 0.4 --hm_weight 0.5 --const_v_over_occl --sup_invis --invis_hm_weight 20 --use_occl_len --occl_len_mult 5 --visibility --num_iter 5000 --num_epochs 5 --lr_step 4 --visibility_thresh_eval 0.2 7 | # test 8 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id kitti_fulltrain --dataset kitti_tracking --dataset_version test --track_thresh 0.4 --resume --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility --visibility_thresh_eval 0.2 --stream_test --flip_test --trainval 9 | -------------------------------------------------------------------------------- /experiments/kitti_half.sh: -------------------------------------------------------------------------------- 1 | # Initial model pre-trained on PD: https://tri-ml-public.s3.amazonaws.com/github/permatrack/pd_17fr_21ep_vis.pth 2 | # Resulting model trained on KITTI half train: https://tri-ml-public.s3.amazonaws.com/github/permatrack/kitti_half_pd_5ep.pth 3 | 4 | cd src 5 | # train 6 | python main.py tracking --exp_id kitti_half --occlusion_thresh 0.15 --visibility_thresh 0.05 --dataset joint --dataset1 kitti_tracking --dataset2 pd_tracking --dataset_version train_half --same_aug_pre --hm_disturb 0.0 --lost_disturb 0.0 --fp_disturb 0.0 --gpus 0,1,2,3,4,5,6,7 --batch_size 2 --load_model ../models/pd_17fr_21ep_vis.pth --val_intervals 1 --is_recurrent --gru_filter_size 7 --input_len 17 --pre_thresh 0.4 --hm_weight 0.5 --const_v_over_occl --sup_invis --invis_hm_weight 20 --use_occl_len --occl_len_mult 5 --visibility --num_iter 5000 --num_epochs 5 --lr_step 4 --visibility_thresh_eval 0.2 7 | # test 8 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id kitti_half --dataset kitti_tracking --dataset_version val_half --track_thresh 0.4 --resume --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility --visibility_thresh_eval 0.2 --stream_test 9 | -------------------------------------------------------------------------------- /experiments/mot17_fulltrain.sh: -------------------------------------------------------------------------------- 1 | # Initial model pre-trained on PD + CrowdHuman: https://tri-ml-public.s3.amazonaws.com/github/permatrack/crowdhuman.pth 2 | # Resulting model trained on MOT17 full train: https://tri-ml-public.s3.amazonaws.com/github/permatrack/mot_full.pth 3 | 4 | cd src 5 | # train 6 | python main.py tracking --exp_id mot17_half --occlusion_thresh 0.15 --visibility_thresh 0.05 --dataset joint --dataset1 mot --dataset2 pd_tracking --dataset_version 17trainval --same_aug_pre --hm_disturb 0.0 --lost_disturb 0.0 --fp_disturb 0.0 --gpus 0,1,2,3,4,5,6,7 --batch_size 2 --load_model ../models/crowdhuman.pth --val_intervals 1 
--is_recurrent --gru_filter_size 7 --input_len 17 --pre_thresh 0.4 --hm_weight 0.5 --const_v_over_occl --sup_invis --invis_hm_weight 20 --use_occl_len --occl_len_mult 5 --visibility --num_iter 1600 --num_epochs 5 --lr_step 4 --visibility_thresh_eval 0.1 --ltrb_amodal --only_ped --reuse_hm 7 | # test 8 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id mot17_fulltrain --dataset mot --dataset_version test --track_thresh 0.4 --resume --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility --max_age 32 --trainval 9 | cd .. 10 | -------------------------------------------------------------------------------- /experiments/mot17_half.sh: -------------------------------------------------------------------------------- 1 | # Initial model pre-trained on PD + CrowdHuman: https://tri-ml-public.s3.amazonaws.com/github/permatrack/crowdhuman.pth 2 | # Resulting model trained on MOT17 half train: https://tri-ml-public.s3.amazonaws.com/github/permatrack/mot_half.pth 3 | 4 | cd src 5 | # train 6 | python main.py tracking --exp_id mot17_half --occlusion_thresh 0.15 --visibility_thresh 0.05 --dataset joint --dataset1 mot --dataset2 pd_tracking --dataset_version 17halftrain --same_aug_pre --hm_disturb 0.0 --lost_disturb 0.0 --fp_disturb 0.0 --gpus 0,1,2,3,4,5,6,7 --batch_size 2 --load_model ../models/crowdhuman.pth --val_intervals 1 --is_recurrent --gru_filter_size 7 --input_len 17 --pre_thresh 0.4 --hm_weight 0.5 --const_v_over_occl --sup_invis --invis_hm_weight 20 --use_occl_len --occl_len_mult 5 --visibility --num_iter 1600 --num_epochs 5 --lr_step 4 --visibility_thresh_eval 0.1 --ltrb_amodal --only_ped --reuse_hm 7 | # test 8 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --track_thresh 0.4 --resume --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility 9 | # test with T.R. 10 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --track_thresh 0.4 --resume --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility --max_age 32 11 | # test with public detection 12 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --track_thresh 0.4 --resume --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility --public_det --load_results ../data/mot17/results/val_half_det.json 13 | cd .. 
14 | -------------------------------------------------------------------------------- /experiments/nuScenes_3Dtracking.sh: -------------------------------------------------------------------------------- 1 | TBD -------------------------------------------------------------------------------- /experiments/pd.sh: -------------------------------------------------------------------------------- 1 | # Initial model pre-trained on NuScenes3D: https://drive.google.com/open?id=1ZSG9swryMEfBJ104WH8CP7kcypCobFlU 2 | # Resulting model trained on PD: https://tri-ml-public.s3.amazonaws.com/github/permatrack/pd_17fr_21ep_vis.pth 3 | 4 | cd src 5 | # train 6 | python main.py tracking --exp_id pd_supinvis --occlusion_thresh 0.15 --visibility_thresh 0.05 --dataset pd_tracking --dataset_version val --same_aug_pre --hm_disturb 0.0 --lost_disturb 0.0 --fp_disturb 0.0 --gpus 0,1,2,3,4,5,6,7 --batch_size 2 --load_model ../models/nuScenes_3Ddetection_e140.pth --val_intervals 2 --is_recurrent --gru_filter_size 7 --input_len 17 --pre_thresh 0.4 --hm_weight 0.5 --num_epochs 21 --lr_step 7 --const_v_over_occl --sup_invis --invis_hm_weight 20 --use_occl_len --occl_len_mult 5 --num_iter 5000 --visibility --visibility_thresh_eval 0.2 7 | # test 8 | CUDA_VISIBLE_DEVICES=0 python test.py tracking --exp_id pd_supinvis --dataset pd_tracking --dataset_version val --track_thresh 0.4 --resume --is_recurrent --debug 4 --gru_filter_size 7 --num_gru_layers 1 --stream_test 9 | cd .. 10 | -------------------------------------------------------------------------------- /readme/GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This document provides tutorials to train and evaluate PermaTrack. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md). 4 | 5 | ## Benchmark evaluation 6 | 7 | ### PD 8 | 9 | To test our pretrained model on the validation set of PD, download the [model](https://tri-ml-public.s3.amazonaws.com/github/permatrack/pd_17fr_21ep_vis.pth), copy it to `$PermaTrack_ROOT/models/`, and run 10 | 11 | ~~~ 12 | cd $PermaTrack_ROOT/src 13 | python test.py tracking --exp_id pd --dataset pd_tracking --dataset_version val --track_thresh 0.4 --load_model ../models/pd_17fr_21ep_vis.pth --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --stream_test 14 | ~~~ 15 | 16 | This will give a Track mAP of `66.96` if set up correctly. You can append `--debug 4` to the above command to visualize the predictions. 17 | 18 | Please note that we are ignoring ground truth invisible object annotations in the validation set of PD (methods are not penalized for missing those boxes), but we are using them to filter out predictions which have a high overlap with a ground truth invisible box (to avoid counting such predictions as false positives; this was important for a fair evaluation before we introduced the visibility head). As a result, the performance of our method with and without visibility estimation described in the paper does not change much on PD. In the main experiments we did not use visibility estimation during evaluation on PD, but you can add it by appending `--visibility --visibility_thresh_eval 0.2` to the above command. The expected Track mAP is `66.78`.
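In other words, the filtering step described above simply drops any predicted box whose IoU with an invisible ground-truth box exceeds a threshold before Track mAP is computed. The snippet below is only a minimal sketch of that idea for illustration; the function names and the threshold are ours and do not correspond to the actual evaluation code.

~~~
def iou(box_a, box_b):
    # boxes are [x1, y1, x2, y2]
    x1, y1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    x2, y2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / max(area_a + area_b - inter, 1e-9)

def drop_predictions_on_invisible(pred_boxes, invisible_gt_boxes, thresh=0.5):
    # a prediction that mostly covers an invisible ground-truth box is removed,
    # so it is not counted as a false positive
    return [p for p in pred_boxes
            if all(iou(p, g) < thresh for g in invisible_gt_boxes)]
~~~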
19 | 20 | ### KITTI Tracking 21 | 22 | To test the tracking performance on the validation set of KITTI with our pretrained model, download the [model](https://tri-ml-public.s3.amazonaws.com/github/permatrack/kitti_half_pd_5ep.pth), copy it to `$PermaTrack_ROOT/models/`, and run 23 | 24 | ~~~ 25 | python test.py tracking --exp_id kitti_half --dataset kitti_tracking --dataset_version val_half --track_thresh 0.4 --load_model ../models/kitti_half_pd_5ep.pth --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility --visibility_thresh_eval 0.2 --stream_test 26 | ~~~ 27 | 28 | The expected Track mAP is `70.53`. Here Track AP evaluation also takes into account ignore regions in KITTI annotations (detections falling into these regions are not counted as false positives). 29 | 30 | ### MOT17 31 | 32 | To test the tracking performance on the validation set of MOT17, download the [model](https://tri-ml-public.s3.amazonaws.com/github/permatrack/mot_half.pth), copy it to `$PermaTrack_ROOT/models/`, and run 33 | 34 | ~~~ 35 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --track_thresh 0.4 --load_model ../models/mot_half.pth --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility 36 | ~~~ 37 | 38 | The expected IDF1 is `68.2`. 39 | 40 | To test with Track Rebirth, run 41 | 42 | ~~~ 43 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --track_thresh 0.4 --load_model ../models/mot_half.pth --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility --max_age 32 44 | ~~~ 45 | 46 | The expected IDF1 is `71.9`. 47 | 48 | To test with public detections, run 49 | 50 | ~~~ 51 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --track_thresh 0.4 --load_model ../models/mot_half.pth --is_recurrent --gru_filter_size 7 --num_gru_layers 1 --visibility_thresh_eval 0.1 --stream_test --only_ped --ltrb_amodal --visibility --public_det --load_results ../data/mot17/results/val_half_det.json 52 | ~~~ 53 | 54 | The expected IDF1 is `67.0`. 55 | 56 | ### nuScenes 57 | 58 | To test the tracking performance on the validation set of nuScenes, download the [model](https://tri-ml-public.s3.amazonaws.com/github/permatrack/nu_stage_3_17fr.pth), copy it to `$PermaTrack_ROOT/models/`, update `motmetrics` with 59 | 60 | ~~~ 61 | pip install motmetrics==1.1.3 62 | ~~~ 63 | 64 | then run 65 | 66 | ~~~ 67 | CUDA_VISIBLE_DEVICES=1 python test.py tracking,ddd --exp_id nuscenes_tracking --dataset nuscenes_tracking --track_thresh 0.1 --resume --is_recurrent --gru_filter_size 7 --stream_test --load_model ../models/nu_stage_3_17fr.pth --visibility 68 | ~~~ 69 | 70 | The expected AMOTA is `10.9`. 71 | 72 | ## Training 73 | We have packed all the training scripts in the [experiments](../experiments) folder. 74 | Each model is trained on 8 Tesla V100 GPUs with 32GB of memory. 75 | If the training is terminated before finishing, you can use the same command with `--resume` to resume training. It will find the latest model with the same `exp_id`. 76 | All experiments rely on existing pretrained models; we provide the links to the corresponding models directly in the training scripts.
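Most of the training scripts pair a real dataset with PD via `--dataset joint --dataset1 <real_dataset> --dataset2 pd_tracking`. Batches from the two datasets are interleaved by the loader in `src/lib/dataset/joint_loader.py`; the sketch below condenses its logic (the active loader is re-drawn at random every few steps, and an exhausted loader is simply restarted), with a simplified constructor signature for illustration.

~~~
import random

class JointIterator:
    # alternate between two data loaders in blocks of `steps_per_block` batches
    def __init__(self, loader1, loader2, steps_per_block=5):
        self.loaders = [loader1, loader2]
        self.iters = [iter(loader1), iter(loader2)]
        self.steps_per_block = steps_per_block
        self.active = 0
        self.counter = steps_per_block

    def __next__(self):
        if self.counter == 0:
            # pick which dataset feeds the next block of batches
            self.active = random.randint(0, 1)
            self.counter = self.steps_per_block
        batch = next(self.iters[self.active], None)
        if batch is None:
            # restart the exhausted loader and draw from it again
            self.iters[self.active] = iter(self.loaders[self.active])
            batch = next(self.iters[self.active])
        self.counter -= 1
        return batch
~~~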
77 | -------------------------------------------------------------------------------- /readme/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 4 | 1. We provide a Dockerfile to re-create the environment which was used in our experiments under `$PermaTrack_ROOT/docker/Dockerfile`. You can either configure the environment yourself, using the Dockerfile as a guide, or build it via: 5 | ~~~ 6 | cd $PermaTrack_ROOT 7 | make docker-build 8 | make docker-start-interactive 9 | ~~~ 10 | 11 | 2. The only step that has to be done manually is compiling the deformable convolutions module: 12 | 13 | ~~~ 14 | cd $PermaTrack_ROOT/src/lib/model/networks/ 15 | git clone https://github.com/CharlesShang/DCNv2/ 16 | cd DCNv2 17 | ./make.sh 18 | ~~~ 19 | -------------------------------------------------------------------------------- /readme/method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/readme/method.png -------------------------------------------------------------------------------- /src/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/lib/dataset/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | from .datasets.mot import MOT 12 | from .datasets.crowdhuman import CrowdHuman 13 | from .datasets.kitti_tracking import KITTITracking 14 | from .datasets.pd_tracking import PDTracking 15 | from .datasets.custom_dataset import CustomDataset 16 | from .datasets.nuscenes_tracking import nuScenesTracking 17 | 18 | dataset_factory = { 19 | 'custom': CustomDataset, 20 | 'mot': MOT, 21 | 'crowdhuman': CrowdHuman, 22 | 'kitti_tracking': KITTITracking, 23 | 'pd_tracking': PDTracking, 24 | 'nuscenes_tracking': nuScenesTracking 25 | } 26 | 27 | 28 | def get_dataset(dataset): 29 | return dataset_factory[dataset] 30 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/crowdhuman.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | from ..generic_dataset import GenericDataset 12 | from ..video_dataset import VideoDataset 13 | 14 | class CrowdHuman(VideoDataset): 15 | num_categories = 1 16 | num_joints = 17 17 | default_resolution = [512, 512] 18 | max_objs = 500 19 | class_name = ['person'] 20 | cat_ids = {1: 1} 21 | def __init__(self, opt, split): 22 | data_dir = os.path.join(opt.data_dir, 'crowdhuman') 23 | img_dir = os.path.join( 24 | data_dir,
'CrowdHuman_{}'.format(split), 'Images') 25 | ann_path = os.path.join(data_dir, 'annotations', 26 | '{}.json').format(split) 27 | 28 | print('==> initializing CityPersons {} data.'.format(split)) 29 | 30 | self.images = None 31 | # load image list and coco 32 | super(CrowdHuman, self).__init__(opt, split, ann_path, img_dir) 33 | 34 | self.num_samples = len(self.images) 35 | self.same_aug_pre = False 36 | self.stride = 1 37 | self.shift = 0.05 38 | self.box_size_thresh = [0] 39 | 40 | print('Loaded {} {} samples'.format(split, self.num_samples)) 41 | 42 | def _to_float(self, x): 43 | return float("{:.2f}".format(x)) 44 | 45 | def _save_results(self, records, fpath): 46 | with open(fpath,'w') as fid: 47 | for record in records: 48 | line = json.dumps(record)+'\n' 49 | fid.write(line) 50 | return fpath 51 | 52 | def convert_eval_format(self, all_bboxes): 53 | detections = [] 54 | person_id = 1 55 | for image_id in all_bboxes: 56 | if type(all_bboxes[image_id]) != type({}): 57 | # newest format 58 | dtboxes = [] 59 | for j in range(len(all_bboxes[image_id])): 60 | item = all_bboxes[image_id][j] 61 | if item['class'] != person_id: 62 | continue 63 | bbox = item['bbox'] 64 | bbox[2] -= bbox[0] 65 | bbox[3] -= bbox[1] 66 | bbox_out = list(map(self._to_float, bbox[0:4])) 67 | detection = { 68 | "tag": 1, 69 | "box": bbox_out, 70 | "score": float("{:.2f}".format(item['score'])) 71 | } 72 | dtboxes.append(detection) 73 | img_info = self.coco.loadImgs(ids=[image_id])[0] 74 | file_name = img_info['file_name'] 75 | detections.append({'ID': file_name[:-4], 'dtboxes': dtboxes}) 76 | return detections 77 | 78 | def __len__(self): 79 | return self.num_samples 80 | 81 | def save_results(self, results, save_dir): 82 | self._save_results(self.convert_eval_format(results), 83 | '{}/results_crowdhuman.odgt'.format(save_dir)) 84 | def run_eval(self, results, save_dir, write_to_file=False, dataset_version="withcrowd1000_visible_nocamerafilter_val"): 85 | self.save_results(results, save_dir) 86 | # try: 87 | # os.system('python tools/crowdhuman_eval/demo.py ' + \ 88 | # '../data/crowdhuman/annotation_val.odgt ' + \ 89 | # '{}/results_crowdhuman.odgt'.format(save_dir)) 90 | # except: 91 | # print('Crowdhuman evaluation not setup!') 92 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/custom_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from ..generic_dataset import GenericDataset 6 | 7 | class CustomDataset(GenericDataset): 8 | num_categories = 1 9 | default_resolution = [-1, -1] 10 | class_name = [''] 11 | max_objs = 128 12 | cat_ids = {1: 1} 13 | def __init__(self, opt, split): 14 | assert (opt.custom_dataset_img_path != '') and \ 15 | (opt.custom_dataset_ann_path != '') and \ 16 | (opt.num_classes != -1) and \ 17 | (opt.input_h != -1) and (opt.input_w != -1), \ 18 | 'The following arguments must be specified for custom datasets: ' + \ 19 | 'custom_dataset_img_path, custom_dataset_ann_path, num_classes, ' + \ 20 | 'input_h, input_w.' 
21 | img_dir = opt.custom_dataset_img_path 22 | ann_path = opt.custom_dataset_ann_path 23 | self.num_categories = opt.num_classes 24 | self.class_name = ['' for _ in range(self.num_categories)] 25 | self.default_resolution = [opt.input_h, opt.input_w] 26 | self.cat_ids = {i: i for i in range(1, self.num_categories + 1)} 27 | 28 | self.images = None 29 | # load image list and coco 30 | super().__init__(opt, split, ann_path, img_dir) 31 | 32 | self.num_samples = len(self.images) 33 | print('Loaded Custom dataset {} samples'.format(self.num_samples)) 34 | 35 | def __len__(self): 36 | return self.num_samples 37 | 38 | def run_eval(self, results, save_dir): 39 | pass 40 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/mot.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | from collections import defaultdict 11 | from ..video_dataset import VideoDataset 12 | 13 | class MOT(VideoDataset): 14 | num_categories = 1 15 | default_resolution = [544, 960] 16 | class_name = [''] 17 | max_objs = 256 18 | cat_ids = {1: 1, -1: -1} 19 | def __init__(self, opt, split): 20 | self.dataset_version = opt.dataset_version 21 | self.year = 17 22 | print('Using MOT {} {}'.format(self.year, self.dataset_version)) 23 | data_dir = os.path.join(opt.data_dir, 'mot{}'.format(self.year)) 24 | 25 | if split == 'val': 26 | ann_file = '{}.json'.format('val_half') 27 | elif split == 'test': 28 | ann_file = '{}.json'.format('test') 29 | elif self.dataset_version == '17trainval': 30 | ann_file = '{}.json'.format('train_interp') 31 | else: 32 | ann_file = '{}.json'.format('train_half_interp') 33 | 34 | img_dir = os.path.join(data_dir, '{}'.format( 35 | 'test' if 'test' in self.dataset_version else 'train')) 36 | 37 | print('ann_file', ann_file) 38 | ann_path = os.path.join(data_dir, 'annotations', ann_file) 39 | 40 | self.images = None 41 | # load image list and coco 42 | super(MOT, self).__init__(opt, split, ann_path, img_dir) 43 | 44 | self.num_samples = len(self.images) 45 | self.box_size_thresh = [0] 46 | print('Loaded MOT {} {} {} samples'.format( 47 | self.dataset_version, split, self.num_samples)) 48 | 49 | def _to_float(self, x): 50 | return float("{:.2f}".format(x)) 51 | 52 | def __len__(self): 53 | return self.num_samples 54 | 55 | def save_results(self, results, save_dir): 56 | results_dir = os.path.join(save_dir, 'results_mot{}'.format(self.dataset_version)) 57 | if not os.path.exists(results_dir): 58 | os.mkdir(results_dir) 59 | for video in self.coco.dataset['videos']: 60 | video_id = video['id'] 61 | file_name = video['file_name'] 62 | out_path = os.path.join(results_dir, '{}.txt'.format(file_name)) 63 | f = open(out_path, 'w') 64 | images = self.video_to_images[video_id] 65 | tracks = defaultdict(list) 66 | for image_info in images: 67 | if not (image_info['id'] in results): 68 | continue 69 | result = results[image_info['id']] 70 | frame_id = image_info['frame_id'] 71 | for item in result: 72 | if item['age'] != 1: 73 | continue 74 | if 'visibility' in item and not item['visibility']: 75 | continue 76 | if not ('tracking_id' in item): 77 | item['tracking_id'] = np.random.randint(100000) 78 | if item['age'] != 1: 79 | continue 80 | if 'visibility' in item and not 
item['visibility']: 81 | continue 82 | tracking_id = item['tracking_id'] 83 | bbox = item['bbox'] 84 | bbox = [bbox[0], bbox[1], bbox[2], bbox[3]] 85 | tracks[tracking_id].append([frame_id] + bbox) 86 | rename_track_id = 0 87 | for track_id in sorted(tracks): 88 | rename_track_id += 1 89 | for t in tracks[track_id]: 90 | f.write('{},{},{:.2f},{:.2f},{:.2f},{:.2f},-1,-1,-1,-1\n'.format( 91 | t[0], rename_track_id, t[1], t[2], t[3]-t[1], t[4]-t[2])) 92 | f.close() 93 | 94 | def run_eval(self, results, save_dir, write_to_file=False, dataset_version=""): 95 | self.save_results(results, save_dir) 96 | gt_type_str = '{}'.format( 97 | '_train_half' if '17halftrain' in self.opt.dataset_version \ 98 | else '_val_half' if '17halfval' in self.opt.dataset_version \ 99 | else '') 100 | gt_type_str = '--gt_type {}'.format(gt_type_str) if gt_type_str != '' else '' 101 | print('python tools/eval_motchallenge.py ' + \ 102 | '../data/mot{}/{}/ '.format(self.year, 'train') + \ 103 | '{}/results_mot{}/ '.format(save_dir, self.dataset_version) + \ 104 | gt_type_str + ' --eval_official') 105 | os.system('python tools/eval_motchallenge.py ' + \ 106 | '../data/mot{}/{}/ '.format(self.year, 'train') + \ 107 | '{}/results_mot{}/ '.format(save_dir, self.dataset_version) + \ 108 | gt_type_str + ' --eval_official') 109 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/pd_tracking.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | import numpy as np 7 | import torch 8 | import json 9 | import cv2 10 | import os 11 | import math 12 | 13 | from ..video_dataset import VideoDataset 14 | 15 | class PDTracking(VideoDataset): 16 | num_categories = 5 17 | dataset_folder = 'pd' 18 | default_resolution = [384, 960] 19 | class_name = ['Pedestrian', 'Car', 'Cyclist', 'Caravan/RV', 'Truck'] 20 | # negative id is for "not as negative sample for abs(id)". 
21 | # 0 for ignore losses for all categories in the bounding box region 22 | # ['Pedestrian', 'Car', 'Bicyclist', 'Bus', 'Caravan/RV', 'OtherMovable', 23 | # 'Motorcycle', 'Motorcyclist', 'OtherRider', 'Train', 'Truck', 'Dontcare'] 24 | cat_ids = {1:1, 2:2, 3:3, 4:-9999, 5:4, 6:-2, 7:-9999, 8:-1, 9:-1, 10:-9999, 11:5} 25 | max_objs = 500 26 | def __init__(self, opt, split, rank=None): 27 | data_dir = os.path.join(opt.data_dir, self.dataset_folder) 28 | split_ = 'train' if opt.dataset_version != 'test' else 'test' #'test' 29 | img_dir = data_dir 30 | if split == 'train': 31 | ann_file_ = "train" 32 | else: 33 | ann_file_ = 'val' 34 | ann_path = os.path.join( 35 | data_dir, 'annotations', 'tracking_{}.json'.format( 36 | ann_file_)) 37 | self.images = None 38 | super(PDTracking, self).__init__(opt, split, ann_path, img_dir) 39 | 40 | self.box_size_thresh = [300, 500, 300, 500, 500] 41 | 42 | if opt.only_ped: 43 | self.num_categories = 1 44 | self.class_name = ['person'] 45 | self.cat_ids = {1:1, 2:-9999, 3:-1, 4:-9999, 5:-9999, 6:-9999, 7:-9999, 8:-1, 9:-1, 10:-9999, 11:-9999} 46 | self.box_size_thresh = [300] 47 | 48 | if opt.nu: 49 | self.num_categories = 8 50 | self.class_name = ['Car', 'Truck', 'Bus', 'Trailer', 'construction_vehicle', 'Pedestrian', 'Motorcycle', 'Bicycle'] 51 | self.cat_ids = {1:6, 2:1, 3:0, 4:3, 5:1, 6:-1, 7:-7, 8:0, 9:0, 10:-9999, 11:2, 12:5, 13:-8} 52 | self.box_size_thresh = [500, 500, 500, 500, 500, 300, 500, 500] 53 | 54 | self.alpha_in_degree = False 55 | self.depth_scale = 1 56 | self.dep_mask = 0 57 | self.dim_mask = 1 58 | self.rot_mask = 0 59 | self.amodel_offset_mask = 0 60 | self.ignore_amodal = True 61 | self.num_samples = len(self.images) 62 | self.exp_id = opt.exp_id 63 | if opt.const_v_over_occl: 64 | self.const_v_over_occl = True 65 | 66 | print('Loaded {} {} samples'.format(split, self.num_samples)) 67 | 68 | def save_results_ioueval(self, results, save_dir): 69 | formattted_results = [] 70 | if not os.path.exists(save_dir): 71 | os.mkdir(save_dir) 72 | 73 | for video in self.coco.dataset['videos']: 74 | video_id = video['id'] 75 | images = self.video_to_images[video_id] 76 | 77 | for image_info in images: 78 | img_id = image_info['id'] 79 | if not (img_id in results): 80 | continue 81 | frame_id = image_info['frame_id'] 82 | for i in range(len(results[img_id])): 83 | item = results[img_id][i] 84 | if item['age'] != 1: 85 | continue 86 | if 'visibility' in item and not item['visibility']: 87 | continue 88 | category_id = item['class'] 89 | track_id = item['tracking_id'] if 'tracking_id' in item else -1 90 | bbox = [item['bbox'][0].item(), item['bbox'][1].item(), item['bbox'][2].item() - item['bbox'][0].item(), item['bbox'][3].item() - item['bbox'][1].item()] 91 | 92 | entry = {'video_id': video_id, 'image_id': img_id, 'category_id': category_id, 'track_id': track_id, 'bbox': bbox, 'score': item['score'].item()} 93 | formattted_results.append(entry) 94 | 95 | print(save_dir + '/iou_eval.json') 96 | json.dump(formattted_results, open(save_dir + '/iou_eval.json', 'w')) 97 | 98 | def run_eval(self, results, save_dir, write_to_file=False, dataset_version="val"): 99 | self.save_results_ioueval(results, save_dir) 100 | os.chdir("../tao") 101 | command = 'python scripts/evaluation/evaluate.py ' + \ 102 | '../data/%s/annotations/tracking_%s_tao.json ' % (self.dataset_folder, dataset_version) + \ 103 | '{}/iou_eval.json'.format(save_dir) + ' --config-updates CATEGORIES 1,2' 104 | 105 | if write_to_file: 106 | print("Writing to file") 107 | command += ' > 
../exp/tracking/{}/eval_out.txt'.format(self.exp_id) 108 | os.system(command) 109 | 110 | def __len__(self): 111 | return self.num_samples 112 | 113 | def _to_float(self, x): 114 | return float("{:.2f}".format(x)) 115 | -------------------------------------------------------------------------------- /src/lib/dataset/joint_loader.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | class JointIterator: 4 | def __init__(self, iter1, iter2, dataset1, dataset2): 5 | self.iter1 = iter1 6 | self.iter2 = iter2 7 | self.num_steps = [5, 5] 8 | self.loader_ind = 0 9 | self.counter = self.num_steps[self.loader_ind] 10 | self.dataset1 = dataset1 11 | self.dataset2 = dataset2 12 | 13 | def __next__(self): 14 | if self.counter == 0: 15 | ind = random.randint(0, 1) 16 | self.loader_ind = ind 17 | self.counter = self.num_steps[ind] 18 | 19 | if self.loader_ind == 0: 20 | result = next(self.iter1, None) 21 | if result is None: 22 | self.iter1 = iter(self.dataset1) 23 | result = next(self.iter1, None) 24 | if result is None: 25 | raise StopIteration 26 | else: 27 | result = next(self.iter2, None) 28 | if result is None: 29 | self.iter2 = iter(self.dataset2) 30 | result = next(self.iter2, None) 31 | if result is None: 32 | raise StopIteration 33 | 34 | self.counter -= 1 35 | 36 | return result 37 | 38 | class JointLoader: 39 | 40 | def __init__(self, dataset1, dataset2): 41 | self.dataset1 = dataset1 42 | self.dataset2 = dataset2 43 | self.dataset = dataset1.dataset 44 | 45 | def __iter__(self): 46 | return JointIterator(iter(self.dataset1), iter(self.dataset2), self.dataset1, self.dataset2) 47 | 48 | def __len__(self): 49 | return len(self.dataset1) + len(self.dataset2) 50 | -------------------------------------------------------------------------------- /src/lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /src/lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /src/lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/src/lib/external/__init__.py -------------------------------------------------------------------------------- /src/lib/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /src/lib/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import 
division 3 | from __future__ import print_function 4 | 5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 6 | import os 7 | import time 8 | import sys 9 | import torch 10 | import subprocess 11 | USE_TENSORBOARD = True 12 | try: 13 | import tensorboardX 14 | print('Using tensorboardX') 15 | except: 16 | USE_TENSORBOARD = False 17 | 18 | class Logger(object): 19 | def __init__(self, opt): 20 | """Create a summary writer logging to log_dir.""" 21 | if not os.path.exists(opt.save_dir): 22 | os.makedirs(opt.save_dir) 23 | if not os.path.exists(opt.debug_dir): 24 | os.makedirs(opt.debug_dir) 25 | 26 | time_str = time.strftime('%Y-%m-%d-%H-%M') 27 | 28 | args = dict((name, getattr(opt, name)) for name in dir(opt) 29 | if not name.startswith('_')) 30 | file_name = os.path.join(opt.save_dir, 'opt.txt') 31 | with open(file_name, 'wt') as opt_file: 32 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 33 | opt_file.write('==> cudnn version: {}\n'.format( 34 | torch.backends.cudnn.version())) 35 | opt_file.write('==> Cmd:\n') 36 | opt_file.write(str(sys.argv)) 37 | opt_file.write('\n==> Opt:\n') 38 | for k, v in sorted(args.items()): 39 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 40 | 41 | log_dir = opt.save_dir + '/logs_{}'.format(time_str) 42 | if USE_TENSORBOARD: 43 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir) 44 | else: 45 | if not os.path.exists(os.path.dirname(log_dir)): 46 | os.mkdir(os.path.dirname(log_dir)) 47 | if not os.path.exists(log_dir): 48 | os.mkdir(log_dir) 49 | self.log = open(log_dir + '/log.txt', 'w') 50 | try: 51 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir)) 52 | except: 53 | pass 54 | self.start_line = True 55 | 56 | def write(self, txt): 57 | if self.start_line: 58 | time_str = time.strftime('%Y-%m-%d-%H-%M') 59 | self.log.write('{}: {}'.format(time_str, txt)) 60 | else: 61 | self.log.write(txt) 62 | self.start_line = False 63 | if '\n' in txt: 64 | self.start_line = True 65 | self.log.flush() 66 | 67 | def close(self): 68 | self.log.close() 69 | 70 | def scalar_summary(self, tag, value, step): 71 | """Log a scalar variable.""" 72 | if USE_TENSORBOARD: 73 | self.writer.add_scalar(tag, value, step) 74 | -------------------------------------------------------------------------------- /src/lib/model/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | 8 | from .scatter_gather import scatter_kwargs 9 | 10 | class _DataParallel(Module): 11 | r"""Implements data parallelism at the module level. 12 | 13 | This container parallelizes the application of the given module by 14 | splitting the input across the specified devices by chunking in the batch 15 | dimension. In the forward pass, the module is replicated on each device, 16 | and each replica handles a portion of the input. During the backwards 17 | pass, gradients from each replica are summed into the original module. 18 | 19 | The batch size should be larger than the number of GPUs used. It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 
22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 43 | 44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 45 | super(_DataParallel, self).__init__() 46 | 47 | if not torch.cuda.is_available(): 48 | self.module = module 49 | self.device_ids = [] 50 | return 51 | 52 | if device_ids is None: 53 | device_ids = list(range(torch.cuda.device_count())) 54 | if output_device is None: 55 | output_device = device_ids[0] 56 | self.dim = dim 57 | self.module = module 58 | self.device_ids = device_ids 59 | self.chunk_sizes = chunk_sizes 60 | self.output_device = output_device 61 | if len(self.device_ids) == 1: 62 | self.module.cuda(device_ids[0]) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 68 | if len(self.device_ids) == 1: 69 | return self.module(*inputs[0], **kwargs[0]) 70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 71 | outputs = self.parallel_apply(replicas, inputs, kwargs) 72 | return self.gather(outputs, self.output_device) 73 | 74 | def replicate(self, module, device_ids): 75 | return replicate(module, device_ids) 76 | 77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 79 | 80 | def parallel_apply(self, replicas, inputs, kwargs): 81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 82 | 83 | def gather(self, outputs, output_device): 84 | return gather(outputs, output_device, dim=self.dim) 85 | 86 | 87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 89 | 90 | This is the functional version of the DataParallel module. 91 | 92 | Args: 93 | module: the module to evaluate in parallel 94 | inputs: inputs to the module 95 | device_ids: GPU ids on which to replicate module 96 | output_device: GPU location of the output Use -1 to indicate the CPU. 
97 | (default: device_ids[0]) 98 | Returns: 99 | a Variable containing the result of module(input) located on 100 | output_device 101 | """ 102 | if not isinstance(inputs, tuple): 103 | inputs = (inputs,) 104 | 105 | if device_ids is None: 106 | device_ids = list(range(torch.cuda.device_count())) 107 | 108 | if output_device is None: 109 | output_device = device_ids[0] 110 | 111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 112 | if len(device_ids) == 1: 113 | return module(*inputs[0], **module_kwargs[0]) 114 | used_device_ids = device_ids[:len(inputs)] 115 | replicas = replicate(module, used_device_ids) 116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 117 | return gather(outputs, output_device, dim) 118 | 119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 120 | if chunk_sizes is None: 121 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 122 | standard_size = True 123 | for i in range(1, len(chunk_sizes)): 124 | if chunk_sizes[i] != chunk_sizes[0]: 125 | standard_size = False 126 | if standard_size: 127 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes) -------------------------------------------------------------------------------- /src/lib/model/matcher.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from scipy.optimize import linear_sum_assignment 3 | from torch import nn 4 | 5 | from torchvision.ops.boxes import box_area 6 | 7 | 8 | # modified from torchvision to also return the union 9 | def box_iou(boxes1, boxes2): 10 | area1 = box_area(boxes1) 11 | area2 = box_area(boxes2) 12 | 13 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 14 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 15 | 16 | wh = (rb - lt).clamp(min=0) # [N,M,2] 17 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 18 | 19 | union = area1[:, None] + area2 - inter 20 | 21 | iou = inter / union 22 | return iou, union 23 | 24 | def box_cxcywh_to_xyxy(x): 25 | x_c, y_c, w, h = x.unbind(-1) 26 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), 27 | (x_c + 0.5 * w), (y_c + 0.5 * h)] 28 | return torch.stack(b, dim=-1) 29 | 30 | def generalized_box_iou(boxes1, boxes2): 31 | """ 32 | Generalized IoU from https://giou.stanford.edu/ 33 | The boxes should be in [x0, y0, x1, y1] format 34 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 35 | and M = len(boxes2) 36 | """ 37 | # degenerate boxes gives inf / nan results 38 | # so do an early check 39 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 40 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 41 | iou, union = box_iou(boxes1, boxes2) 42 | 43 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 44 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 45 | 46 | wh = (rb - lt).clamp(min=0) # [N,M,2] 47 | area = wh[:, :, 0] * wh[:, :, 1] 48 | 49 | return iou - (area - union) / area 50 | 51 | 52 | class HungarianMatcher(nn.Module): 53 | """This class computes an assignment between the targets and the predictions of the network 54 | For efficiency reasons, the targets don't include the no_object. Because of this, in general, 55 | there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, 56 | while the others are un-matched (and thus treated as non-objects). 
57 | """ 58 | 59 | def __init__(self, cost_class: float = 1, cost_bbox: float = 1, cost_giou: float = 1): 60 | """Creates the matcher 61 | Params: 62 | cost_class: This is the relative weight of the classification error in the matching cost 63 | cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost 64 | cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost 65 | """ 66 | super().__init__() 67 | self.cost_class = cost_class 68 | self.cost_bbox = cost_bbox 69 | self.cost_giou = cost_giou 70 | assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0, "all costs cant be 0" 71 | 72 | @torch.no_grad() 73 | def forward(self, outputs, targets): 74 | """ Performs the matching 75 | Params: 76 | outputs: This is a dict that contains at least these entries: 77 | "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits 78 | "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates 79 | targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing: 80 | "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth 81 | objects in the target) containing the class labels 82 | "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates 83 | Returns: 84 | A list of size batch_size, containing tuples of (index_i, index_j) where: 85 | - index_i is the indices of the selected predictions (in order) 86 | - index_j is the indices of the corresponding selected targets (in order) 87 | For each batch element, it holds: 88 | len(index_i) = len(index_j) = min(num_queries, num_target_boxes) 89 | """ 90 | bs, num_queries = outputs["pred_logits"].shape[:2] 91 | 92 | # We flatten to compute the cost matrices in a batch 93 | out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1) # [batch_size * num_queries, num_classes] 94 | out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] 95 | 96 | # Also concat the target labels and boxes 97 | tgt_ids = torch.cat([v["labels"] for v in targets]) 98 | tgt_bbox = torch.cat([v["boxes"] for v in targets]) 99 | 100 | # Compute the classification cost. Contrary to the loss, we don't use the NLL, 101 | # but approximate it in 1 - proba[target class]. 102 | # The 1 is a constant that doesn't change the matching, it can be ommitted. 
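# For instance, a query that puts probability 0.9 on a target's class contributes a cost of -0.9
# for that pairing, while a query with probability 0.2 contributes -0.2, so the ranking of
# assignments is identical to using (1 - proba). The resulting cost_class below has shape
# [batch_size * num_queries, num_total_target_boxes], one column per concatenated target box.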
103 | cost_class = -out_prob[:, tgt_ids] 104 | 105 | # Compute the L1 cost between boxes 106 | cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) 107 | 108 | # Compute the giou cost between boxes 109 | cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)) 110 | 111 | # Final cost matrix 112 | C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou 113 | C = C.view(bs, num_queries, -1).cpu() 114 | 115 | sizes = [len(v["boxes"]) for v in targets] 116 | indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] 117 | return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] -------------------------------------------------------------------------------- /src/lib/model/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torchvision.models as models 6 | import torch 7 | import torch.nn as nn 8 | import os 9 | import copy 10 | 11 | from .networks.dla import DLASeg 12 | from .networks.resdcn import PoseResDCN 13 | from .networks.resnet import PoseResNet 14 | from .networks.dlav0 import DLASegv0 15 | from .networks.generic_network import GenericNetwork 16 | 17 | _network_factory = { 18 | 'resdcn': PoseResDCN, 19 | 'dla': DLASeg, 20 | 'res': PoseResNet, 21 | 'dlav0': DLASegv0, 22 | 'generic': GenericNetwork 23 | } 24 | 25 | def create_model(arch, head, head_conv, opt=None): 26 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 27 | arch = arch[:arch.find('_')] if '_' in arch else arch 28 | model_class = _network_factory[arch] 29 | model = model_class(num_layers, heads=head, head_convs=head_conv, opt=opt) 30 | return model 31 | 32 | 33 | def load_model(model, model_path, opt, optimizer=None): 34 | start_epoch = 0 35 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 36 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 37 | state_dict_ = checkpoint['state_dict'] 38 | state_dict = {} 39 | 40 | # convert DataParallel checkpoint keys ('module.*') to plain model keys 41 | for k in state_dict_: 42 | if k.startswith('module') and not k.startswith('module_list'): 43 | state_dict[k[7:]] = state_dict_[k] 44 | else: 45 | state_dict[k] = state_dict_[k] 46 | model_state_dict = model.state_dict() 47 | 48 | # check loaded parameters and created model parameters 49 | for k in state_dict: 50 | if k in model_state_dict: 51 | if (state_dict[k].shape != model_state_dict[k].shape) or \ 52 | (opt.reset_hm and k.startswith('hm') and (state_dict[k].shape[0] in [80, 1])): 53 | if opt.reuse_hm: 54 | print('Reusing parameter {}, required shape{}, '\ 55 | 'loaded shape{}.'.format( 56 | k, model_state_dict[k].shape, state_dict[k].shape)) 57 | if state_dict[k].shape[0] < model_state_dict[k].shape[0]: 58 | model_state_dict[k][:state_dict[k].shape[0]] = state_dict[k] 59 | else: 60 | model_state_dict[k] = state_dict[k][:model_state_dict[k].shape[0]] 61 | state_dict[k] = model_state_dict[k] 62 | else: 63 | print('Skip loading parameter {}, required shape{}, '\ 64 | 'loaded shape{}.'.format( 65 | k, model_state_dict[k].shape, state_dict[k].shape)) 66 | state_dict[k] = model_state_dict[k] 67 | else: 68 | print('Drop parameter {}.'.format(k)) 69 | 70 | for k in model_state_dict: 71 | if not (k in state_dict): 72 | print('No param {}.'.format(k)) 73 | state_dict[k] = model_state_dict[k] 74 |
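# At this point every model parameter has a compatibly shaped entry in state_dict (mismatched or
# missing entries were replaced by the model's own values above); checkpoint-only keys that the
# model does not use are simply ignored by the non-strict load below.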
model.load_state_dict(state_dict, strict=False) 75 | 76 | # resume optimizer parameters 77 | if optimizer is not None and opt.resume: 78 | if 'optimizer' in checkpoint: 79 | # optimizer.load_state_dict(checkpoint['optimizer']) 80 | start_epoch = checkpoint['epoch'] 81 | start_lr = opt.lr 82 | for step in opt.lr_step: 83 | if start_epoch >= step: 84 | start_lr *= 0.1 85 | for param_group in optimizer.param_groups: 86 | param_group['lr'] = start_lr 87 | print('Resumed optimizer with start lr', start_lr) 88 | else: 89 | print('No optimizer parameters in checkpoint.') 90 | if optimizer is not None: 91 | return model, optimizer, start_epoch 92 | else: 93 | return model 94 | 95 | def save_model(path, epoch, model, optimizer=None): 96 | if isinstance(model, torch.nn.DataParallel): 97 | state_dict = model.module.state_dict() 98 | else: 99 | state_dict = model.state_dict() 100 | data = {'epoch': epoch, 101 | 'state_dict': state_dict} 102 | if not (optimizer is None): 103 | data['optimizer'] = optimizer.state_dict() 104 | torch.save(data, path) 105 | 106 | -------------------------------------------------------------------------------- /src/lib/model/networks/generic_network.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | from torch import nn 7 | from .backbones.dla import dla34 8 | from .backbones.resnet import Resnet 9 | from .backbones.mobilenet import MobileNetV2 10 | from .necks.dlaup import DLASeg 11 | from .necks.msraup import MSRAUp 12 | 13 | backbone_factory = { 14 | 'dla34': dla34, 15 | 'resnet': Resnet, 16 | 'mobilenet': MobileNetV2 17 | } 18 | 19 | neck_factory = { 20 | 'dlaup': DLASeg, 21 | 'msraup': MSRAUp 22 | } 23 | 24 | def fill_fc_weights(layers): 25 | for m in layers.modules(): 26 | if isinstance(m, nn.Conv2d): 27 | if m.bias is not None: 28 | nn.init.constant_(m.bias, 0) 29 | 30 | class GenericNetwork(nn.Module): 31 | def __init__(self, num_layers, heads, head_convs, num_stacks=1, opt=None): 32 | super(GenericNetwork, self).__init__() 33 | print('Using generic model with backbone {} and neck {}'.format( 34 | opt.backbone, opt.neck)) 35 | # assert (not opt.pre_hm) and (not opt.pre_img) 36 | if opt is not None and opt.head_kernel != 3: 37 | print('Using head kernel:', opt.head_kernel) 38 | head_kernel = opt.head_kernel 39 | else: 40 | head_kernel = 3 41 | self.opt = opt 42 | self.backbone = backbone_factory[opt.backbone](opt=opt) 43 | channels = self.backbone.channels 44 | self.neck = neck_factory[opt.neck](opt=opt, channels=channels) 45 | last_channel = self.neck.out_channel 46 | self.num_stacks = num_stacks 47 | self.heads = heads 48 | for head in self.heads: 49 | classes = self.heads[head] 50 | head_conv = head_convs[head] 51 | if len(head_conv) > 0: 52 | out = nn.Conv2d(head_conv[-1], classes, 53 | kernel_size=1, stride=1, padding=0, bias=True) 54 | conv = nn.Conv2d(last_channel, head_conv[0], 55 | kernel_size=head_kernel, 56 | padding=head_kernel // 2, bias=True) 57 | convs = [conv] 58 | for k in range(1, len(head_conv)): 59 | convs.append(nn.Conv2d(head_conv[k - 1], head_conv[k], 60 | kernel_size=1, bias=True)) 61 | if len(convs) == 1: 62 | fc = nn.Sequential(conv, nn.ReLU(inplace=True), out) 63 | elif len(convs) == 2: 64 | fc = nn.Sequential( 65 | convs[0], nn.ReLU(inplace=True), 66 | convs[1], nn.ReLU(inplace=True), out) 67 | elif len(convs) == 3: 68 | fc = nn.Sequential( 69 | convs[0], 
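# Each output head is a small tower: one head_kernel x head_kernel conv from the neck's
# last_channel features, optional extra 1x1 convs for the remaining head_conv entries with ReLUs
# in between, and a final 1x1 conv producing `classes` channels; the surrounding branches unroll
# this pattern for towers of depth one to four.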
nn.ReLU(inplace=True), 70 | convs[1], nn.ReLU(inplace=True), 71 | convs[2], nn.ReLU(inplace=True), out) 72 | elif len(convs) == 4: 73 | fc = nn.Sequential( 74 | convs[0], nn.ReLU(inplace=True), 75 | convs[1], nn.ReLU(inplace=True), 76 | convs[2], nn.ReLU(inplace=True), 77 | convs[3], nn.ReLU(inplace=True), out) 78 | if 'hm' in head: 79 | fc[-1].bias.data.fill_(opt.prior_bias) 80 | else: 81 | fill_fc_weights(fc) 82 | else: 83 | fc = nn.Conv2d(last_channel, classes, 84 | kernel_size=1, stride=1, padding=0, bias=True) 85 | if 'hm' in head: 86 | fc.bias.data.fill_(opt.prior_bias) 87 | else: 88 | fill_fc_weights(fc) 89 | self.__setattr__(head, fc) 90 | 91 | def forward(self, x, pre_img=None, pre_hm=None): 92 | y = self.backbone(x, pre_img, pre_hm) 93 | feats = self.neck(y) 94 | out = [] 95 | if self.opt.model_output_list: 96 | for s in range(self.num_stacks): 97 | z = [] 98 | for head in sorted(self.heads): 99 | z.append(self.__getattr__(head)(feats[s])) 100 | out.append(z) 101 | else: 102 | for s in range(self.num_stacks): 103 | z = {} 104 | for head in self.heads: 105 | z[head] = self.__getattr__(head)(feats[s]) 106 | out.append(z) 107 | return out 108 | -------------------------------------------------------------------------------- /src/lib/model/networks/necks/msraup.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Dequan Wang and Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | import math 14 | import logging 15 | 16 | import torch 17 | import torch.nn as nn 18 | 19 | try: 20 | from ..DCNv2.dcn_v2 import DCN 21 | except: 22 | print('import DCN failed') 23 | DCN = None 24 | 25 | 26 | BN_MOMENTUM = 0.1 27 | 28 | def fill_up_weights(up): 29 | w = up.weight.data 30 | f = math.ceil(w.size(2) / 2) 31 | c = (2 * f - 1 - f % 2) / (2. 
* f) 32 | for i in range(w.size(2)): 33 | for j in range(w.size(3)): 34 | w[0, 0, i, j] = \ 35 | (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c)) 36 | for c in range(1, w.size(0)): 37 | w[c, 0, :, :] = w[0, 0, :, :] 38 | 39 | def fill_fc_weights(layers): 40 | for m in layers.modules(): 41 | if isinstance(m, nn.Conv2d): 42 | nn.init.normal_(m.weight, std=0.001) 43 | # torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu') 44 | # torch.nn.init.xavier_normal_(m.weight.data) 45 | if m.bias is not None: 46 | nn.init.constant_(m.bias, 0) 47 | 48 | class MSRAUp(nn.Module): 49 | # def __init__(self, block, layers, heads, head_conv): 50 | def __init__(self, opt, channels): 51 | super().__init__() 52 | self.opt = opt 53 | assert self.opt.msra_outchannel in [64, 256] 54 | self.deconv_with_bias = False 55 | self.inplanes = channels[-1] 56 | self.out_channel = self.opt.msra_outchannel 57 | # used for deconv layers 58 | if self.opt.msra_outchannel == 64: 59 | print('Using slimed resnet: 256 128 64 up channels.') 60 | self.deconv_layers = self._make_deconv_layer( 61 | 3, 62 | [256, 128, 64], 63 | [4, 4, 4], 64 | ) 65 | else: 66 | print('Using original resnet: 256 256 256 up channels.') 67 | print('Using 256 deconvs') 68 | self.deconv_layers = self._make_deconv_layer( 69 | 3, 70 | [256, 256, 256], 71 | [4, 4, 4], 72 | ) 73 | self.init_weights() 74 | 75 | 76 | def forward(self, x): 77 | x = self.deconv_layers(x[-1]) 78 | return [x] 79 | 80 | def _get_deconv_cfg(self, deconv_kernel, index): 81 | if deconv_kernel == 4: 82 | padding = 1 83 | output_padding = 0 84 | elif deconv_kernel == 3: 85 | padding = 1 86 | output_padding = 1 87 | elif deconv_kernel == 2: 88 | padding = 0 89 | output_padding = 0 90 | 91 | return deconv_kernel, padding, output_padding 92 | 93 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 94 | assert num_layers == len(num_filters), \ 95 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 96 | assert num_layers == len(num_kernels), \ 97 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 98 | 99 | layers = [] 100 | for i in range(num_layers): 101 | kernel, padding, output_padding = \ 102 | self._get_deconv_cfg(num_kernels[i], i) 103 | 104 | planes = num_filters[i] 105 | fc = DCN(self.inplanes, planes, 106 | kernel_size=(3,3), stride=1, 107 | padding=1, dilation=1, deformable_groups=1) 108 | # fc = nn.Conv2d(self.inplanes, planes, 109 | # kernel_size=3, stride=1, 110 | # padding=1, dilation=1, bias=False) 111 | # fill_fc_weights(fc) 112 | up = nn.ConvTranspose2d( 113 | in_channels=planes, 114 | out_channels=planes, 115 | kernel_size=kernel, 116 | stride=2, 117 | padding=padding, 118 | output_padding=output_padding, 119 | bias=self.deconv_with_bias) 120 | fill_up_weights(up) 121 | 122 | layers.append(fc) 123 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 124 | layers.append(nn.ReLU(inplace=True)) 125 | layers.append(up) 126 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 127 | layers.append(nn.ReLU(inplace=True)) 128 | self.inplanes = planes 129 | 130 | return nn.Sequential(*layers) 131 | 132 | def init_weights(self): 133 | for name, m in self.deconv_layers.named_modules(): 134 | if isinstance(m, nn.BatchNorm2d): 135 | nn.init.constant_(m.weight, 1) 136 | nn.init.constant_(m.bias, 0) 137 | -------------------------------------------------------------------------------- /src/lib/model/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import 
torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /src/lib/model/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | def _sigmoid(x): 9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 10 | return y 11 | 12 | def _sigmoid12(x): 13 | y = torch.clamp(x.sigmoid_(), 1e-12) 14 | return y 15 | 16 | def _gather_feat(feat, ind): 17 | dim = feat.size(2) 18 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 19 | feat = feat.gather(1, ind) 20 | return feat 21 | 22 | def _tranpose_and_gather_feat(feat, ind): 23 | feat = feat.permute(0, 2, 3, 1).contiguous() 24 | feat = feat.view(feat.size(0), -1, feat.size(3)) 25 | feat = _gather_feat(feat, ind) 26 | return feat 27 | 28 | def flip_tensor(x): 29 | return torch.flip(x, [3]) 30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 31 | # return torch.from_numpy(tmp).to(x.device) 32 | 33 | def flip_lr(x, flip_idx): 34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 35 | shape = tmp.shape 36 | for e in flip_idx: 37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 40 | 41 | def flip_lr_off(x, flip_idx): 42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 43 | shape = tmp.shape 44 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 45 | tmp.shape[2], tmp.shape[3]) 46 | tmp[:, :, 0, :, :] *= -1 47 | for e in flip_idx: 48 | tmp[:, e[0], ...], tmp[:, e[1], ...] 
= \ 49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 51 | 52 | def _nms(heat, kernel=3): 53 | pad = (kernel - 1) // 2 54 | 55 | hmax = nn.functional.max_pool2d( 56 | heat, (kernel, kernel), stride=1, padding=pad) 57 | keep = (hmax == heat).float() 58 | return heat * keep 59 | 60 | def _topk_channel(scores, K=100): 61 | batch, cat, height, width = scores.size() 62 | 63 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 64 | 65 | topk_inds = topk_inds % (height * width) 66 | topk_ys = (topk_inds / width).int().float() 67 | topk_xs = (topk_inds % width).int().float() 68 | 69 | return topk_scores, topk_inds, topk_ys, topk_xs 70 | 71 | def _topk(scores, K=100): 72 | batch, cat, height, width = scores.size() 73 | 74 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 75 | 76 | topk_inds = topk_inds % (height * width) 77 | topk_ys = (topk_inds / width).int().float() 78 | topk_xs = (topk_inds % width).int().float() 79 | 80 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K) 81 | topk_clses = (topk_ind / K).int() 82 | topk_inds = _gather_feat( 83 | topk_inds.view(batch, -1, 1), topk_ind).view(batch, K) 84 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K) 85 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K) 86 | 87 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs 88 | -------------------------------------------------------------------------------- /src/lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/src/lib/utils/__init__.py -------------------------------------------------------------------------------- /src/lib/utils/ddd_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | 8 | def comput_corners_3d(dim, rotation_y): 9 | # dim: 3 10 | # location: 3 11 | # rotation_y: 1 12 | # return: 8 x 3 13 | c, s = np.cos(rotation_y), np.sin(rotation_y) 14 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 15 | l, w, h = dim[2], dim[1], dim[0] 16 | x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2] 17 | y_corners = [0,0,0,0,-h,-h,-h,-h] 18 | z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2] 19 | 20 | corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32) 21 | corners_3d = np.dot(R, corners).transpose(1, 0) 22 | return corners_3d 23 | 24 | def compute_box_3d(dim, location, rotation_y): 25 | # dim: 3 26 | # location: 3 27 | # rotation_y: 1 28 | # return: 8 x 3 29 | corners_3d = comput_corners_3d(dim, rotation_y) 30 | corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(1, 3) 31 | return corners_3d 32 | 33 | def project_to_image(pts_3d, P): 34 | # pts_3d: n x 3 35 | # P: 3 x 4 36 | # return: n x 2 37 | pts_3d_homo = np.concatenate( 38 | [pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1) 39 | pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0) 40 | pts_2d = pts_2d[:, :2] / pts_2d[:, 2:] 41 | # import pdb; pdb.set_trace() 42 | return pts_2d 43 | 44 | def compute_orientation_3d(dim, location, rotation_y): 45 | # dim: 3 46 | # location: 3 47 | # rotation_y: 1 48 | # return: 2 x 3 49 | c, s = 
np.cos(rotation_y), np.sin(rotation_y) 50 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 51 | orientation_3d = np.array([[0, dim[2]], [0, 0], [0, 0]], dtype=np.float32) 52 | orientation_3d = np.dot(R, orientation_3d) 53 | orientation_3d = orientation_3d + \ 54 | np.array(location, dtype=np.float32).reshape(3, 1) 55 | return orientation_3d.transpose(1, 0) 56 | 57 | def draw_box_3d(image, corners, c=(255, 0, 255), same_color=False): 58 | face_idx = [[0,1,5,4], 59 | [1,2,6, 5], 60 | [3,0,4,7], 61 | [2,3,7,6]] 62 | right_corners = [1, 2, 6, 5] if not same_color else [] 63 | left_corners = [0, 3, 7, 4] if not same_color else [] 64 | thickness = 4 if same_color else 2 65 | corners = corners.astype(np.int32) 66 | for ind_f in range(3, -1, -1): 67 | f = face_idx[ind_f] 68 | for j in range(4): 69 | # print('corners', corners) 70 | cc = c 71 | if (f[j] in left_corners) and (f[(j+1)%4] in left_corners): 72 | cc = (255, 0, 0) 73 | if (f[j] in right_corners) and (f[(j+1)%4] in right_corners): 74 | cc = (0, 0, 255) 75 | try: 76 | cv2.line(image, (corners[f[j], 0], corners[f[j], 1]), 77 | (corners[f[(j+1)%4], 0], corners[f[(j+1)%4], 1]), cc, thickness, lineType=cv2.LINE_AA) 78 | except: 79 | pass 80 | if ind_f == 0: 81 | try: 82 | cv2.line(image, (corners[f[0], 0], corners[f[0], 1]), 83 | (corners[f[2], 0], corners[f[2], 1]), c, 1, lineType=cv2.LINE_AA) 84 | cv2.line(image, (corners[f[1], 0], corners[f[1], 1]), 85 | (corners[f[3], 0], corners[f[3], 1]), c, 1, lineType=cv2.LINE_AA) 86 | except: 87 | pass 88 | # top_idx = [0, 1, 2, 3] 89 | return image 90 | 91 | def unproject_2d_to_3d(pt_2d, depth, P): 92 | # pts_2d: 2 93 | # depth: 1 94 | # P: 3 x 4 95 | # return: 3 96 | z = depth - P[2, 3] 97 | x = (pt_2d[0] * depth - P[0, 3] - P[0, 2] * z) / P[0, 0] 98 | y = (pt_2d[1] * depth - P[1, 3] - P[1, 2] * z) / P[1, 1] 99 | pt_3d = np.array([x, y, z], dtype=np.float32).reshape(3) 100 | return pt_3d 101 | 102 | def alpha2rot_y(alpha, x, cx, fx): 103 | """ 104 | Get rotation_y by alpha + theta - 180 105 | alpha : Observation angle of object, ranging [-pi..pi] 106 | x : Object center x to the camera center (x-W/2), in pixels 107 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 108 | """ 109 | rot_y = alpha + np.arctan2(x - cx, fx) 110 | if rot_y > np.pi: 111 | rot_y -= 2 * np.pi 112 | if rot_y < -np.pi: 113 | rot_y += 2 * np.pi 114 | return rot_y 115 | 116 | def rot_y2alpha(rot_y, x, cx, fx): 117 | """ 118 | Get rotation_y by alpha + theta - 180 119 | alpha : Observation angle of object, ranging [-pi..pi] 120 | x : Object center x to the camera center (x-W/2), in pixels 121 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 122 | """ 123 | alpha = rot_y - np.arctan2(x - cx, fx) 124 | if alpha > np.pi: 125 | alpha -= 2 * np.pi 126 | if alpha < -np.pi: 127 | alpha += 2 * np.pi 128 | return alpha 129 | 130 | 131 | def ddd2locrot(center, alpha, dim, depth, calib): 132 | # single image 133 | locations = unproject_2d_to_3d(center, depth, calib) 134 | locations[1] += dim[0] / 2 135 | rotation_y = alpha2rot_y(alpha, center[0], calib[0, 2], calib[0, 0]) 136 | return locations, rotation_y 137 | 138 | def project_3d_bbox(location, dim, rotation_y, calib): 139 | box_3d = compute_box_3d(dim, location, rotation_y) 140 | box_2d = project_to_image(box_3d, calib) 141 | return box_2d 142 | 143 | 144 | if __name__ == '__main__': 145 | calib = np.array( 146 | [[7.070493000000e+02, 0.000000000000e+00, 6.040814000000e+02, 4.575831000000e+01], 147 | [0.000000000000e+00, 
7.070493000000e+02, 1.805066000000e+02, -3.454157000000e-01], 148 | [0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 4.981016000000e-03]], 149 | dtype=np.float32) 150 | alpha = -0.20 151 | tl = np.array([712.40, 143.00], dtype=np.float32) 152 | br = np.array([810.73, 307.92], dtype=np.float32) 153 | ct = (tl + br) / 2 154 | rotation_y = 0.01 155 | print('alpha2rot_y', alpha2rot_y(alpha, ct[0], calib[0, 2], calib[0, 0])) 156 | print('rotation_y', rotation_y) 157 | -------------------------------------------------------------------------------- /src/lib/utils/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | from .image import transform_preds_with_trans, get_affine_transform 8 | from .ddd_utils import ddd2locrot, comput_corners_3d 9 | from .ddd_utils import project_to_image, rot_y2alpha 10 | import numba 11 | 12 | def get_alpha(rot): 13 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 14 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 15 | # return rot[:, 0] 16 | idx = rot[:, 1] > rot[:, 5] 17 | alpha1 = np.arctan2(rot[:, 2], rot[:, 3]) + (-0.5 * np.pi) 18 | alpha2 = np.arctan2(rot[:, 6], rot[:, 7]) + ( 0.5 * np.pi) 19 | return alpha1 * idx + alpha2 * (1 - idx) 20 | 21 | def generic_post_process( 22 | opt, dets, c, s, h, w, num_classes, calibs=None, height=-1, width=-1): 23 | if not ('scores' in dets): 24 | return [{}], [{}] 25 | ret = [] 26 | 27 | for i in range(len(dets['scores'])): 28 | preds = [] 29 | trans = get_affine_transform( 30 | c[i], s[i], 0, (w, h), inv=1).astype(np.float32) 31 | for j in range(len(dets['scores'][i])): 32 | if dets['scores'][i][j] < opt.out_thresh: 33 | break 34 | item = {} 35 | item['score'] = dets['scores'][i][j] 36 | item['class'] = int(dets['clses'][i][j]) + 1 37 | item['ct'] = transform_preds_with_trans( 38 | (dets['cts'][i][j]).reshape(1, 2), trans).reshape(2) 39 | 40 | if 'tracking' in dets: 41 | tracking = transform_preds_with_trans( 42 | (dets['tracking'][i][j] + dets['cts'][i][j]).reshape(1, 2), 43 | trans).reshape(2) 44 | item['tracking'] = tracking - item['ct'] 45 | 46 | if 'bboxes' in dets: 47 | bbox = transform_preds_with_trans( 48 | dets['bboxes'][i][j].reshape(2, 2), trans).reshape(4) 49 | item['bbox'] = bbox 50 | 51 | if 'visibility' in dets: 52 | if dets['visibility'][i][j] > opt.visibility_thresh_eval: 53 | item['visibility'] = True 54 | else: 55 | item['visibility'] = False 56 | 57 | if 'hps' in dets: 58 | pts = transform_preds_with_trans( 59 | dets['hps'][i][j].reshape(-1, 2), trans).reshape(-1) 60 | item['hps'] = pts 61 | 62 | if 'dep' in dets and len(dets['dep'][i]) > j: 63 | item['dep'] = dets['dep'][i][j] 64 | 65 | if 'dim' in dets and len(dets['dim'][i]) > j: 66 | item['dim'] = dets['dim'][i][j] 67 | 68 | if 'rot' in dets and len(dets['rot'][i]) > j: 69 | item['alpha'] = get_alpha(dets['rot'][i][j:j+1])[0] 70 | 71 | if 'rot' in dets and 'dep' in dets and 'dim' in dets \ 72 | and len(dets['dep'][i]) > j: 73 | if 'amodel_offset' in dets and len(dets['amodel_offset'][i]) > j: 74 | ct_output = dets['bboxes'][i][j].reshape(2, 2).mean(axis=0) 75 | amodel_ct_output = ct_output + dets['amodel_offset'][i][j] 76 | ct = transform_preds_with_trans( 77 | amodel_ct_output.reshape(1, 2), trans).reshape(2).tolist() 78 | else: 79 | bbox = item['bbox'] 80 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 
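# ddd2locrot (see ddd_utils) back-projects this 2D center with the predicted depth through the
# per-image calibration matrix and converts the observation angle alpha into a global yaw,
# giving the 3D location and rotation_y stored on the detection below.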
81 | item['ct'] = ct 82 | item['loc'], item['rot_y'] = ddd2locrot( 83 | ct, item['alpha'], item['dim'], item['dep'], calibs[i]) 84 | 85 | preds.append(item) 86 | 87 | if 'nuscenes_att' in dets: 88 | for j in range(len(preds)): 89 | preds[j]['nuscenes_att'] = dets['nuscenes_att'][i][j] 90 | 91 | if 'velocity' in dets: 92 | for j in range(len(preds)): 93 | preds[j]['velocity'] = dets['velocity'][i][j] 94 | 95 | ret.append(preds) 96 | 97 | return ret 98 | -------------------------------------------------------------------------------- /src/lib/utils/tracker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.utils.linear_assignment_ import linear_assignment 3 | from numba import jit 4 | import copy 5 | 6 | class Tracker(object): 7 | def __init__(self, opt): 8 | self.opt = opt 9 | self.reset() 10 | 11 | def init_track(self, results): 12 | for item in results: 13 | if item['score'] > self.opt.new_thresh: 14 | self.id_count += 1 15 | # active and age are never used in the paper 16 | item['active'] = 1 17 | item['age'] = 1 18 | item['tracking_id'] = self.id_count 19 | if not ('ct' in item): 20 | bbox = item['bbox'] 21 | item['ct'] = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 22 | item['prev_ct'] = item['ct'] 23 | item['v'] = [0, 0] 24 | self.tracks.append(item) 25 | 26 | def reset(self): 27 | self.id_count = 0 28 | self.tracks = [] 29 | 30 | def step(self, results, public_det=None): 31 | N = len(results) 32 | M = len(self.tracks) 33 | 34 | dets = np.array( 35 | [det['ct'] + det['tracking'] for det in results], np.float32) # N x 2 36 | track_size = np.array([((track['bbox'][2] - track['bbox'][0]) * \ 37 | (track['bbox'][3] - track['bbox'][1])) \ 38 | for track in self.tracks], np.float32) # M 39 | track_cat = np.array([track['class'] for track in self.tracks], np.int32) # M 40 | item_size = np.array([((item['bbox'][2] - item['bbox'][0]) * \ 41 | (item['bbox'][3] - item['bbox'][1])) \ 42 | for item in results], np.float32) # N 43 | item_cat = np.array([item['class'] for item in results], np.int32) # N 44 | tracks = np.array( 45 | [pre_det['ct'] for pre_det in self.tracks], np.float32) # M x 2 46 | 47 | dist = (((tracks.reshape(1, -1, 2) - \ 48 | dets.reshape(-1, 1, 2)) ** 2).sum(axis=2)) # N x M 49 | 50 | invalid = ((dist > track_size.reshape(1, M)) + \ 51 | (dist > item_size.reshape(N, 1)) + \ 52 | (item_cat.reshape(N, 1) != track_cat.reshape(1, M))) > 0 53 | dist = dist + invalid * 1e18 54 | 55 | if self.opt.hungarian: 56 | item_score = np.array([item['score'] for item in results], np.float32) # N 57 | dist[dist > 1e18] = 1e18 58 | matched_indices = linear_assignment(dist) 59 | else: 60 | matched_indices = greedy_assignment(copy.deepcopy(dist)) 61 | unmatched_dets = [d for d in range(dets.shape[0]) \ 62 | if not (d in matched_indices[:, 0])] 63 | unmatched_tracks = [d for d in range(tracks.shape[0]) \ 64 | if not (d in matched_indices[:, 1])] 65 | 66 | if self.opt.hungarian: 67 | matches = [] 68 | for m in matched_indices: 69 | if dist[m[0], m[1]] > 1e16: 70 | unmatched_dets.append(m[0]) 71 | unmatched_tracks.append(m[1]) 72 | else: 73 | matches.append(m) 74 | matches = np.array(matches).reshape(-1, 2) 75 | else: 76 | matches = matched_indices 77 | 78 | ret = [] 79 | for m in matches: 80 | track = results[m[0]] 81 | track['tracking_id'] = self.tracks[m[1]]['tracking_id'] 82 | track['age'] = 1 83 | track['active'] = 1 84 | track['prev_ct'] = self.tracks[m[1]]['ct'] 85 | track['v'] = [x - y for x,y in zip(track['ct'], 
track['prev_ct'])] 86 | ret.append(track) 87 | 88 | if self.opt.public_det and len(unmatched_dets) > 0: 89 | # Public detection: only create tracks from provided detections 90 | pub_dets = np.array([d['ct'] for d in public_det], np.float32) 91 | dist3 = ((dets.reshape(-1, 1, 2) - pub_dets.reshape(1, -1, 2)) ** 2).sum( 92 | axis=2) 93 | matched_dets = [d for d in range(dets.shape[0]) \ 94 | if not (d in unmatched_dets)] 95 | dist3[matched_dets] = 1e18 96 | for j in range(len(pub_dets)): 97 | i = dist3[:, j].argmin() 98 | if dist3[i, j] < item_size[i]: 99 | dist3[i, :] = 1e18 100 | track = results[i] 101 | if track['score'] > self.opt.new_thresh: 102 | self.id_count += 1 103 | track['tracking_id'] = self.id_count 104 | track['age'] = 1 105 | track['active'] = 1 106 | ret.append(track) 107 | else: 108 | # Private detection: create tracks for all un-matched detections 109 | for i in unmatched_dets: 110 | track = results[i] 111 | if track['score'] > self.opt.new_thresh: 112 | self.id_count += 1 113 | track['tracking_id'] = self.id_count 114 | track['age'] = 1 115 | track['active'] = 1 116 | track['prev_ct'] = track['ct'] 117 | track['v'] = [0, 0] 118 | ret.append(track) 119 | 120 | # Never used 121 | for i in unmatched_tracks: 122 | track = self.tracks[i] 123 | if track['age'] < self.opt.max_age: 124 | track['age'] += 1 125 | track['active'] = 1 # 0 126 | bbox = track['bbox'] 127 | ct = track['ct'] 128 | # v = track['v'] 129 | v = [0, 0] 130 | track['bbox'] = [ 131 | bbox[0] + v[0], bbox[1] + v[1], 132 | bbox[2] + v[0], bbox[3] + v[1]] 133 | track['ct'] = [ct[0] + v[0], ct[1] + v[1]] 134 | ret.append(track) 135 | self.tracks = ret 136 | return ret 137 | 138 | def greedy_assignment(dist, v_dist=None): 139 | matched_indices = [] 140 | if dist.shape[1] == 0: 141 | return np.array(matched_indices, np.int32).reshape(-1, 2) 142 | for i in range(dist.shape[0]): 143 | j = dist[i].argmin() 144 | if dist[i][j] < 1e16: 145 | dist[:, j] = 1e18 146 | matched_indices.append([i, j]) 147 | return np.array(matched_indices, np.int32).reshape(-1, 2) 148 | -------------------------------------------------------------------------------- /src/lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | 7 | class AverageMeter(object): 8 | """Computes and stores the average and current value""" 9 | def __init__(self): 10 | self.reset() 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | if self.count > 0: 23 | self.avg = self.sum / self.count -------------------------------------------------------------------------------- /src/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '../lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/tools/annot_bbox.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import cv2 5 | import argparse 6 | import numpy as 
np 7 | image_ext = ['jpg', 'jpeg', 'png', 'webp'] 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--image_path', default='') 11 | parser.add_argument('--save_path', default='') 12 | MAX_CACHE = 20 13 | CAT_NAMES = ['cat'] 14 | 15 | def _sort_expt(pts): 16 | t, l, b, r = 0, 0, 0, 0 17 | for i in range(4): 18 | if pts[i][0] < pts[l][0]: 19 | l = i 20 | if pts[i][1] < pts[t][1]: 21 | t = i 22 | if pts[i][0] > pts[r][0]: 23 | r = i 24 | if pts[i][1] > pts[b][1]: 25 | b = i 26 | ret = [pts[t], pts[l], pts[b], pts[r]] 27 | return ret 28 | 29 | def _expt2bbox(expt): 30 | expt = np.array(expt, dtype=np.int32) 31 | bbox = [int(expt[:, 0].min()), int(expt[:, 1].min()), 32 | int(expt[:, 0].max()), int(expt[:, 1].max())] 33 | return bbox 34 | 35 | def save_txt(txt_name, pts_cls): 36 | ret = [] 37 | for i in range(len(pts_cls)): 38 | ret.append(np.array(pts_cls[i][:4], dtype=np.int32).reshape(8).tolist() \ 39 | + [pts_cls[i][4]]) 40 | np.savetxt(txt_name, np.array(ret, dtype=np.int32), fmt='%d') 41 | 42 | def click(event, x, y, flags, param): 43 | global expt_cls, bboxes, pts 44 | if event == cv2.EVENT_LBUTTONDOWN: 45 | pts.append([x, y]) 46 | cv2.circle(img, (x, y), 5, (255, 0, 255), -1) 47 | if len(pts) == 4: 48 | expt = _sort_expt(pts) 49 | bbox = _expt2bbox(expt) 50 | expt_cls.append(expt + [cls]) 51 | cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), 52 | (255, 0, 255), 2, cv2.LINE_AA) 53 | pts = [] 54 | 55 | if __name__ == '__main__': 56 | cat_info = [] 57 | for i, cat in enumerate(CAT_NAMES): 58 | cat_info.append({'name': cat, 'id': i + 1}) 59 | 60 | args = parser.parse_args() 61 | if args.save_path == '': 62 | args.save_path = os.path.join(args.image_path, '..', 'click_annotation') 63 | if not os.path.exists(args.save_path): 64 | os.mkdir(args.save_path) 65 | 66 | ann_path = os.path.join(args.save_path, 'annotations.json') 67 | if os.path.exists(ann_path): 68 | anns = json.load(open(ann_path, 'r')) 69 | else: 70 | anns = {'annotations': [], 'images': [], 'categories': cat_info} 71 | 72 | assert os.path.exists(args.image_path) 73 | ls = os.listdir(args.image_path) 74 | image_names = [] 75 | for file_name in sorted(ls): 76 | ext = file_name[file_name.rfind('.') + 1:].lower() 77 | if (ext in image_ext): 78 | image_names.append(file_name) 79 | 80 | i = 0 81 | cls = 1 82 | cached = 0 83 | while i < len(image_names): 84 | image_name = image_names[i] 85 | txt_name = os.path.join( 86 | args.save_path, image_name[:image_name.rfind('.')] + '.txt') 87 | if os.path.exists(txt_name) or image_name in anns: 88 | i = i + 1 89 | continue 90 | image_path = os.path.join(args.image_path, image_name) 91 | img = cv2.imread(image_path) 92 | cv2.namedWindow(image_name) 93 | cv2.setMouseCallback(image_name, click) 94 | expt_cls, pts = [], [] 95 | while True: 96 | finished = False 97 | cv2.imshow(image_name, img) 98 | key = cv2.waitKey(1) 99 | if key == 100: 100 | i = i + 1 101 | save_txt(txt_name, expt_cls) 102 | image_id = len(anns['images']) 103 | image_info = {'file_name': image_name, 'id': image_id} 104 | anns['images'].append(image_info) 105 | for ann in expt_cls: 106 | ann_id = len(anns['annotations']) 107 | ann_dict = {'image_id': image_id, 'id': ann_id, 'categoty_id': ann[4], 108 | 'bbox': _expt2bbox(ann[:4]), 'extreme_points': ann[:4]} 109 | anns['annotations'].append(ann_dict) 110 | cached = cached + 1 111 | print('saved to ', txt_name) 112 | if cached > MAX_CACHE: 113 | print('Saving json', ann_path) 114 | json.dump(anns, open(ann_path, 'w')) 115 | cached = 0 116 | break 117 | 
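# Key bindings for the annotation loop: 100 ('d') saves the current image's boxes and advances
# to the next image, 97 ('a') steps back one image, and 27 (Esc) dumps annotations.json and exits.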
elif key == 97: 118 | i = i - 1 119 | break 120 | elif key == 27: 121 | json.dump(anns, open(ann_path, 'w')) 122 | sys.exit(0) 123 | cv2.destroyAllWindows() 124 | -------------------------------------------------------------------------------- /src/tools/convert_crowdhuman_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import cv2 5 | 6 | DATA_PATH = '../../data/crowdhuman/' 7 | OUT_PATH = DATA_PATH + 'annotations/' 8 | SPLITS = ['val', 'train'] 9 | DEBUG = False 10 | 11 | def load_func(fpath): 12 | print('fpath', fpath) 13 | assert os.path.exists(fpath) 14 | with open(fpath,'r') as fid: 15 | lines = fid.readlines() 16 | records =[json.loads(line.strip('\n')) for line in lines] 17 | return records 18 | 19 | if __name__ == '__main__': 20 | if not os.path.exists(OUT_PATH): 21 | os.mkdir(OUT_PATH) 22 | for split in SPLITS: 23 | data_path = DATA_PATH + split 24 | out_path = OUT_PATH + '{}.json'.format(split) 25 | out = {'images': [], 'annotations': [], 26 | 'categories': [{'id': 1, 'name': 'person'}]} 27 | ann_path = DATA_PATH + '/annotation_{}.odgt'.format(split) 28 | anns_data = load_func(ann_path) 29 | image_cnt = 0 30 | ann_cnt = 0 31 | video_cnt = 0 32 | for ann_data in anns_data: 33 | image_cnt += 1 34 | image_info = {'file_name': '{}.jpg'.format(ann_data['ID']), 35 | 'id': image_cnt} 36 | out['images'].append(image_info) 37 | if split != 'test': 38 | anns = ann_data['gtboxes'] 39 | for i in range(len(anns)): 40 | ann_cnt += 1 41 | ann = {'id': ann_cnt, 42 | 'category_id': 1, 43 | 'image_id': image_cnt, 44 | 'bbox_vis': anns[i]['vbox'], 45 | 'bbox': anns[i]['fbox'], 46 | 'iscrowd': 1 if 'extra' in anns[i] and \ 47 | 'ignore' in anns[i]['extra'] and \ 48 | anns[i]['extra']['ignore'] == 1 else 0} 49 | out['annotations'].append(ann) 50 | print('loaded {} for {} images and {} samples'.format( 51 | split, len(out['images']), len(out['annotations']))) 52 | json.dump(out, open(out_path, 'w')) 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /src/tools/convert_kitti_to_tao.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | DATA_PATH = '../../data/kitti_tracking/' 5 | SPLITS = ['tracking_val_half'] 6 | 7 | 8 | def get_cats_for_vid(anns, vid_id, image2vid): 9 | all_categories = set([]) 10 | for cat in anns['categories']: 11 | all_categories.add(cat['id']) 12 | 13 | positive = set([]) 14 | for ann in anns['annotations']: 15 | ann_vid_id = image2vid[ann['image_id']] 16 | if ann_vid_id == vid_id: 17 | category = ann['category_id'] 18 | positive.add(category) 19 | 20 | return positive, all_categories - positive 21 | 22 | 23 | def get_image2video_map(anns): 24 | mapping = {} 25 | for img in anns['images']: 26 | mapping[img['id']] = img['video_id'] 27 | img['frame_index'] = img['frame_id'] - 1 28 | 29 | return mapping, anns 30 | 31 | 32 | def unique_track_ids(anns, image2vid): 33 | unique_tracks = {} 34 | track_counter = 0 35 | for ann in anns['annotations']: 36 | orig_track_id = ann['track_id'] 37 | image_id = ann['image_id'] 38 | ann['area'] = ann['bbox'][2] * ann['bbox'][3] 39 | video_id = image2vid[image_id] 40 | vid_track_pair = f"{video_id}_{orig_track_id}" 41 | if vid_track_pair not in unique_tracks: 42 | unique_tracks[vid_track_pair] = track_counter 43 | track_counter += 1 44 | 45 | tracks = [] 46 | processed_tracks = set([]) 47 | for ann in anns['annotations']: 48 | orig_track_id =
ann['track_id'] 49 | image_id = ann['image_id'] 50 | video_id = image2vid[image_id] 51 | vid_track_pair = f"{video_id}_{orig_track_id}" 52 | ann['track_id'] = unique_tracks[vid_track_pair] 53 | if ann['track_id'] not in processed_tracks: 54 | track = {'id': ann['track_id'], 'category_id': ann['category_id'], 'video_id': video_id} 55 | processed_tracks.add(ann['track_id']) 56 | tracks.append(track) 57 | 58 | anns['tracks'] = tracks 59 | 60 | return anns 61 | 62 | 63 | 64 | if __name__ == '__main__': 65 | ann_dir = DATA_PATH + '/annotations/' 66 | 67 | for split in SPLITS: 68 | print("Processing split %s" % split) 69 | anns = json.load(open(ann_dir + split + ".json")) 70 | image2vid, anns = get_image2video_map(anns) 71 | for vid in anns['videos']: 72 | print("Processing video %s" % vid['file_name']) 73 | vid['not_exhaustive_category_ids'] = [] 74 | positives, negatives = get_cats_for_vid(anns, vid['id'], image2vid) 75 | vid['neg_category_ids'] = list(negatives) 76 | 77 | anns = unique_track_ids(anns, image2vid) 78 | 79 | json.dump(anns, open(ann_dir + split + "_tao.json", 'w')) -------------------------------------------------------------------------------- /src/tools/convert_mot_det_to_results.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import os 4 | from collections import defaultdict 5 | split = 'val_half' 6 | 7 | DET_PATH = '../../data/mot17/train/' 8 | ANN_PATH = '../../data/mot17/annotations/{}.json'.format(split) 9 | OUT_DIR = '../../data/mot17/results/' 10 | OUT_PATH = OUT_DIR + '{}_det.json'.format(split) 11 | 12 | if __name__ == '__main__': 13 | if not os.path.exists(OUT_DIR): 14 | os.mkdir(OUT_DIR) 15 | seqs = [s for s in os.listdir(DET_PATH)] 16 | data = json.load(open(ANN_PATH, 'r')) 17 | images = data['images'] 18 | image_to_anns = defaultdict(list) 19 | for seq in sorted(seqs): 20 | print('seq', seq) 21 | seq_path = '{}/{}/'.format(DET_PATH, seq) 22 | if not os.path.exists(seq_path + 'det/det_val_half.txt'): 23 | continue 24 | if split == 'val_half': 25 | ann_path = seq_path + 'det/det_val_half.txt' 26 | train_ann_path = seq_path + 'det/det_train_half.txt' 27 | train_anns = np.loadtxt(train_ann_path, dtype=np.float32, delimiter=',') 28 | frame_base = int(train_anns[:, 0].max()) 29 | else: 30 | ann_path = seq_path + 'det/det.txt' 31 | frame_base = 0 32 | # if not IS_THIRD_PARTY: 33 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') 34 | for i in range(len(anns)): 35 | frame_id = int(anns[i][0]) 36 | file_name = '{}/img1/{:06d}.jpg'.format(seq, frame_id + frame_base) 37 | bbox = (anns[i][2:6]).tolist() 38 | score = 1 # float(anns[i][8]) 39 | image_to_anns[file_name].append(bbox + [score]) 40 | 41 | results = {} 42 | for image_info in images: 43 | image_id = image_info['id'] 44 | file_name = image_info['file_name'] 45 | dets = image_to_anns[file_name] 46 | results[image_id] = [] 47 | for det in dets: 48 | bbox = [float(det[0]), float(det[1]), \ 49 | float(det[0] + det[2]), float(det[1] + det[3])] 50 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 51 | results[image_id].append( 52 | {'bbox': bbox, 'score': float(det[4]), 'class': 1, 'ct': ct}) 53 | out_path = OUT_PATH 54 | json.dump(results, open(out_path, 'w')) 55 | -------------------------------------------------------------------------------- /src/tools/convert_mot_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import cv2 
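# Expects the MOT17 layout produced by get_mot_17.sh: per-sequence folders with frames under
# seq/img1/, ground truth in seq/gt/gt.txt and public detections in seq/det/det.txt. The script
# writes COCO-style JSON (images, annotations with track_id, videos) per split, and for the
# *_half splits it also writes gt_<split>.txt / det_<split>.txt restricted to that half of each
# sequence.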
5 | 6 | DATA_PATH = '../../data/mot17/' 7 | OUT_PATH = DATA_PATH + 'annotations/' 8 | SPLITS = ['train_half', 'val_half', 'train', 'test'] 9 | CREATE_SPLITTED_ANN = True 10 | CREATE_SPLITTED_DET = True 11 | 12 | if __name__ == '__main__': 13 | for split in SPLITS: 14 | if split == 'test': 15 | HALF_VIDEO = False 16 | else: 17 | HALF_VIDEO = True 18 | data_path = DATA_PATH + (split if not HALF_VIDEO else 'train') 19 | out_path = OUT_PATH + '{}.json'.format(split) 20 | out = {'images': [], 'annotations': [], 21 | 'categories': [{'id': 1, 'name': 'pedestrain'}], 22 | 'videos': []} 23 | seqs = os.listdir(data_path) 24 | image_cnt = 0 25 | ann_cnt = 0 26 | video_cnt = 0 27 | for seq in sorted(seqs): 28 | if '.DS_Store' in seq: 29 | continue 30 | if 'mot17' in DATA_PATH and (split != 'test' and not ('FRCNN' in seq)): 31 | continue 32 | video_cnt += 1 33 | out['videos'].append({ 34 | 'id': video_cnt, 35 | 'file_name': seq}) 36 | seq_path = '{}/{}/'.format(data_path, seq) 37 | img_path = seq_path + 'img1/' 38 | ann_path = seq_path + 'gt/gt.txt' 39 | images = os.listdir(img_path) 40 | num_images = len([image for image in images if 'jpg' in image]) 41 | if HALF_VIDEO and ('half' in split): 42 | image_range = [0, num_images // 2] if 'train' in split else \ 43 | [num_images // 2 + 1, num_images - 1] 44 | else: 45 | image_range = [0, num_images - 1] 46 | for i in range(num_images): 47 | if (i < image_range[0] or i > image_range[1]): 48 | continue 49 | image_info = {'file_name': '{}/img1/{:06d}.jpg'.format(seq, i + 1), 50 | 'id': image_cnt + i + 1, 51 | 'frame_id': i + 1 - image_range[0], 52 | 'prev_image_id': image_cnt + i if i > 0 else -1, 53 | 'next_image_id': \ 54 | image_cnt + i + 2 if i < num_images - 1 else -1, 55 | 'video_id': video_cnt} 56 | out['images'].append(image_info) 57 | print('{}: {} images'.format(seq, num_images)) 58 | if split != 'test': 59 | det_path = seq_path + 'det/det.txt' 60 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') 61 | dets = np.loadtxt(det_path, dtype=np.float32, delimiter=',') 62 | if CREATE_SPLITTED_ANN and ('half' in split): 63 | anns_out = np.array([anns[i] for i in range(anns.shape[0]) if \ 64 | int(anns[i][0]) - 1 >= image_range[0] and \ 65 | int(anns[i][0]) - 1 <= image_range[1]], np.float32) 66 | anns_out[:, 0] -= image_range[0] 67 | gt_out = seq_path + '/gt/gt_{}.txt'.format(split) 68 | fout = open(gt_out, 'w') 69 | for o in anns_out: 70 | fout.write( 71 | '{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:.6f}\n'.format( 72 | int(o[0]),int(o[1]),int(o[2]),int(o[3]),int(o[4]),int(o[5]), 73 | int(o[6]),int(o[7]),o[8])) 74 | fout.close() 75 | if CREATE_SPLITTED_DET and ('half' in split): 76 | dets_out = np.array([dets[i] for i in range(dets.shape[0]) if \ 77 | int(dets[i][0]) - 1 >= image_range[0] and \ 78 | int(dets[i][0]) - 1 <= image_range[1]], np.float32) 79 | dets_out[:, 0] -= image_range[0] 80 | det_out = seq_path + '/det/det_{}.txt'.format(split) 81 | dout = open(det_out, 'w') 82 | for o in dets_out: 83 | dout.write( 84 | '{:d},{:d},{:.1f},{:.1f},{:.1f},{:.1f},{:.6f}\n'.format( 85 | int(o[0]),int(o[1]),float(o[2]),float(o[3]),float(o[4]),float(o[5]), 86 | float(o[6]))) 87 | dout.close() 88 | 89 | print(' {} ann images'.format(int(anns[:, 0].max()))) 90 | for i in range(anns.shape[0]): 91 | frame_id = int(anns[i][0]) 92 | if (frame_id - 1 < image_range[0] or frame_id - 1 > image_range[1]): 93 | continue 94 | track_id = int(anns[i][1]) 95 | cat_id = int(anns[i][7]) 96 | ann_cnt += 1 97 | if not ('15' in DATA_PATH): 98 | if not (float(anns[i][8]) >= 
0.25): 99 | continue 100 | if not (int(anns[i][6]) == 1): 101 | continue 102 | if (int(anns[i][7]) in [3, 4, 5, 6, 9, 10, 11]): # Non-person 103 | continue 104 | if (int(anns[i][7]) in [2, 7, 8, 12]): # Ignored person 105 | category_id = -1 106 | else: 107 | category_id = 1 108 | else: 109 | category_id = 1 110 | ann = {'id': ann_cnt, 111 | 'category_id': category_id, 112 | 'image_id': image_cnt + frame_id, 113 | 'track_id': track_id, 114 | 'bbox': anns[i][2:6].tolist(), 115 | 'conf': float(anns[i][6])} 116 | out['annotations'].append(ann) 117 | image_cnt += num_images 118 | print('loaded {} for {} images and {} samples'.format( 119 | split, len(out['images']), len(out['annotations']))) 120 | json.dump(out, open(out_path, 'w')) 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000154 2 | 0001 empty 000000 000447 3 | 0002 empty 000000 000233 4 | 0003 empty 000000 000144 5 | 0004 empty 000000 000314 6 | 0005 empty 000000 000297 7 | 0006 empty 000000 000270 8 | 0007 empty 000000 000800 9 | 0008 empty 000000 000390 10 | 0009 empty 000000 000803 11 | 0010 empty 000000 000294 12 | 0011 empty 000000 000373 13 | 0012 empty 000000 000078 14 | 0013 empty 000000 000340 15 | 0014 empty 000000 000106 16 | 0015 empty 000000 000376 17 | 0016 empty 000000 000209 18 | 0017 empty 000000 000145 19 | 0018 empty 000000 000339 20 | 0019 empty 000000 001059 21 | 0020 empty 000000 000837 22 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap.test: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000465 2 | 0001 empty 000000 000147 3 | 0002 empty 000000 000243 4 | 0003 empty 000000 000257 5 | 0004 empty 000000 000421 6 | 0005 empty 000000 000809 7 | 0006 empty 000000 000114 8 | 0007 empty 000000 000215 9 | 0008 empty 000000 000165 10 | 0009 empty 000000 000349 11 | 0010 empty 000000 001176 12 | 0011 empty 000000 000774 13 | 0012 empty 000000 000694 14 | 0013 empty 000000 000152 15 | 0014 empty 000000 000850 16 | 0015 empty 000000 000701 17 | 0016 empty 000000 000510 18 | 0017 empty 000000 000305 19 | 0018 empty 000000 000180 20 | 0019 empty 000000 000404 21 | 0020 empty 000000 000173 22 | 0021 empty 000000 000203 23 | 0022 empty 000000 000436 24 | 0023 empty 000000 000430 25 | 0024 empty 000000 000316 26 | 0025 empty 000000 000176 27 | 0026 empty 000000 000170 28 | 0027 empty 000000 000085 29 | 0028 empty 000000 000175 30 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap.training: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000154 2 | 0001 empty 000000 000447 3 | 0002 empty 000000 000233 4 | 0003 empty 000000 000144 5 | 0004 empty 000000 000314 6 | 0005 empty 000000 000297 7 | 0006 empty 000000 000270 8 | 0007 empty 000000 000800 9 | 0008 empty 000000 000390 10 | 0009 empty 000000 000803 11 | 0010 empty 000000 000294 12 | 0011 empty 000000 000373 13 | 0012 empty 000000 000078 14 | 0013 empty 000000 000340 15 | 0014 empty 000000 000106 16 | 0015 empty 000000 000376 17 | 0016 empty 000000 000209 18 | 0017 empty 000000 000145 19 | 0018 empty 000000 000339 20 | 0019 empty 000000 001059 21 | 
0020 empty 000000 000837 22 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_trackingtrain_1-2.seqmap: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000154 2 | 0001 empty 000000 000447 3 | 0002 empty 000000 000233 4 | 0003 empty 000000 000144 5 | 0004 empty 000000 000314 6 | 0005 empty 000000 000297 7 | 0006 empty 000000 000270 8 | 0007 empty 000000 000800 9 | 0008 empty 000000 000390 10 | 0009 empty 000000 000803 11 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_trackingtrain_2-2.seqmap: -------------------------------------------------------------------------------- 1 | 0010 empty 000000 000294 2 | 0011 empty 000000 000373 3 | 0012 empty 000000 000078 4 | 0013 empty 000000 000340 5 | 0014 empty 000000 000106 6 | 0015 empty 000000 000376 7 | 0016 empty 000000 000209 8 | 0017 empty 000000 000145 9 | 0018 empty 000000 000339 10 | 0019 empty 000000 001059 11 | 0020 empty 000000 000837 12 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_trackingval_half.seqmap: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000077 2 | 0001 empty 000000 000224 3 | 0002 empty 000000 000117 4 | 0003 empty 000000 000072 5 | 0004 empty 000000 000157 6 | 0005 empty 000000 000149 7 | 0006 empty 000000 000135 8 | 0007 empty 000000 000400 9 | 0008 empty 000000 000195 10 | 0009 empty 000000 000402 11 | 0010 empty 000000 000147 12 | 0011 empty 000000 000187 13 | 0012 empty 000000 000039 14 | 0013 empty 000000 000170 15 | 0014 empty 000000 000053 16 | 0015 empty 000000 000188 17 | 0016 empty 000000 000105 18 | 0017 empty 000000 000073 19 | 0018 empty 000000 000170 20 | 0019 empty 000000 000530 21 | 0020 empty 000000 000419 22 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/mailpy.py: -------------------------------------------------------------------------------- 1 | class Mail: 2 | """ Dummy class to print messages without sending e-mails""" 3 | def __init__(self,mailaddress): 4 | pass 5 | def msg(self,msg): 6 | print(msg) 7 | def finalize(self,success,benchmark,sha_key,mailaddress=None): 8 | if success: 9 | print("Results for %s (benchmark: %s) sucessfully created" % (benchmark,sha_key)) 10 | else: 11 | print("Creating results for %s (benchmark: %s) failed" % (benchmark,sha_key)) 12 | 13 | -------------------------------------------------------------------------------- /src/tools/eval_motchallenge.py: -------------------------------------------------------------------------------- 1 | """py-motmetrics - metrics for multiple object tracker (MOT) benchmarking. 2 | Christoph Heindl, 2017 3 | https://github.com/cheind/py-motmetrics 4 | Modified by Xingyi Zhou 5 | """ 6 | 7 | import argparse 8 | import glob 9 | import os 10 | import logging 11 | import motmetrics as mm 12 | import pandas as pd 13 | from collections import OrderedDict 14 | from pathlib import Path 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description=""" 18 | Compute metrics for trackers using MOTChallenge ground-truth data. 19 | Files 20 | ----- 21 | All file content, ground truth and test files, have to comply with the 22 | format described in 23 | Milan, Anton, et al. 
24 | "Mot16: A benchmark for multi-object tracking." 25 | arXiv preprint arXiv:1603.00831 (2016). 26 | https://motchallenge.net/ 27 | Structure 28 | --------- 29 | Layout for ground truth data 30 | //gt/gt.txt 31 | //gt/gt.txt 32 | ... 33 | Layout for test data 34 | /.txt 35 | /.txt 36 | ... 37 | Sequences of ground truth and test will be matched according to the `` 38 | string.""", formatter_class=argparse.RawTextHelpFormatter) 39 | 40 | parser.add_argument('groundtruths', type=str, help='Directory containing ground truth files.') 41 | parser.add_argument('tests', type=str, help='Directory containing tracker result files') 42 | parser.add_argument('--gt_type', type=str, default='') 43 | parser.add_argument('--eval_official', action='store_true') 44 | parser.add_argument('--loglevel', type=str, help='Log level', default='info') 45 | parser.add_argument('--fmt', type=str, help='Data format', default='mot15-2D') 46 | parser.add_argument('--solver', type=str, help='LAP solver to use') 47 | return parser.parse_args() 48 | 49 | def compare_dataframes(gts, ts): 50 | accs = [] 51 | names = [] 52 | for k, tsacc in ts.items(): 53 | if k in gts: 54 | logging.info('Comparing {}...'.format(k)) 55 | accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5)) 56 | names.append(k) 57 | else: 58 | logging.warning('No ground truth for {}, skipping.'.format(k)) 59 | 60 | return accs, names 61 | 62 | if __name__ == '__main__': 63 | 64 | args = parse_args() 65 | 66 | loglevel = getattr(logging, args.loglevel.upper(), None) 67 | if not isinstance(loglevel, int): 68 | raise ValueError('Invalid log level: {} '.format(args.loglevel)) 69 | logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s - %(message)s', datefmt='%I:%M:%S') 70 | 71 | if args.solver: 72 | mm.lap.default_solver = args.solver 73 | 74 | gt_type = args.gt_type 75 | print('gt_type', gt_type) 76 | gtfiles = glob.glob( 77 | os.path.join(args.groundtruths, '*/gt/gt{}.txt'.format(gt_type))) 78 | print('gt_files', gtfiles) 79 | tsfiles = [f for f in glob.glob(os.path.join(args.tests, '*.txt')) if not os.path.basename(f).startswith('eval')] 80 | 81 | logging.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) 82 | logging.info('Available LAP solvers {}'.format(mm.lap.available_solvers)) 83 | logging.info('Default LAP solver \'{}\''.format(mm.lap.default_solver)) 84 | logging.info('Loading files.') 85 | 86 | gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt=args.fmt, min_confidence=1)) for f in gtfiles]) 87 | ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt=args.fmt)) for f in tsfiles]) 88 | 89 | mh = mm.metrics.create() 90 | accs, names = compare_dataframes(gt, ts) 91 | 92 | logging.info('Running metrics') 93 | metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', \ 94 | 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', \ 95 | 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects'] 96 | summary = mh.compute_many( 97 | accs, names=names, 98 | metrics=metrics, generate_overall=True) 99 | # summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True) 100 | # print(mm.io.render_summary( 101 | # summary, formatters=mh.formatters, 102 | # namemap=mm.io.motchallenge_metric_names)) 103 | div_dict = { 104 | 'num_objects': ['num_false_positives', 'num_misses', 105 | 'num_switches', 'num_fragmentations'], 106 | 'num_unique_objects': ['mostly_tracked', 
'partially_tracked', 107 | 'mostly_lost']} 108 | for divisor in div_dict: 109 | for divided in div_dict[divisor]: 110 | summary[divided] = (summary[divided] / summary[divisor]) 111 | fmt = mh.formatters 112 | change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches', 113 | 'num_fragmentations', 'mostly_tracked', 'partially_tracked', 114 | 'mostly_lost'] 115 | for k in change_fmt_list: 116 | fmt[k] = fmt['mota'] 117 | print(mm.io.render_summary( 118 | summary, formatters=fmt, 119 | namemap=mm.io.motchallenge_metric_names)) 120 | if args.eval_official: 121 | metrics = mm.metrics.motchallenge_metrics + ['num_objects'] 122 | summary = mh.compute_many( 123 | accs, names=names, 124 | metrics=metrics, generate_overall=True) 125 | print(mm.io.render_summary( 126 | summary, formatters=mh.formatters, 127 | namemap=mm.io.motchallenge_metric_names)) 128 | logging.info('Completed') 129 | -------------------------------------------------------------------------------- /src/tools/get_mot_17.sh: -------------------------------------------------------------------------------- 1 | mkdir ../../data/mot17 2 | cd ../../data/mot17 3 | wget https://motchallenge.net/data/MOT17.zip 4 | unzip MOT17.zip 5 | rm MOT17.zip 6 | mkdir annotations 7 | mv MOT17/train . 8 | mv MOT17/test . 9 | rm -rf MOT17 10 | cd ../../src/tools/ 11 | python convert_mot_to_coco.py 12 | python interp_mot.py 13 | python convert_mot_det_to_results.py -------------------------------------------------------------------------------- /src/tools/interp_mot.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import defaultdict 3 | import pycocotools.coco as coco 4 | import copy 5 | 6 | SPLITS = ['train_half', 'train'] 7 | 8 | def interpolate(track, start_ann, start_ind, end_ann, end_ind, max_id, frames): 9 | print('interpolating from %d to %d' % (start_ind, end_ind)) 10 | start_box = start_ann['bbox'] 11 | end_box = end_ann['bbox'] 12 | len_occl = end_ind - start_ind 13 | x_step = (end_box[0] - start_box[0]) / len_occl 14 | y_step = (end_box[1] - start_box[1]) / len_occl 15 | step = 1 16 | for i in range(start_ind + 1, end_ind): 17 | new_ann = copy.deepcopy(start_ann) 18 | new_ann['bbox'][0] += step * x_step 19 | new_ann['bbox'][1] += step * y_step 20 | new_ann['occlusion'] = 0.01 21 | new_ann['id'] = max_id 22 | new_ann['image_id'] = frames[i]['id'] 23 | max_id += 1 24 | track[i] = new_ann 25 | step += 1 26 | 27 | return max_id 28 | 29 | def process_video(frames, dataset, max_id): 30 | tracks = {} 31 | for i, frame in enumerate(frames): 32 | invis_count = 0 33 | occl_count = 0 34 | ann_ids = dataset.getAnnIds(imgIds=[frame['id']]) 35 | anns = dataset.loadAnns(ids=ann_ids) 36 | for ann in anns: 37 | track_id = ann['track_id'] 38 | ann['occlusion'] = 1 39 | if track_id not in tracks: 40 | tracks[track_id] = [None] * len(frames) 41 | tracks[track_id][i] = ann 42 | 43 | for track_id in tracks.keys(): 44 | track = tracks[track_id] 45 | last_seen = None 46 | start_ind = None 47 | in_occl = False 48 | for i, ann in enumerate(track): 49 | if ann is not None and in_occl: 50 | max_id = interpolate(track, last_seen, start_ind, ann, i, max_id, frames) 51 | in_occl = False 52 | 53 | if ann is not None: 54 | last_seen = ann 55 | start_ind = i 56 | if ann is None and last_seen is not None: 57 | in_occl = True 58 | 59 | annotations = [] 60 | for track_id in tracks.keys(): 61 | track = tracks[track_id] 62 | for i, ann in enumerate(track): 63 | if ann is not None: 64 | 
annotations.append(ann) 65 | 66 | return annotations, max_id 67 | 68 | 69 | if __name__ == '__main__': 70 | for split in SPLITS: 71 | data = json.load(open('../../data/mot17/annotations/%s.json' % split)) 72 | coco_anns = coco.COCO('../../data/mot17/annotations/%s.json' % split) 73 | 74 | max_id = -1 75 | for ann in data['annotations']: 76 | if ann['id'] > max_id: 77 | max_id = ann['id'] 78 | 79 | max_id += 1 80 | 81 | video_to_images = defaultdict(list) 82 | video_to_image_map = {} 83 | for image in coco_anns.dataset['images']: 84 | video_to_images[image['video_id']].append(image) 85 | 86 | for vid_id in video_to_images.keys(): 87 | images = video_to_images[vid_id] 88 | images.sort(key=lambda x: x['frame_id']) 89 | video_to_images[vid_id] = images 90 | 91 | annotations = [] 92 | for vid_id in video_to_images.keys(): 93 | annotations_vid, max_id = process_video(video_to_images[vid_id], coco_anns, max_id) 94 | annotations.extend(annotations_vid) 95 | 96 | data['annotations'] = annotations 97 | 98 | json.dump(data, open('../../data/mot17/annotations/%s_interp.json' % split, 'w')) -------------------------------------------------------------------------------- /src/tools/remove_optimizers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | IN_PATH = '../../centertrack_models/' 4 | OUT_PATH = '../../models/' 5 | REMOVE_KEYS = ['base.fc'] 6 | 7 | if __name__ == '__main__': 8 | models = sorted(os.listdir(IN_PATH)) 9 | for model in models: 10 | model_path = IN_PATH + model 11 | print(model) 12 | data = torch.load(model_path) 13 | state_dict = data['state_dict'] 14 | keys = state_dict.keys() 15 | delete_keys = [] 16 | for k in keys: 17 | should_delete = False 18 | for remove_key in REMOVE_KEYS: 19 | if remove_key in k: 20 | should_delete = True 21 | if should_delete: 22 | delete_keys.append(k) 23 | for k in delete_keys: 24 | print('delete ', k) 25 | del state_dict[k] 26 | out_data = {'epoch': data['epoch'], 'state_dict': state_dict} 27 | torch.save(out_data, OUT_PATH + model) 28 | -------------------------------------------------------------------------------- /src/tools/vis_tracking_kitti.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import glob 5 | import sys 6 | from collections import defaultdict 7 | from pathlib import Path 8 | 9 | DATA_PATH = '../../data/kitti_tracking/' 10 | IMG_PATH = DATA_PATH + 'data_tracking_image_2/testing/image_02/' 11 | SAVE_VIDEO = False 12 | IS_GT = False 13 | 14 | cats = ['Pedestrian', 'Car', 'Cyclist'] 15 | cat_ids = {cat: i for i, cat in enumerate(cats)} 16 | COLORS = [(255, 0, 255), (122, 122, 255), (255, 0, 0)] 17 | 18 | def draw_bbox(img, bboxes, c=(255, 0, 255)): 19 | for bbox in bboxes: 20 | color = COLORS[int(bbox[5])] 21 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 22 | (int(bbox[2]), int(bbox[3])), 23 | color, 2, lineType=cv2.LINE_AA) 24 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 25 | txt = '{}'.format(int(bbox[4])) 26 | cv2.putText(img, txt, (int(ct[0]), int(ct[1])), 27 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, 28 | color, thickness=1, lineType=cv2.LINE_AA) 29 | 30 | if __name__ == '__main__': 31 | seqs = os.listdir(IMG_PATH) 32 | if SAVE_VIDEO: 33 | save_path = sys.argv[1][:sys.argv[1].rfind('/res')] + '/video' 34 | if not os.path.exists(save_path): 35 | os.mkdir(save_path) 36 | print('save_video_path', save_path) 37 | for seq in sorted(seqs): 38 | print('seq', seq) 39 | if 
'.DS_Store' in seq: 40 | continue 41 | # if SAVE_VIDEO: 42 | # fourcc = cv2.VideoWriter_fourcc(*'XVID') 43 | # video = cv2.VideoWriter( 44 | # '{}/{}.avi'.format(save_path, seq),fourcc, 10.0, (1024, 750)) 45 | 46 | 47 | preds = {} 48 | for K in range(1, len(sys.argv)): 49 | pred_path = sys.argv[K] + '/{}.txt'.format(seq) 50 | pred_file = open(pred_path, 'r') 51 | preds[K] = defaultdict(list) 52 | for line in pred_file: 53 | tmp = line[:-1].split(' ') 54 | frame_id = int(tmp[0]) 55 | track_id = int(tmp[1]) 56 | cat_id = cat_ids[tmp[2]] 57 | bbox = [float(tmp[6]), float(tmp[7]), float(tmp[8]), float(tmp[9])] 58 | score = float(tmp[17]) 59 | preds[K][frame_id].append(bbox + [track_id, cat_id, score]) 60 | 61 | images_path = '{}/{}/'.format(IMG_PATH, seq) 62 | images = os.listdir(images_path) 63 | num_images = len([image for image in images if 'png' in image]) 64 | 65 | for i in range(num_images): 66 | frame_id = i 67 | file_path = '{}/{:06d}.png'.format(images_path, i) 68 | img = cv2.imread(file_path) 69 | for K in range(1, len(sys.argv)): 70 | img_pred = img.copy() 71 | draw_bbox(img_pred, preds[K][frame_id]) 72 | cv2.imshow('pred{}'.format(K), img_pred) 73 | cv2.waitKey() 74 | # if SAVE_VIDEO: 75 | # video.write(img_pred) 76 | # if SAVE_VIDEO: 77 | # video.release() 78 | -------------------------------------------------------------------------------- /src/tools/vis_tracking_mot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import glob 5 | import sys 6 | from collections import defaultdict 7 | from pathlib import Path 8 | 9 | GT_PATH = '../../data/mot17/test/' 10 | IMG_PATH = GT_PATH 11 | SAVE_VIDEO = True 12 | RESIZE = 2 13 | IS_GT = False 14 | 15 | def draw_bbox(img, bboxes, c=(255, 0, 255)): 16 | for bbox in bboxes: 17 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 18 | (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])), 19 | c, 2, lineType=cv2.LINE_AA) 20 | ct = [bbox[0] + bbox[2] / 2, bbox[1] + bbox[3] / 2] 21 | txt = '{}'.format(bbox[4]) 22 | cv2.putText(img, txt, (int(ct[0]), int(ct[1])), 23 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, 24 | (255, 122, 255), thickness=1, lineType=cv2.LINE_AA) 25 | 26 | if __name__ == '__main__': 27 | seqs = os.listdir(GT_PATH) 28 | if SAVE_VIDEO: 29 | save_path = sys.argv[1][:sys.argv[1].rfind('/res')] + '/video' 30 | if not os.path.exists(save_path): 31 | os.mkdir(save_path) 32 | print('save_video_path', save_path) 33 | for seq in sorted(seqs): 34 | print('seq', seq) 35 | # if len(sys.argv) > 2 and not sys.argv[2] in seq: 36 | # continue 37 | if '.DS_Store' in seq: 38 | continue 39 | # if SAVE_VIDEO: 40 | # fourcc = cv2.VideoWriter_fourcc(*'XVID') 41 | # video = cv2.VideoWriter( 42 | # '{}/{}.avi'.format(save_path, seq),fourcc, 10.0, (1024, 750)) 43 | seq_path = '{}/{}/'.format(GT_PATH, seq) 44 | if IS_GT: 45 | ann_path = seq_path + 'gt/gt.txt' 46 | else: 47 | ann_path = seq_path + 'det/det.txt' 48 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') 49 | print('anns shape', anns.shape) 50 | image_to_anns = defaultdict(list) 51 | for i in range(anns.shape[0]): 52 | if (not IS_GT) or (int(anns[i][6]) == 1 and float(anns[i][8]) >= 0.25): 53 | frame_id = int(anns[i][0]) 54 | track_id = int(anns[i][1]) 55 | bbox = (anns[i][2:6] / RESIZE).tolist() 56 | image_to_anns[frame_id].append(bbox + [track_id]) 57 | 58 | image_to_preds = {} 59 | for K in range(1, len(sys.argv)): 60 | image_to_preds[K] = defaultdict(list) 61 | pred_path = sys.argv[K] + '/{}.txt'.format(seq) 
62 | try: 63 | preds = np.loadtxt(pred_path, dtype=np.float32, delimiter=',') 64 | except: 65 | preds = np.loadtxt(pred_path, dtype=np.float32, delimiter=' ') 66 | for i in range(preds.shape[0]): 67 | frame_id = int(preds[i][0]) 68 | track_id = int(preds[i][1]) 69 | bbox = (preds[i][2:6] / RESIZE).tolist() 70 | image_to_preds[K][frame_id].append(bbox + [track_id]) 71 | 72 | img_path = seq_path + 'img1/' 73 | images = os.listdir(img_path) 74 | num_images = len([image for image in images if 'jpg' in image]) 75 | 76 | for i in range(num_images): 77 | frame_id = i + 1 78 | file_name = '{}/img1/{:06d}.jpg'.format(seq, i + 1) 79 | file_path = IMG_PATH + file_name 80 | img = cv2.imread(file_path) 81 | if RESIZE != 1: 82 | img = cv2.resize(img, (img.shape[1] // RESIZE, img.shape[0] // RESIZE)) 83 | for K in range(1, len(sys.argv)): 84 | img_pred = img.copy() 85 | draw_bbox(img_pred, image_to_preds[K][frame_id]) 86 | cv2.imshow('pred{}'.format(K), img_pred) 87 | draw_bbox(img, image_to_anns[frame_id]) 88 | cv2.imshow('gt', img) 89 | cv2.waitKey() 90 | # if SAVE_VIDEO: 91 | # video.write(img_pred) 92 | # if SAVE_VIDEO: 93 | # video.release() 94 | -------------------------------------------------------------------------------- /tao/.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | tao.egg-info 3 | .ipynb_checkpoints 4 | cache 5 | .vscode 6 | tao/data/s3_cache 7 | .mypy_cache 8 | debug/ 9 | _internal_links.yaml 10 | _pull_internal_changes.py 11 | __pycache__ 12 | -------------------------------------------------------------------------------- /tao/LICENSE: -------------------------------------------------------------------------------- 1 | NOTE: This license applies to the code in this repository. 2 | 3 | MIT License 4 | 5 | Copyright (c) 2020 TAO Dataset 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | -------------------------------------------------------------------------------- /tao/README.md: -------------------------------------------------------------------------------- 1 | # TAO: A Large-Scale Benchmark for Tracking Any Object 2 | 3 | [[Paper](https://arxiv.org/abs/2005.10356)] [[Website](http://taodataset.org)] 4 | 5 | [Achal Dave](http://www.achaldave.com/), [Tarasha Khurana](http://www.cs.cmu.edu/~tkhurana/), [Pavel Tokmakov](https://pvtokmakov.github.io/home/), [Cordelia Schmid](https://thoth.inrialpes.fr/~schmid/), [Deva Ramanan](http://www.cs.cmu.edu/~deva/) 6 | 7 | ## Latest updates 8 | 9 | - \[2020.07.10\]: The ECCV challenge is now live at the 10 | [MOTChallenge website](https://motchallenge.net/results/ECCV_2020_TAO_Challenge/)! 11 | See [here](docs/challenge.md) for more details. 12 | - \[2020.07.02\]: TAO was accepted to ECCV '20 as a spotlight presentation! 13 | - \[2020.02.20\]: We will be hosting a workshop and challenge at ECCV'20. See [here](http://taodataset.org/workshop/) for details. 14 | 15 | ## Setup 16 | 17 | 1. Clone this repo 18 | ``` 19 | git clone https://github.com/TAO-Dataset/tao 20 | ``` 21 | 1. Install TAO toolkit: 22 | ``` 23 | pip install git+https://github.com/TAO-Dataset/tao 24 | ``` 25 | 26 | ## Download dataset 27 | 28 | See [download instructions](./docs/download.md). 29 | 30 | ## Challenge 31 | 32 | We will be hosting a challenge at our 33 | [ECCV '20 workshop](taodataset.org/workshop/). See [here](docs/challenge.md) for details. 34 | 35 | ## Evaluation 36 | 37 | See [evaluation information](./docs/evaluation.md). Contains information on submitting to the challenge server. 38 | 39 | ## Run baseline trackers 40 | 41 | See [tracker instructions](./docs/trackers.md). 42 | 43 | ## Questions? 44 | 45 | Please see the [faqs](./docs/faqs.md) to check if we've anticipated your 46 | question. If not, for questions about TAO usage or the challenge, please use 47 | this Google Group: https://groups.google.com/forum/#!forum/tao-dataset/ 48 | 49 | For bug reports regarding the toolkit, annotations, or image download, please 50 | file an issue in this repository. 51 | 52 | -------------------------------------------------------------------------------- /tao/docs/challenge.md: -------------------------------------------------------------------------------- 1 | # TAO ECCV'20 Multi-Object Tracking Challenge 2 | 3 | We are excited to host a challenge on TAO as part of our 4 | [ECCV workshop](http://taodataset.org/workshop/). 5 | The challenge is hosted on the [motchallenge.net](https://motchallenge.net/) website: 6 | [link](https://motchallenge.net/results/ECCV_2020_TAO_Challenge/). 7 | 8 | ## Important Dates 9 | 10 | - July 10: Challenge released! 11 | - August 16: Challenge closes, winners contacted to prepare presentation for ECCV workshop. 12 | - August 23: ECCV workshop date. Challenge results announced, along with 13 | presentations by challenge submission authors. 14 | 15 | ## Prizes 16 | 17 | We will have the following prizes for the winning entries! 18 | 19 | - First place: $1,500 cash prize, presentation at ECCV workshop. 20 | - Second place: $500 cash prize, presentation at ECCV workshop. 21 | - Honorable mention(s): $250 cash prize, presentation at ECCV workshop. 22 | 23 | ## Protocol 24 | 25 | - **Evaluation data**: The ECCV '20 challenge evaluates multi-object tracking 26 | on the TAO test set. 
27 | 28 | - **Training data**: We do not impose any restrictions on the training data used for 29 | submissions, except that the TAO test videos may not be used for training in any way. 30 | This explicitly precludes, for example, unsupervised training on the TAO test set. 31 | However, the TAO validation videos may be used for training in a supervised or 32 | unsupervised manner. 33 | We encourage training on the LVIS v0.5 dataset, which provides 34 | ample detection training data for categories evaluated in TAO. 35 | 36 | - **WARNING**: The TAO test set contains sequences from existing datasets, which 37 | must be excluded from training. These sequences can be seen from the test 38 | json. In particular, a number of LaSOT training sequences are present in the TAO 39 | test set. 40 | 41 | - For submission instructions, see [evaluation.md](evaluation.md). 42 | 43 | 44 | ## FAQs 45 | 46 | Please see [faqs.md](./faqs.md). 47 | -------------------------------------------------------------------------------- /tao/docs/detector_train.md: -------------------------------------------------------------------------------- 1 | # Training your own detectors 2 | 3 | To train your own detectors, follow the steps below: 4 | 5 | 1. Download the LVIS v0.5 annotations and (LVIS v0.5 + COCO) training 6 | annotations from 7 | [here](https://drive.google.com/file/d/1rPSSIVSer7pweyJS-uqAfIF59uZVJ0Nx/view), 8 | and extract them to `./data/detectron_datasets/lvis-coco`. 9 | 10 | 1. Setup [detectron2](https://github.com/facebookresearch/detectron2). 11 | 12 | 1. Download the COCO `train2017` and `val2017` datasets, and link them to: 13 | 14 | ``` 15 | ./data/detectron_datasets/lvis-coco/train2017 16 | ./data/detectron_datasets/lvis-coco/val2017 17 | ``` 18 | 19 | 1. Use the provided `./scripts/detectors/detectron2_train_net.py` script to 20 | train your detector. 21 | 22 | ``` 23 | python scripts/detectors/detectron2_train_net.py \ 24 | --num-gpus 8 \ 25 | --config-file ./data/configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml \ 26 | DATASETS.TRAIN "('lvis_v0.5_coco_2017_train', )" \ 27 | OUTPUT_DIR /path/to/output-dir 28 | ``` 29 | 30 | This script was tested with detectron2 commit id 31 | fd87af71eebc660dde2f50e4693869bb04f66015. 32 | 33 | -------------------------------------------------------------------------------- /tao/docs/download.md: -------------------------------------------------------------------------------- 1 | # Download TAO 2 | 3 | TAO contains videos from 7 datasets: ArgoVerse, AVA, BDD-100k, Charades, HACS, 4 | LaSOT, and YFCC-100M. 5 | 6 | 1. Download TAO train, val and test sets from the MOTChallenge 7 | [download page](https://motchallenge.net/tao_download.php). 8 | 9 | 1. Uncompress each downloaded file in a single directory, which we will refer to as 10 | `$TAO_DIR`. 11 |
The directory should have the following structure:

12 | 13 | ```bash 14 | └── frames 15 | └── train 16 | ├── ArgoVerse 17 | ├── BDD 18 | ├── Charades 19 | ├── HACS 20 | ├── LaSOT 21 | └── YFCC100M 22 | ``` 23 |

24 | 25 | 1. Download annotations: 26 | 27 | ```bash 28 | python scripts/download/download_annotations.py $TAO_DIR --split train 29 | ``` 30 | 31 | 1. Verify that the dataset was downloaded correctly: 32 | 33 | ```bash 34 | python scripts/download/verify.py $TAO_ROOT --split train 35 | ``` 36 | ## Request video deletion 37 | 38 | If you would like to request a video be deleted from TAO (e.g., because you are 39 | featured in the video or you own the rights), please email me at 40 | achald@cs.cmu.edu. 41 | 42 | -------------------------------------------------------------------------------- /tao/docs/download_hacs_alt.md: -------------------------------------------------------------------------------- 1 | Download and extract from YouTube. 2 | 3 | ``` 4 | python scripts/download/download_hacs.py $TAO_ROOT --split train 5 | ``` 6 | 7 | You can ignore YoutubeDL errors that are printed by this script (e.g., Video not 8 | available). Videos that could not be downloaded will be collected in 9 | `$TAO_ROOT/hacs_missing/missing.txt`. You can request the original HACS videos 10 | by filling out these forms: https://forms.gle/hZD612H5TXDQDozv9 11 | -------------------------------------------------------------------------------- /tao/docs/evaluation.md: -------------------------------------------------------------------------------- 1 | # Evaluating Trackers 2 | 3 | ## Results format 4 | 5 | The TAO toolkit expects results in the same format as COCO, but with additional 6 | `track_id` and `video_id` fields. Specifically, `results.json` should have the 7 | following format: 8 | 9 | ``` 10 | [{ 11 | "image_id" : int, 12 | "category_id" : int, 13 | "bbox" : [x,y,width,height], 14 | "score" : float, 15 | "track_id": int, 16 | "video_id": int 17 | }] 18 | ``` 19 | 20 | 21 | ## Evaluation (toolkit) 22 | 23 | The TAO toolkit provides code for evaluating tracker results. 24 | 25 | ```python 26 | import logging 27 | from tao.toolkit.tao import TaoEval 28 | 29 | # TAO uses logging to print results. Make sure logging is set to show INFO 30 | # messages, or you won't see any evaluation results. 31 | logging.setLevel(logging.INFO) 32 | tao_eval = TaoEval('/path/to/annotations.json', '/path/to/results.json') 33 | tao_eval.run() 34 | tao_eval.print_results() 35 | ``` 36 | 37 | ## Evaluation (command-line) 38 | 39 | TAO also comes with a higher-level `evaluate.py` script which incorporates 40 | various additional features for evaluation. 41 | 42 | In all the examples below, let - 43 | - `$ANNOTATIONS` be the `/path/to/annotations.json` 44 | - `$RESULTS` be the `/path/to/results.json` 45 | - `$OUTPUT_DIR` be the `/path/to/output/logdir`. 46 | 47 | We demonstrate some features below; for more, take a look at the config 48 | description in [`./tao/utils/evaluation.py`](/tao/utils/evaluation.py). 49 | 50 | - Simple evaluation, with logging to an output directory 51 | 52 | ```bash 53 | python scripts/evaluate.py \ 54 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 55 | ``` 56 | 57 | -
Classification oracle

58 | 59 | ```bash 60 | python scripts/evaluation/evaluate.py \ 61 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 62 | --config-updates ORACLE.TYPE class 63 | ``` 64 |

65 | 66 | -
Track oracle (for linking detections)

67 | 68 | ```bash 69 | python scripts/evaluation/evaluate.py \ 70 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 71 | --config-updates ORACLE.TYPE track 72 | ``` 73 |

74 | 75 | -
Evaluate MOTA

76 | 77 | ```bash 78 | python scripts/evaluation/evaluate.py \ 79 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 80 | --config-updates MOTA.ENABLED True 81 | ``` 82 |

83 | 84 | -
Evaluate at (3D) IoU threshold of 0.9

85 | 86 | ```bash 87 | python scripts/evaluation/evaluate.py \ 88 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 89 | --config-updates EVAL_IOUS "[0.9]" 90 | ``` 91 |

92 | 93 | -
Evaluate at multiple (3D) IoU thresholds

94 | 95 | ```bash 96 | python scripts/evaluation/evaluate.py \ 97 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 98 | --config-updates \ 99 | EVAL_IOUS "[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]" 100 | ``` 101 |

102 | 103 | -
Category-agnostic evaluation

104 | 105 | ```bash 106 | python scripts/evaluation/evaluate.py \ 107 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 108 | --config-updates CATEGORY_AGNOSTIC True 109 | ``` 110 |

111 | 112 | -
Report evaluation by source dataset

113 | 114 | ```bash 115 | python scripts/evaluation/evaluate.py \ 116 | $ANNOTATIONS $RESULTS --output-dir $OUTPUT_DIR \ 117 | --config-updates EVAL_BY_DATASET True 118 | ``` 119 |

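All of the `evaluate.py` variants above consume a `results.json` in the format described at the top of this page. As a rough, unofficial sketch (the `tracker_output` tuples and file paths below are made-up placeholders), the following shows one way to assemble such a file from per-frame tracker output and score it with the `TaoEval` API shown earlier:

```python
import json
import logging

from tao.toolkit.tao import TaoEval

# Hypothetical per-frame tracker output:
# (image_id, video_id, track_id, category_id, x, y, w, h, score),
# where the ids refer to the TAO annotations json being evaluated against.
tracker_output = [
    (12345, 17, 3, 95, 100.0, 150.0, 80.0, 60.0, 0.87),
]

results = [
    {
        'image_id': image_id,
        'category_id': category_id,
        'bbox': [x, y, w, h],  # [x, y, width, height], as in COCO
        'score': score,
        'track_id': track_id,
        'video_id': video_id,
    }
    for image_id, video_id, track_id, category_id, x, y, w, h, score in tracker_output
]

with open('results.json', 'w') as f:
    json.dump(results, f)

# Score the results; INFO-level logging is needed to see the printed metrics.
logging.getLogger().setLevel(logging.INFO)
tao_eval = TaoEval('/path/to/annotations.json', 'results.json')
tao_eval.run()
tao_eval.print_results()
```

The same `results.json` can then be passed to `scripts/evaluation/evaluate.py` as in the examples above.
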
120 | 121 | ## Evaluation (challenge server) 122 | 123 | For local evaluation, evaluate with steps above on the released validation 124 | set. When submitting test set results to the 125 | [challenge server](https://motchallenge.net/login/), follow same format for 126 | json files as mentioned above. 127 | 128 | The server requires you to submit train, validation and test set results. 129 | We request you to submit these three json files for facilitating progress in 130 | the tracking community. However, if absolutely necessary, submit empty json 131 | files for train and validation. Create a .zip archive that deflates into the 132 | following files 133 | 134 | ```bash 135 | ./TAO_test.json 136 | ./TAO_train.json 137 | ./TAO_val.json 138 | ``` 139 | -------------------------------------------------------------------------------- /tao/docs/faqs.md: -------------------------------------------------------------------------------- 1 | # Frequently asked questions 2 | 3 | 1. Why does the training set only contain 216 LVIS categories? 4 | 5 | TAO contains a total of 482 LVIS categories. However, not all categories 6 | are present in the train, val, and test sets. Instead, we encourage researchers to 7 | train detectors on the LVIS v0.5 dataset, which contains a superset of 8 | the 482 categories, and trackers on existing single-object tracking datasets. 9 | TAO is primarily a benchmark dataset, but we provide a small set of training videos 10 | for tuning trackers. 11 | 12 | 1. Why do the LVIS v1 dataset categories not match with the TAO categories? 13 | 14 | Tao was constructed to be aligned with the LVIS v0.5 dataset. The LVIS v1 update 15 | changes the category names and ids in the LVIS dataset. We are looking into updating 16 | TAO to use the LVIS v1 categories. For now, you may either train on the LVIS v0.5 17 | dataset, or construct your own mapping from LVIS v1 categories to TAO categories 18 | using the 'synset' field. 19 | 20 | 1. Is there any restriction on which data I can train on? 21 | 22 | The only restriction is that you may not train on videos in the TAO test set. 23 | You can see a list of videos in the TAO test set from the test set json file 24 | shared with the annotations. In particular, a number of LaSOT training videos 25 | are in the TAO test set, and must not be used for training. 26 | 27 | Apart from this, there are currently no restrictions on training datasets. 28 | 29 | 1. Are only LVIS categories evaluated in TAO? 30 | 31 | Currently (as of July 2020), we are focusing on the LVIS categories within TAO. 32 | The ECCV challenge will only evaluate on these categories. We intend to formalize 33 | a protocol for evaluation on the non-LVIS categories later this year. 34 | 35 | 1. Is there a single-object tracking track in the ECCV '20 challenge? 36 | 37 | Currently, there is no single-object / user-initialized tracking track in 38 | the challenge. We are looking into ways to host a challenge for user-initialized 39 | tracking on held out data (e.g., by asking researchers to submit code which we run 40 | locally on the held out test set). If you have any suggestions or 41 | feedback, please contact us! 
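For the LVIS v1 → TAO category mapping suggested in the second answer above, a minimal sketch could look like the following; the file names are placeholders, and it assumes both annotation files expose a COCO-style `categories` list whose entries carry a `synset` field:

```python
import json

# Placeholder paths: any LVIS v1 annotation file and any TAO annotation file.
with open('lvis_v1_train.json', 'r') as f:
    lvis_v1_categories = json.load(f)['categories']
with open('tao_train.json', 'r') as f:
    tao_categories = json.load(f)['categories']

tao_id_by_synset = {cat['synset']: cat['id'] for cat in tao_categories}

# Keep only LVIS v1 categories whose synset also appears in TAO;
# the remaining categories have no TAO counterpart and are skipped here.
lvis_v1_to_tao = {
    cat['id']: tao_id_by_synset[cat['synset']]
    for cat in lvis_v1_categories
    if cat['synset'] in tao_id_by_synset
}

print(f'Mapped {len(lvis_v1_to_tao)} of {len(lvis_v1_categories)} LVIS v1 categories.')
```

How to handle LVIS v1 categories without a matching synset is left to the user; this sketch simply drops them.
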
42 | -------------------------------------------------------------------------------- /tao/docs/manual_download.md: -------------------------------------------------------------------------------- 1 | These are alternative instructions that mimic the helper script in 2 | [scripts/download/download_helper.py](/scripts/download/download_helper.py), 3 | in case the helper script causes issues. Please read 4 | [./download.md](./download.md) first. 5 | 6 | 1. Download TAO annotations to $TAO_DIR 7 | 8 | ``` 9 | wget 'https://github.com/TAO-Dataset/annotations/archive/v1.0.tar.gz' 10 | tar xzvf v1.0.tar.gz 11 | mv annotations-v1.0 annotations 12 | ``` 13 | 14 | 1. Extract frames from BDD, Charades, HACS and YFCC-100M. 15 | 16 | ``` 17 | python scripts/download/extract_frames.py $TAO_ROOT --split train 18 | ``` 19 |
After this, your directory should have the following structure:

20 | 21 | ``` 22 | ├── frames 23 | │ └── train 24 | │ ├── ArgoVerse 25 | │ ├── BDD 26 | │ ├── Charades 27 | │ ├── HACS 28 | │ ├── LaSOT 29 | │ └── YFCC100M 30 | └── videos 31 | └── train 32 | ├── BDD 33 | ├── Charades 34 | ├── HACS 35 | └── YFCC100M 36 | ``` 37 |

38 | 39 | 1. Download and extract frames from AVA: 40 | 41 | ``` 42 | python scripts/download/download_ava.py $TAO_ROOT --split train 43 | ``` 44 | 45 | 1. Finally, you can verify that you have downloaded TAO. 46 | 47 |
Expected directory structure

48 | 49 | ``` 50 | ├── frames 51 | │ └── train 52 | │ ├── ArgoVerse 53 | │ ├── AVA 54 | │ ├── BDD 55 | │ ├── Charades 56 | │ ├── HACS 57 | │ ├── LaSOT 58 | │ └── YFCC100M 59 | └── videos 60 | └── train 61 | ├── BDD 62 | ├── Charades 63 | └── YFCC100M 64 | ``` 65 |

66 | 67 | You can run the following command to check that TAO was properly extracted: 68 | 69 | ``` 70 | python scripts/download/verify.py $TAO_ROOT --split train 71 | ``` 72 | -------------------------------------------------------------------------------- /tao/docs/trackers.md: -------------------------------------------------------------------------------- 1 | # Running trackers on TAO 2 | 3 | ## SORT 4 | 5 | Here, we will reproduce a simpler variant of the SORT result presented in TAO. 6 | Specifically, we will reproduce the following row from Table 13 in our 7 | supplementary material. 8 | 9 | | NMS Thresh | Det / image | Det score | `max_age` | `min_hits` | `min_iou` | Track mAP | 10 | | ---------- | ----------- | --------- | --------- | ---------- | --------- | --------- | 11 | | 0.5 | 300 | 0.0005 | 100 | 1 | 0.1 | 11.3 | 12 | 13 | ### Run detectors 14 | 15 | 1. Download and decompress the detection model and config from [here](https://drive.google.com/file/d/13BdXSQDqK0t-LrF2CrwJtT9lFc48u83H/view?usp=sharing) or [here](https://cdn3.vision.in.tum.de/~tao/baselines/detector-r101-fpn-1x-lvis-coco.zip) to 16 | `$DETECTRON_MODEL`. 17 | 18 | If you would like to re-train the detector, please see [this doc](./detector_train.md). 19 | 20 | 1. Setup and install 21 | [detectron2](https://github.com/facebookresearch/detectron2) 22 | 1. Run the detector on TAO: 23 | 24 | ``` 25 | python scripts/detectors/detectron2_infer.py \ 26 | --gpus 0 1 2 3 \ 27 | --root $TAO_ROOT/train \ 28 | --output /path/to/detectron2/output/train \ 29 | --config $DETECTRON_MODEL/config.yaml \ 30 | --opts MODEL.WEIGHTS $DETECTRON_MODEL/model_final.pth 31 | ``` 32 | 33 | On a machine with 4 2080TIs, the above took about 8 hours to run on the 34 | train set. 35 | 36 | ### Run [SORT](https://github.com/abewley/sort) 37 | 38 | ``` 39 | python scripts/trackers/sort/track.py \ 40 | --detections-dir /path/to/detectron2/output/train \ 41 | --annotations $TAO_ROOT/annotations/train.json \ 42 | --output-dir /path/to/sort/output/train \ 43 | --workers 8 44 | ``` 45 | 46 | On our machine, the above took about 11 hours to run on the train set. 47 | 48 | ### Evaluate 49 | 50 | ``` 51 | python scripts/evaluation/evaluate.py \ 52 | $TAO_ROOT/annotations/train.json \ 53 | /path/to/sort/output/train/results.json 54 | ``` 55 | 56 | This should report an AP of 11.3. 57 | -------------------------------------------------------------------------------- /tao/scripts/detectors/detectron2_infer.py: -------------------------------------------------------------------------------- 1 | # Modified from detectron2/demo/demo.py 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import argparse 5 | import logging 6 | import os 7 | import pickle 8 | from pathlib import Path 9 | 10 | import numpy as np 11 | import torch 12 | from detectron2.config import get_cfg 13 | from detectron2.data.detection_utils import read_image 14 | from detectron2.engine.defaults import DefaultPredictor 15 | from pycocotools import mask 16 | from script_utils.common import common_setup 17 | from tqdm import tqdm 18 | 19 | from tao.utils.parallel.fixed_gpu_pool import FixedGpuPool 20 | 21 | 22 | def init_model(init_args, context): 23 | os.environ['CUDA_VISIBLE_DEVICES'] = str(context['gpu']) 24 | context['predictor'] = DefaultPredictor(init_args['config']) 25 | 26 | 27 | def infer(kwargs, context): 28 | predictor = context['predictor'] 29 | image_path = kwargs['image_path'] 30 | output_path = kwargs['output_path'] 31 | img = read_image(str(image_path), format="BGR") 32 | 33 | predictions = predictor(img) 34 | predictions = predictions["instances"].get_fields() 35 | boxes_decoded = predictions["pred_boxes"].tensor.cpu().numpy().tolist() 36 | scores_decoded = predictions["scores"].cpu().numpy().tolist() 37 | classes_decoded = predictions["pred_classes"].cpu().numpy().tolist() 38 | masks_decoded = None 39 | if args.save_masks: 40 | masks_decoded = predictions["pred_masks"].cpu().numpy().astype(np.bool) 41 | save(boxes_decoded, scores_decoded, classes_decoded, masks_decoded, 42 | output_path) 43 | 44 | 45 | def save(boxes_decoded, scores_decoded, classes_decoded, masks_decoded, 46 | results_path): 47 | predictions_decoded = {} 48 | predictions_decoded["instances"] = { 49 | "pred_boxes": boxes_decoded, 50 | "scores": scores_decoded, 51 | "pred_classes": classes_decoded, 52 | } 53 | if masks_decoded is not None: 54 | rles = mask.encode( 55 | np.array(masks_decoded.transpose((1, 2, 0)), 56 | order='F', 57 | dtype=np.uint8)) 58 | for rle in rles: 59 | rle["counts"] = rle["counts"].decode("utf-8") 60 | predictions_decoded['instances']['pred_masks'] = rles 61 | with open(results_path, 'wb') as f: 62 | pickle.dump(predictions_decoded, f) 63 | 64 | 65 | def setup_cfg(args): 66 | # load config from file and command-line arguments 67 | cfg = get_cfg() 68 | cfg.merge_from_file(args.config_file) 69 | cfg.merge_from_list(args.opts) 70 | if not args.save_masks: 71 | cfg.MODEL.MASK_ON = False 72 | cfg.freeze() 73 | return cfg 74 | 75 | 76 | def get_parser(): 77 | parser = argparse.ArgumentParser(description="Detectron2 Demo") 78 | parser.add_argument("--root", required=True, type=Path) 79 | parser.add_argument("--output", 80 | required=True, 81 | type=Path, 82 | help="Directory to save output pickles.") 83 | parser.add_argument("--config-file", 84 | required=True, 85 | type=Path, 86 | help="path to config file") 87 | parser.add_argument('--gpus', default=[0], nargs='+', type=int) 88 | parser.add_argument( 89 | "--opts", 90 | help="Modify model config options using the command-line", 91 | default=[], 92 | nargs=argparse.REMAINDER) 93 | parser.add_argument( 94 | '--save-masks', default=False, action='store_true') 95 | return parser 96 | 97 | 98 | if __name__ == "__main__": 99 | args = get_parser().parse_args() 100 | Path(args.output).mkdir(exist_ok=True, parents=True) 101 | common_setup(__file__, args.output, args) 102 | # Prevent detectron from flooding terminal with messages. 
103 | logging.getLogger('detectron2.checkpoint.c2_model_loading').setLevel( 104 | logging.WARNING) 105 | logging.getLogger('fvcore.common.checkpoint').setLevel( 106 | logging.WARNING) 107 | logger = logging.root 108 | 109 | cfg = setup_cfg(args) 110 | 111 | threads_per_worker = 4 112 | torch.set_num_threads(threads_per_worker) 113 | os.environ['OMP_NUM_THREADS'] = str(threads_per_worker) 114 | 115 | all_files = args.root.rglob('*.jpg') 116 | 117 | # Arguments to init_model() 118 | init_args = {'config': cfg} 119 | 120 | # Tasks to pass to infer() 121 | infer_tasks = [] 122 | for path in tqdm(all_files, 123 | mininterval=1, 124 | dynamic_ncols=True, 125 | desc='Collecting frames'): 126 | relative = path.relative_to(args.root) 127 | output_pkl = (args.output / relative).with_suffix('.pkl') 128 | if output_pkl.exists(): 129 | continue 130 | output_pkl.parent.mkdir(exist_ok=True, parents=True) 131 | infer_tasks.append({'image_path': path, 'output_path': output_pkl}) 132 | 133 | if len(args.gpus) == 1: 134 | context = {'gpu': args.gpus[0]} 135 | init_model(init_args, context) 136 | for task in tqdm(infer_tasks, 137 | mininterval=1, 138 | desc='Running detector', 139 | dynamic_ncols=True): 140 | infer(task, context) 141 | else: 142 | pool = FixedGpuPool( 143 | args.gpus, initializer=init_model, initargs=init_args) 144 | list( 145 | tqdm(pool.imap_unordered(infer, infer_tasks), 146 | total=len(infer_tasks), 147 | mininterval=10, 148 | desc='Running detector', 149 | dynamic_ncols=True)) 150 | -------------------------------------------------------------------------------- /tao/scripts/detectors/merge_coco_with_lvis.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import itertools 3 | import json 4 | import logging 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | from pycocotools.coco import COCO 9 | import pycocotools.mask as mask_util 10 | from script_utils.common import common_setup 11 | from tqdm import tqdm 12 | 13 | 14 | ROOT = Path(__file__).resolve().parent.parent.parent 15 | 16 | 17 | def main(): 18 | # Use first line of file docstring as description if it exists. 
19 | parser = argparse.ArgumentParser( 20 | description=__doc__.split('\n')[0] if __doc__ else '', 21 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 22 | parser.add_argument('--lvis', type=Path, required=True) 23 | parser.add_argument('--coco', type=Path, required=True) 24 | parser.add_argument('--mapping', 25 | type=Path, 26 | default=ROOT / 'data/lvis_coco_to_synset.json') 27 | parser.add_argument('--output-json', 28 | type=Path, 29 | required=True) 30 | parser.add_argument( 31 | '--iou-thresh', 32 | default=0.7, 33 | type=float, 34 | help=('If a COCO annotation overlaps with an LVIS annotations with ' 35 | 'IoU over this threshold, we use only the LVIS annotation.')) 36 | 37 | args = parser.parse_args() 38 | args.output_json.parent.mkdir(exist_ok=True, parents=True) 39 | common_setup(args.output_json.name + '.log', args.output_json.parent, args) 40 | 41 | coco = COCO(args.coco) 42 | lvis = COCO(args.lvis) 43 | 44 | synset_to_lvis_id = {x['synset']: x['id'] for x in lvis.cats.values()} 45 | coco_to_lvis_category = {} 46 | with open(args.mapping, 'r') as f: 47 | name_mapping = json.load(f) 48 | for category in coco.cats.values(): 49 | mapped = name_mapping[category['name']] 50 | assert mapped['coco_cat_id'] == category['id'] 51 | synset = mapped['synset'] 52 | if synset not in synset_to_lvis_id: 53 | logging.debug( 54 | f'Found no LVIS category for "{category["name"]}" from COCO') 55 | continue 56 | coco_to_lvis_category[category['id']] = synset_to_lvis_id[synset] 57 | 58 | for image_id, image in coco.imgs.items(): 59 | if image_id in lvis.imgs: 60 | coco_name = coco.imgs[image_id]['file_name'] 61 | lvis_name = lvis.imgs[image_id]['file_name'] 62 | assert coco_name in lvis_name 63 | else: 64 | logging.info( 65 | f'Image {image_id} in COCO, but not annotated in LVIS') 66 | 67 | lvis_highest_id = max(x['id'] for x in lvis.anns.values()) 68 | ann_id_generator = itertools.count(lvis_highest_id + 1) 69 | new_annotations = [] 70 | for image_id, lvis_anns in tqdm(lvis.imgToAnns.items()): 71 | if image_id not in coco.imgToAnns: 72 | logging.info( 73 | f'Image {image_id} in LVIS, but not annotated in COCO') 74 | continue 75 | 76 | coco_anns = coco.imgToAnns[image_id] 77 | # Compute IoU between coco_anns and lvis_anns 78 | # Shape (num_coco_anns, num_lvis_anns) 79 | mask_iou = mask_util.iou([coco.annToRLE(x) for x in coco_anns], 80 | [lvis.annToRLE(x) for x in lvis_anns], 81 | pyiscrowd=np.zeros(len(lvis_anns))) 82 | does_overlap = mask_iou.max(axis=1) > args.iou_thresh 83 | to_add = [] 84 | for i, ann in enumerate(coco_anns): 85 | if does_overlap[i]: 86 | continue 87 | if ann['category_id'] not in coco_to_lvis_category: 88 | continue 89 | ann['category_id'] = coco_to_lvis_category[ann['category_id']] 90 | ann['id'] = next(ann_id_generator) 91 | to_add.append(ann) 92 | new_annotations.extend(to_add) 93 | 94 | with open(args.lvis, 'r') as f: 95 | merged = json.load(f) 96 | merged['annotations'].extend(new_annotations) 97 | with open(args.output_json, 'w') as f: 98 | json.dump(merged, f) 99 | 100 | 101 | if __name__ == "__main__": 102 | main() 103 | -------------------------------------------------------------------------------- /tao/scripts/download/download_annotations.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import urllib.error 3 | import urllib.request 4 | from pathlib import Path 5 | 6 | import subprocess 7 | 8 | ANNOTATIONS_TAR_GZ = 'https://github.com/TAO-Dataset/annotations/archive/v1.1.tar.gz' 9 | 10 | 11 | def 
banner_log(msg): 12 | banner = '#' * len(msg) 13 | print(f'\n{banner}\n{msg}\n{banner}') 14 | 15 | 16 | def log_and_run(cmd, *args, **kwargs): 17 | print(f'Running command:\n{" ".join(cmd)}') 18 | subprocess.run(cmd, *args, **kwargs) 19 | 20 | 21 | def main(): 22 | # Use first line of file docstring as description if it exists. 23 | parser = argparse.ArgumentParser( 24 | description=__doc__.split('\n')[0] if __doc__ else '', 25 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 26 | parser.add_argument('tao_root', type=Path) 27 | parser.add_argument('--split', 28 | required=True, 29 | choices=['train', 'val', 'test']) 30 | 31 | args = parser.parse_args() 32 | 33 | assert args.tao_root.exists(), ( 34 | f'TAO_ROOT does not exist at {args.tao_root}') 35 | 36 | annotations_dir = args.tao_root / 'annotations' 37 | if annotations_dir.exists(): 38 | print(f'Annotations directory already exists; skipping.') 39 | else: 40 | annotations_compressed = args.tao_root / 'annotations.tar.gz' 41 | if not annotations_compressed.exists(): 42 | banner_log('Downloading annotations') 43 | try: 44 | urllib.request.urlretrieve(ANNOTATIONS_TAR_GZ, 45 | annotations_compressed) 46 | except urllib.error.HTTPError as e: 47 | if e.code == 404: 48 | print(f'Unable to download annotations.tar.gz. Please ' 49 | f'download it manually from\n' 50 | f'{ANNOTATIONS_TAR_GZ}\n' 51 | f'and save it to {args.tao_root}.') 52 | return 53 | raise 54 | banner_log('Extracting annotations') 55 | log_and_run([ 56 | 'tar', 'xzvf', 57 | str(annotations_compressed), '-C', 58 | str(args.tao_root) 59 | ]) 60 | (args.tao_root / 'annotations-1.1').rename(annotations_dir) 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /tao/scripts/download/download_cfg.yaml: -------------------------------------------------------------------------------- 1 | TAO_ANNOTATIONS: 2 | TRAIN: /data/achald/track_dataset/annotations/scale/4-18/tao-format/train_federated_lvis.json 3 | VAL: /data/achald/track_dataset/annotations/scale/4-18/tao-format/validation_federated_lvis.json 4 | CHECKSUMS: 5 | VERIFY: True 6 | PATH: /data/achald/track_dataset/annotations/scale/4-18/tao-format/with_test_unfederated/checksums.json 7 | AVA: 8 | MOVIES: 9 | # Contains symlinks to /data/all/AVA/data 10 | DIR: /scratch/achald/tao/release/ava/ 11 | LASOT: 12 | DATASET_ROOT: /ssd1/achald/lasot 13 | CREATE_SYMLINKS: True 14 | CHARADES: 15 | VIDEOS_DIR: /data/all/Charades/Charades_v1/videos 16 | BDD: 17 | VIDEOS_DIR: /data/achald/track_dataset/bdd/val/videos/val_00/ -------------------------------------------------------------------------------- /tao/scripts/download/download_helper.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import urllib.error 3 | import urllib.request 4 | from pathlib import Path 5 | 6 | import subprocess 7 | 8 | # ANNOTATIONS_TAR_GZ = 'https://github.com/TAO-Dataset/annotations/archive/v1.0.tar.gz' 9 | # Temporary URL while in beta. 10 | ANNOTATIONS_TAR_GZ = 'https://achal-public.s3.amazonaws.com/release-beta/annotations/annotations.tar.gz' 11 | 12 | 13 | def banner_log(msg): 14 | banner = '#' * len(msg) 15 | print(f'\n{banner}\n{msg}\n{banner}') 16 | 17 | 18 | def log_and_run(cmd, *args, **kwargs): 19 | print(f'Running command:\n{" ".join(cmd)}') 20 | subprocess.run(cmd, *args, **kwargs) 21 | 22 | 23 | def main(): 24 | # Use first line of file docstring as description if it exists. 
25 | parser = argparse.ArgumentParser( 26 | description=__doc__.split('\n')[0] if __doc__ else '', 27 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 28 | parser.add_argument('tao_root', type=Path) 29 | parser.add_argument('--split', 30 | required=True, 31 | choices=['train', 'val', 'test']) 32 | 33 | args = parser.parse_args() 34 | 35 | assert args.tao_root.exists(), ( 36 | f'TAO_ROOT does not exist at {args.tao_root}') 37 | 38 | annotations_dir = args.tao_root / 'annotations' 39 | if annotations_dir.exists(): 40 | print(f'Annotations directory already exists; skipping.') 41 | else: 42 | annotations_compressed = args.tao_root / 'annotations.tar.gz' 43 | if not annotations_compressed.exists(): 44 | banner_log('Downloading annotations') 45 | try: 46 | urllib.request.urlretrieve(ANNOTATIONS_TAR_GZ, 47 | annotations_compressed) 48 | except urllib.error.HTTPError as e: 49 | if e.code == 404: 50 | print(f'Unable to download annotations.tar.gz. Please ' 51 | f'download it manually from\n' 52 | f'{ANNOTATIONS_TAR_GZ}\n' 53 | f'and save it to {args.tao_root}.') 54 | return 55 | raise 56 | banner_log('Extracting annotations') 57 | log_and_run([ 58 | 'tar', 'xzvf', 59 | str(annotations_compressed), '-C', 60 | str(args.tao_root) 61 | ]) 62 | (args.tao_root / 'annotations-1.0').rename(annotations_dir) 63 | 64 | banner_log("Extracting BDD, Charades, HACS, and YFCC frames") 65 | log_and_run([ 66 | 'python', 'scripts/download/extract_frames.py', 67 | str(args.tao_root), '--split', args.split 68 | ]) 69 | 70 | banner_log("Downloading AVA videos") 71 | log_and_run([ 72 | 'python', 'scripts/download/download_ava.py', 73 | str(args.tao_root), '--split', args.split 74 | ]) 75 | 76 | banner_log("Verifying TAO frames") 77 | log_and_run([ 78 | 'python', 'scripts/download/verify.py', 79 | str(args.tao_root), '--split', args.split 80 | ]) 81 | 82 | 83 | if __name__ == "__main__": 84 | main() 85 | -------------------------------------------------------------------------------- /tao/scripts/download/extract_frames.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | from collections import defaultdict 5 | from pathlib import Path 6 | 7 | from script_utils.common import common_setup 8 | 9 | from tao.utils.download import ( 10 | are_tao_frames_dumped, dump_tao_frames, remove_non_tao_frames) 11 | 12 | 13 | def main(): 14 | # Use first line of file docstring as description if it exists. 
15 | parser = argparse.ArgumentParser( 16 | description=__doc__.split('\n')[0] if __doc__ else '', 17 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 18 | parser.add_argument('root', type=Path) 19 | parser.add_argument('--split', 20 | required=True, 21 | choices=['train', 'val', 'test']) 22 | parser.add_argument('--sources', 23 | default=['BDD', 'HACS', 'Charades', 'YFCC100M'], 24 | choices=['BDD', 'HACS', 'Charades', 'YFCC100M']) 25 | parser.add_argument('--workers', default=8, type=int) 26 | 27 | args = parser.parse_args() 28 | log_dir = args.root / 'logs' 29 | log_dir.mkdir(exist_ok=True, parents=True) 30 | common_setup(__file__, log_dir, args) 31 | 32 | ann_path = args.root / f'annotations/{args.split}.json' 33 | with open(ann_path, 'r') as f: 34 | tao = json.load(f) 35 | 36 | checksums_path = ( 37 | args.root / f'annotations/checksums/{args.split}_checksums.json') 38 | with open(checksums_path, 'r') as f: 39 | checksums = json.load(f) 40 | 41 | videos_by_dataset = defaultdict(list) 42 | for video in tao['videos']: 43 | videos_by_dataset[video['metadata']['dataset']].append(video) 44 | 45 | videos_dir = args.root / 'videos' 46 | frames_dir = args.root / 'frames' 47 | for dataset in args.sources: 48 | # Collect list of videos 49 | ext = '.mov' if dataset == 'BDD' else '.mp4' 50 | videos = videos_by_dataset[dataset] 51 | video_paths = [ 52 | videos_dir / f"{video['name']}{ext}" for video in videos 53 | ] 54 | output_frame_dirs = [frames_dir / video['name'] for video in videos] 55 | 56 | # List of (video, video path, frame directory) tuples 57 | to_dump = [] 58 | for video, video_path, frame_dir in zip(videos, video_paths, 59 | output_frame_dirs): 60 | if not video_path.exists(): 61 | raise ValueError(f'Could not find video at {video_path}') 62 | video_checksums = checksums[video['name']] 63 | if frame_dir.exists() and are_tao_frames_dumped( 64 | frame_dir, video_checksums, warn=False): 65 | continue 66 | to_dump.append((video, video_path, frame_dir)) 67 | 68 | # Dump frames from each video 69 | logging.info(f'{dataset}: Extracting frames') 70 | dump_tao_frames([x[1] for x in to_dump], [x[2] for x in to_dump], 71 | workers=args.workers) 72 | 73 | to_dump = [] 74 | for video, video_path, frame_dir in zip(videos, video_paths, 75 | output_frame_dirs): 76 | video_checksums = checksums[video['name']] 77 | # Remove frames not used for TAO. 78 | remove_non_tao_frames(frame_dir, set(video_checksums.keys())) 79 | # Compare checksums for frames 80 | assert are_tao_frames_dumped(frame_dir, video_checksums), ( 81 | f'Not all TAO frames for {video["name"]} were extracted.') 82 | 83 | logging.info( 84 | f'{dataset}: Removing non-TAO frames, verifying extraction') 85 | logging.info(f'{dataset}: Successfully extracted!') 86 | 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /tao/scripts/download/gen_checksums.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from collections import defaultdict 4 | from hashlib import md5 5 | from pathlib import Path 6 | 7 | from tqdm import tqdm 8 | from script_utils.common import common_setup 9 | 10 | from tao.utils import fs 11 | 12 | 13 | def main(): 14 | # Use first line of file docstring as description if it exists. 
15 | parser = argparse.ArgumentParser( 16 | description=__doc__.split('\n')[0] if __doc__ else '', 17 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 18 | parser.add_argument('--frames-dir', type=Path, required=True) 19 | parser.add_argument('--output-json', type=Path, required=True) 20 | parser.add_argument('--tao-annotations', type=Path, required=True) 21 | 22 | args = parser.parse_args() 23 | output_dir = args.output_json.parent 24 | output_dir.mkdir(exist_ok=True, parents=True) 25 | common_setup(args.output_json.name, output_dir, args) 26 | 27 | with open(args.tao_annotations, 'r') as f: 28 | tao = json.load(f) 29 | videos = [x['name'] for x in tao['videos']] 30 | 31 | labeled_frames = defaultdict(set) 32 | for frame in tao['images']: 33 | video, frame_name = frame['file_name'].rsplit('/', 1) 34 | labeled_frames[video].add(frame_name) 35 | 36 | # videos = videos[:10] 37 | hashes = {} 38 | for video in tqdm(videos): 39 | frames = fs.glob_ext(args.frames_dir / video, ('.jpg', '.jpeg')) 40 | hashes[video] = {} 41 | for i, frame in tqdm(enumerate(frames)): 42 | if frame.name in labeled_frames[video]: 43 | with open(frame, 'rb') as f: 44 | hashes[video][frame.name] = md5(f.read()).hexdigest() 45 | else: 46 | hashes[video][frame.name] = '' 47 | if all(x == '' for x in hashes[video].values()): 48 | raise ValueError(f'Did not find any labeled frames for {video}') 49 | 50 | with open(args.output_json, 'w') as f: 51 | json.dump(hashes, f) 52 | 53 | 54 | if __name__ == "__main__": 55 | main() 56 | -------------------------------------------------------------------------------- /tao/scripts/download/meta/ava_file_names_test_v2.1.txt: -------------------------------------------------------------------------------- 1 | --205wugM18.mkv 2 | -APF0-L14kw.mkv 3 | -FLn0aeA6EU.mkv 4 | 0OLtK6SeTwo.mp4 5 | 1R7n8B8KkZE.mkv 6 | 1XZZnWMP4CU.mkv 7 | 2eTGj8zPykM.mkv 8 | 3-ivkPTSTSw.mp4 9 | 30Qkf0pq-PY.mkv 10 | 55R6Ng9w65o.mkv 11 | 6IebItD0ETQ.mkv 12 | 72MzYjWz_7g.mkv 13 | 7QstV153hbA.mkv 14 | 7SGCpWCNN84.mp4 15 | 7oY-kE-goOA.mkv 16 | 8FYx0LtfPTE.mkv 17 | 8oL0i5WorkE.mp4 18 | 9f8r96-it6c.mkv 19 | A8SUe2Yqn60.mkv 20 | A9WSiEDeu0I.mkv 21 | AwlY-zteegM.mkv 22 | BD3zaLKhkV4.mkv 23 | BLDTynQwGRI.mkv 24 | BU98nWUtT5E.mkv 25 | BV1VreCWZ64.mkv 26 | BnIFkfDhJ2w.mkv 27 | DaUzhc9_6io.mp4 28 | E-6ruyZFfZs.mkv 29 | E-fqjlYMFhE.mp4 30 | EHMP5-9KUdI.mp4 31 | EO1gLAoEZRA.mp4 32 | FONjBIXaM-0.mp4 33 | G0gDuIVKiXg.mkv 34 | GElolK2jG50.mkv 35 | GQxKfbvL3mg.mkv 36 | Gsm_ZBStr0s.mp4 37 | HPd4eMvs1Kg.mp4 38 | HeKz7BELAQc.mkv 39 | HtXWX0LnifY.mp4 40 | IC5M1EhJNfI.webm 41 | IIyYHprTP58.webm 42 | Ic0LMbDyc9Y.mkv 43 | JiBiCiK9HjY.mp4 44 | K-tICG1ek-E.mp4 45 | Ke8b1_yiUVQ.mkv 46 | KkAf75yOKqs.mkv 47 | KrMSZUQJlNM.mkv 48 | LO964EmiVfo.mkv 49 | Mz0FKktvMLY.mkv 50 | NUwem2aZa0Y.mp4 51 | O5y8zKl9X2E.mp4 52 | O8xkUcUJPNo.mkv 53 | OEUMcSba9t0.mp4 54 | OL_Wwo5W1Zs.mp4 55 | OQxN4ksema0.mkv 56 | P5EhajqkqPw.mkv 57 | QTf_v67C5KI.mp4 58 | Qes4a8HuyEc.mkv 59 | RCNuAys0Hsg.mkv 60 | RW-H3fN_79I.mp4 61 | Scg5LeZszCc.mkv 62 | Sntyb4omSfU.mkv 63 | SoNhz0WJZsI.mkv 64 | Uw7387tc9PU.mp4 65 | V6RX59GT-3k.mkv 66 | VNZ8JDb8sks.mkv 67 | ViY7CR2TSO8.mkv 68 | W8TFzEy0gp0.mkv 69 | WMFTBgYWJS8.mkv 70 | Wgytpy6TeUA.mp4 71 | WhkON_S-pQc.mp4 72 | XOe9GeojzCs.mp4 73 | YAAUPjq-L-Q.mp4 74 | Z0FEElATNjk.mkv 75 | Z42lnoj2n08.mkv 76 | ZS2C28fDC9U.mp4 77 | ZbeMNLwASVo.mkv 78 | ZsgPK0XGYoM.mp4 79 | Zu4iQJrlpo0.mkv 80 | _kbrVsCaaPo.mp4 81 | _vy57h5Oeys.mkv 82 | aDfOtlsdoWw.mkv 83 | bNP8Q_8u89A.webm 84 | bUVls-bf0jM.mkv 85 | bzGQK5lH-RA.mkv 86 | c5mlhcFYYZs.mp4 87 | 
cYt6NaQgcEk.mp4 88 | cqkChR44vkA.mkv 89 | fT_WjgJ_-r0.mkv 90 | gEI9qBdVt5I.mp4 91 | h7Atb503JwY.webm 92 | hgmK4Epb02E.mkv 93 | i9cuy3teV0w.mkv 94 | ipBRBABLSAk.mkv 95 | jKKXDh4lYd0.mkv 96 | kW5WyJ1QNpM.mkv 97 | keUOiCcHtoQ.mkv 98 | kvFlbTK812w.mkv 99 | l8_Mk3-sZsQ.mkv 100 | nAg_NVzLoAY.mkv 101 | nRzhjXMIXt4.mkv 102 | o-ZcbjLBtls.mkv 103 | ohn_RxyaCy4.mp4 104 | pSE4Dlork1Y.mp4 105 | pSdPmmJ3-ng.mp4 106 | rJibAAUEMDY.mkv 107 | rRL0Ce8e-RY.mkv 108 | rTCch_5JlkA.mp4 109 | s2z5UASlrP8.mkv 110 | sV3zZROy0uc.mkv 111 | tDF-BqFfF78.mkv 112 | tj-VmrMYtUI.mp4 113 | u97DLHpcw7c.mkv 114 | vL7N_xRJKJU.mp4 115 | vsMgg4snZzM.mkv 116 | w-jIrlwuv2Y.mkv 117 | wamBSoyRtbs.mkv 118 | woC9Vfbn74I.mkv 119 | xH1WLtZ8csM.mp4 120 | xJpDPrwLJh4.mkv 121 | xT2ogY6xEsI.mp4 122 | xYUx0drhUNk.mkv 123 | xauSNGP5yA0.mkv 124 | xdDTWBRWPLQ.mkv 125 | y4lBI_gFnqI.mkv 126 | y5o8w0FRj98.mkv 127 | yQdi5Ke4dNY.mkv 128 | yRRZkwtJCwU.mkv 129 | z5lg_3abT-s.mkv 130 | zm78XnWN7MU.mkv 131 | zvxnOrzTg0M.mp4 132 | -------------------------------------------------------------------------------- /tao/scripts/download/meta/ava_file_names_trainval_v2.1.txt: -------------------------------------------------------------------------------- 1 | _-Z6wFjXtGQ.mkv 2 | _145Aa_xkuE.mp4 3 | _7oWZq_s_Sk.mkv 4 | _a9SWtcaNj8.mkv 5 | _Ca3gOdOHxU.mp4 6 | _dBTTYDRdRQ.webm 7 | _eBah6c5kyA.mkv 8 | _ithRWANKB0.mp4 9 | _mAfwH6i90E.mkv 10 | -5KQ66BBWC4.mkv 11 | -FaXLcSFjUI.mp4 12 | -IELREHX_js.mp4 13 | -OyDO1g74vc.mp4 14 | -XpUuIgyUHE.mp4 15 | -ZFgsrolSxo.mkv 16 | 053oq2xB3oU.mkv 17 | 0f39OWEqJ24.mp4 18 | 0wBYFahr3uI.mp4 19 | 1j20qq1JyX4.mp4 20 | 1ReZIMmD_8E.mp4 21 | 26V9UzqSguo.mp4 22 | 2bxKkUgcqpk.mp4 23 | 2DUITARAsWQ.mp4 24 | 2E_e8JlvTlg.mkv 25 | 2FIHxnZKg6A.webm 26 | 2fwni_Kjf2M.mkv 27 | 2KpThOF_QmE.mkv 28 | 2PpxiG0WU18.mkv 29 | 2qQs3Y9OJX0.mkv 30 | 3_VjIRdXVdM.mkv 31 | 32HR3MnDZ8g.mp4 32 | 3IOE-Q3UWdA.mp4 33 | 4gVsDd8PV9U.mp4 34 | 4k-rTF3oZKw.mp4 35 | 4Y5qi1gD2Sw.mkv 36 | 4ZpjKfu6Cl8.mkv 37 | 55Ihr6uVIDA.mkv 38 | 5BDj0ow5hnA.mp4 39 | 5LrOQEt_XVM.mp4 40 | 5milLu-6bWI.mp4 41 | 5MxjqHfkWFI.mkv 42 | 5YPjcdLbs5g.mkv 43 | 6d5u6FHvz7Q.mkv 44 | 7g37N3eoQ9s.mkv 45 | 7nHkh4sP5Ks.mkv 46 | 7T5G0CmwTPo.mkv 47 | 7YpF6DntOYw.mkv 48 | 8aMv-ZGD4ic.mkv 49 | 8JSxLhDMGtE.mkv 50 | 8nO5FFbIAog.webm 51 | 8VZEwOCQ8bc.mkv 52 | 914yZXz-iRs.mkv 53 | 9bK05eBt1GM.mp4 54 | 9eAOr_ttXp0.mkv 55 | 9F2voT6QWvQ.mkv 56 | 9HOMUW7QNFc.mkv 57 | 9IF8uTRrWAM.mkv 58 | 9mLYmkonWZQ.mkv 59 | 9QbzS8bZXFE.mkv 60 | 9Rcxr3IEX4E.mkv 61 | 9tyiDEYiWiA.mkv 62 | 9Y_l9NsnYE0.mp4 63 | aDEYi1OG0vU.mkv 64 | Ag-pXiLrd48.mp4 65 | aMYcLyh9OhU.mkv 66 | AN07xQokfiE.mp4 67 | aRbLw-dU2XY.mp4 68 | ax3q-RkVIt4.mp4 69 | ayAMdYfJJLk.mkv 70 | AYebXQ8eUkM.mkv 71 | b-YoBU0XT90.mp4 72 | B1MAUxpKaV8.mkv 73 | b50s4AlOOKY.mkv 74 | b5pRYl_djbs.mp4 75 | bAVXp1oGjHA.mkv 76 | BCiuXAuCKAU.mp4 77 | bePts02nIY8.mkv 78 | bhlFavrh7WU.mkv 79 | bSZiZ4rOC7c.mkv 80 | BXCh3r-pPAM.mkv 81 | BY3sZmvUp-0.mp4 82 | C25wkwAMB-w.mkv 83 | C3qk4yAMANk.mkv 84 | c9pEMjPT16M.webm 85 | cc4y-yYm5Ao.mkv 86 | CG98XdYsgrA.mkv 87 | cKA-qeZuH_w.mkv 88 | cLiJgvrDlWw.mp4 89 | CMCPhm2L400.mkv 90 | covMYDBa5dk.mp4 91 | CrlfWnsS7ac.mkv 92 | cWYJHb25EVs.mp4 93 | CZ2NP8UsPuE.mkv 94 | D-BJTU6NxZ8.mkv 95 | D8Vhxbho1fY.mp4 96 | Db19rWN5BGo.mkv 97 | dgLApPvmfBE.mkv 98 | Di1MG6auDYo.mkv 99 | dMH8L7mqCNI.mkv 100 | E2jecoyAx1M.mkv 101 | E7JcKooKVsM.mp4 102 | eA55_shhKko.mkv 103 | Ecivp8t3MdY.mkv 104 | Ekwy7wzLfjc.mkv 105 | er7eeiJB6dI.mkv 106 | F3dPH6Xqf5M.mp4 107 | fD6VkIRlIRI.mkv 108 | Feu1_8NazPE.mp4 109 | fGgnNCbXZ20.mp4 110 | fNcxxBjEOgw.mkv 111 | fpprSy6AzKk.mkv 112 | 
fZs-yXm-uUs.mp4 113 | g1wyIcLPbq0.mp4 114 | G4qq1MRXCiY.mkv 115 | G5Yr20A5z_Q.mkv 116 | GBXK_SyfisM.mkv 117 | Gfdg_GcaNe8.mkv 118 | gjasEUDkbuc.mkv 119 | gjdgj04FzR0.mp4 120 | GozLjpMNADg.mkv 121 | gqmmpoO1JrY.mkv 122 | Gt61_Yekkgc.mp4 123 | Gvp-cj3bmIY.webm 124 | hbYvDvJrpNk.mp4 125 | hHgg9WI8dTk.mkv 126 | Hi8QeP_VPu0.mkv 127 | HJzgJ9ZjvJk.mkv 128 | HKjR70GCRPE.mp4 129 | Hscyg0vLKc8.mp4 130 | HTYT2vF-j_w.mkv 131 | HV0H6oc4Kvs.mkv 132 | HVAmkvLrthQ.mkv 133 | HymKCzQJbB8.mkv 134 | I8j6Xq2B5ys.mp4 135 | Ie35yEssHko.mkv 136 | IKdBLciu_-A.mp4 137 | iSlDMboCSao.mkv 138 | IuPC-z-M9u8.mkv 139 | IzvOYVMltkI.mp4 140 | J1jDc2rTJlg.mkv 141 | j35JnR0Q7Es.mp4 142 | J4bt4y9ShTA.mkv 143 | j5jmjhGBW44.mkv 144 | jBs_XYHI7gM.mkv 145 | jE0S8gYWftE.webm 146 | jgAwJ0RqmYg.mp4 147 | jI0HIlSsa3s.mkv 148 | JNb4nWexD0I.mkv 149 | jqZpiHlJUig.mkv 150 | K_SpqDJnlps.mkv 151 | kAsz-76DTDE.mkv 152 | Kb1fduj-jdY.mp4 153 | KHHgQ_Pe4cI.mkv 154 | KIy2a-nejxg.mp4 155 | kLDpP9QEVBs.mp4 156 | kMy-6RtoOVU.mkv 157 | kplbKz3_fZk.mkv 158 | Ksd1JQFHYWA.mp4 159 | KVq6If6ozMY.mkv 160 | KWoSGtglCms.mkv 161 | l-jxh8gpxuY.mkv 162 | l2XO3tQk8lI.mkv 163 | lDmLcWWBp1E.mkv 164 | Lg1jOu8cUBM.mkv 165 | LIavUJVrXaI.mkv 166 | LrDT25hmApw.mkv 167 | lT1zdTL-3SM.mkv 168 | lWXhqIAvarw.mkv 169 | M6cgEs9JgDo.mkv 170 | Ma2hgTmveKQ.mkv 171 | mfsbYdLx9wE.mkv 172 | miB-wo2PfLI.mkv 173 | mkcDANJjDcM.mkv 174 | N0Dt9i9IUNg.mkv 175 | N1K2bEZLL_A.mkv 176 | N5UD8FGzDek.mkv 177 | N7baJsMszJ0.mkv 178 | NEQ7Wpf-EtI.mkv 179 | nlinqZPgvVk.mkv 180 | NO2esmws190.mkv 181 | O_NYCUhZ9zw.mp4 182 | o4xQ-BEa3Ss.mkv 183 | O5m_0Yay4EU.mkv 184 | oD_wxyTHJ2I.mp4 185 | OfMdakd4bHI.mkv 186 | OGNnUvJq9RI.mkv 187 | oifTDWZvOhY.mkv 188 | oITFHwzfw_k.mkv 189 | om_83F5VwTQ.mp4 190 | oq_bufAhyl8.mkv 191 | Ov0za6Xb1LM.mkv 192 | oWhvucAskhk.mkv 193 | P60OxWahxBQ.mkv 194 | P90hF2S1JzA.mkv 195 | PcFEhUKhN6g.mkv 196 | pGP_oIdKmRY.mkv 197 | phrYEKv0rmw.mkv 198 | phVLLTMzmKk.mkv 199 | pieVIsGmLsc.mkv 200 | piYxcrMxVPw.mkv 201 | plkJ45_-pMk.mp4 202 | PmElx9ZVByw.mp4 203 | PNZQ2UJfyQE.mp4 204 | QaIMUi-elFo.mkv 205 | qBUu7cy-5Iw.mp4 206 | QCLQYnt3aMo.webm 207 | QD3L10bUnBo.mkv 208 | QJzocCGLdHU.mp4 209 | QMwT7DFA5O4.mkv 210 | QotkBTEePI8.mkv 211 | qpoWHELxL-4.mp4 212 | qrkff49p4E4.mp4 213 | qsTqtWVVSLM.mkv 214 | QTmwhrVal1g.mkv 215 | qx2vAO5ofmo.mp4 216 | r2llOyS-BmE.mkv 217 | rCb9-U4TArw.mp4 218 | rFgb2ECMcrY.mkv 219 | ri4P2enZT9o.mkv 220 | Riu4ZKk4YdQ.webm 221 | rJKeqfTlAeY.mkv 222 | rk8Xm0EAOWs.mkv 223 | Rm518TUhbRY.mkv 224 | rUYsoIIE37A.mp4 225 | rXFlJbXyZyc.mkv 226 | S0tkhGJjwLA.mkv 227 | sADELCyj10I.mkv 228 | SCh-ZImnyyk.mp4 229 | SHBMiL5f_3Q.mkv 230 | skiZueh4lfY.mkv 231 | sNQJfYvhcPk.mp4 232 | sUVhd0YTKgw.mkv 233 | T-Fc9ctuNVI.mkv 234 | t0V4drbYDnc.mkv 235 | t1LXrJOvPDg.mkv 236 | T26G6_AjJZ4.mkv 237 | TcB0IFBwk-k.mkv 238 | TCmNvNLRWrc.mkv 239 | tEoJW9ycmSY.mkv 240 | TEQ9sAj-DPo.mp4 241 | tghXjom3120.mkv 242 | tjqCzVjojCo.mkv 243 | TM5MPJIq1Is.mkv 244 | tNpZtigMc4g.mkv 245 | tt0t_a1EDCE.mkv 246 | TzaVHtLXOzY.mkv 247 | U_WzY2k8IBM.mkv 248 | u1ltv6r14KQ.mkv 249 | UgZFdrNT6W0.mkv 250 | uNT6HrrnqPU.webm 251 | UOfuzrwkclM.mkv 252 | UOyyTUX5Vo4.mkv 253 | uq_HBsvP548.mkv 254 | UrsCy6qIGoo.mkv 255 | UsLnxI_zGpY.mkv 256 | uwW0ejeosmk.mkv 257 | uzPI7FcF79U.mkv 258 | v0L-WkMO3s4.mp4 259 | vBbjA4tWCPg.mp4 260 | vfjywN5CN0Y.mkv 261 | Vmef_8MY46w.mkv 262 | VRlpH1MbWUw.mp4 263 | VsYPP2I0aUQ.mkv 264 | wEAeql4z1O0.mp4 265 | wfEOx36N4jA.mp4 266 | WKqbLbU68wU.mkv 267 | WlgxRNCHQzw.mkv 268 | wogRuPNBUi8.mp4 269 | wONG7Vh87B4.mkv 270 | WSPvfxtqisg.mkv 271 | WVde9pyaHg4.mkv 272 | x-6CtPWVi6E.mkv 273 | 
X5wWhZ2r9kc.mp4 274 | xeGWXqSvC-8.webm 275 | XF87VL5T0aA.mkv 276 | XglAvHaEtHY.mp4 277 | xJmRNZVDDCY.mkv 278 | xmqSaQPzL1E.mkv 279 | xO4ABy2iOQA.mp4 280 | xp67EC-Hvwk.mkv 281 | XpGRS72ghag.mkv 282 | XV_FF3WC7kA.mkv 283 | y7ncweROe9U.mkv 284 | yMtGmGa8KZ0.mkv 285 | yn9WN9lsHRE.mkv 286 | yo-Kg2YxlZs.mkv 287 | yqImJuC5UzI.mp4 288 | Ytga8ciKWJc.mkv 289 | yvgCGJ6vfkY.mkv 290 | YYWdB7h1INo.mkv 291 | z-fsLpGHq6o.mkv 292 | Z1YV6wB037M.mkv 293 | z3kgrh0L_80.mkv 294 | zC5Fh2tTS1U.mp4 295 | zG7mx8KiavA.mp4 296 | zlVkeKC6Ha8.mp4 297 | ZosVdkY76FU.mkv 298 | zR725veL-DI.mkv 299 | ZxQn8HVmXsY.mkv 300 | -------------------------------------------------------------------------------- /tao/scripts/download/verify.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | from collections import defaultdict 5 | from pathlib import Path 6 | 7 | from script_utils.common import common_setup 8 | from tqdm import tqdm 9 | 10 | from tao.utils.download import are_tao_frames_dumped 11 | 12 | 13 | def main(): 14 | # Use first line of file docstring as description if it exists. 15 | parser = argparse.ArgumentParser( 16 | description=__doc__.split('\n')[0] if __doc__ else '', 17 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 18 | parser.add_argument('root', type=Path) 19 | parser.add_argument('--split', 20 | required=True, 21 | choices=['train', 'val', 'test']) 22 | 23 | args = parser.parse_args() 24 | log_dir = args.root / 'logs' 25 | log_dir.mkdir(exist_ok=True, parents=True) 26 | common_setup(__file__, log_dir, args) 27 | 28 | ann_path = args.root / f'annotations/{args.split}.json' 29 | with open(ann_path, 'r') as f: 30 | tao = json.load(f) 31 | 32 | checksums_path = ( 33 | args.root / f'annotations/checksums/{args.split}_checksums.json') 34 | with open(checksums_path, 'r') as f: 35 | checksums = json.load(f) 36 | 37 | videos_by_dataset = defaultdict(list) 38 | for video in tao['videos']: 39 | videos_by_dataset[video['metadata']['dataset']].append(video) 40 | 41 | status = {} 42 | for dataset, videos in sorted(videos_by_dataset.items()): 43 | status[dataset] = True 44 | for video in tqdm(videos, desc=f'Verifying {dataset}'): 45 | name = video['name'] 46 | frame_dir = args.root / 'frames' / name 47 | if not are_tao_frames_dumped( 48 | frame_dir, checksums[name], warn=True, allow_extra=False): 49 | logging.warning( 50 | f'Frames for {name} are not extracted properly. 
' 51 | f'Skipping rest of dataset.') 52 | status[dataset] = False 53 | break 54 | 55 | success = [] 56 | for dataset in sorted([d for d, v in status.items() if v]): 57 | success.append(f'{dataset: <12}: Verified ✓✓✓') 58 | 59 | failure = [] 60 | for dataset in sorted([d for d, v in status.items() if not v]): 61 | failure.append(f'{dataset: <12}: FAILED 𐄂𐄂𐄂') 62 | 63 | if success: 64 | logging.info('Success!\n' + ('\n'.join(success))) 65 | if failure: 66 | logging.warning('Some datasets were not properly extracted!\n' + 67 | ('\n'.join(failure))) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /tao/scripts/evaluation/configs/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/tao/scripts/evaluation/configs/default.yaml -------------------------------------------------------------------------------- /tao/scripts/evaluation/evaluate.py: -------------------------------------------------------------------------------- 1 | """Evaluate tao results (helper script).""" 2 | 3 | import argparse 4 | import logging 5 | import numpy as np 6 | from pathlib import Path 7 | 8 | from script_utils.common import common_setup 9 | from tao.utils.evaluation import get_cfg_defaults, evaluate, log_eval 10 | from tao.utils.yacs_util import merge_from_file_with_base 11 | 12 | 13 | CONFIG_DIR = Path(__file__).resolve().parent / 'configs' 14 | 15 | 16 | def main(): 17 | # Use first line of file docstring as description if it exists. 18 | parser = argparse.ArgumentParser( 19 | description=__doc__.split('\n')[0] if __doc__ else '', 20 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 21 | parser.add_argument('annotations', type=Path) 22 | parser.add_argument('predictions', type=Path) 23 | parser.add_argument('--output-dir', type=Path) 24 | parser.add_argument('--config', 25 | type=Path, 26 | default=CONFIG_DIR / 'default.yaml') 27 | parser.add_argument('--config-updates', nargs='*') 28 | 29 | args = parser.parse_args() 30 | 31 | if args.output_dir: 32 | tensorboard_dir = args.output_dir / 'tensorboard' 33 | if tensorboard_dir.exists(): 34 | raise ValueError( 35 | 'Tensorboard dir already exists, not evaluating.') 36 | args.output_dir.mkdir(exist_ok=True, parents=True) 37 | log_path = common_setup(__file__, args.output_dir, args).name 38 | else: 39 | logging.getLogger().setLevel(logging.INFO) 40 | logging.basicConfig(format='%(asctime)s.%(msecs).03d: %(message)s', 41 | datefmt='%H:%M:%S') 42 | logging.info('Args:\n%s', vars(args)) 43 | log_path = None 44 | 45 | cfg = get_cfg_defaults() 46 | merge_from_file_with_base(cfg, args.config) 47 | if args.config_updates: 48 | cfg.merge_from_list(args.config_updates) 49 | cfg.freeze() 50 | 51 | if args.output_dir: 52 | with open(args.output_dir / 'config.yaml', 'w') as f: 53 | f.write(cfg.dump()) 54 | 55 | tao_eval = evaluate(args.annotations, args.predictions, cfg) 56 | area_index = tao_eval['tao_eval'].params.area_rng_lbl.index('all') 57 | time_index = tao_eval['tao_eval'].params.time_rng_lbl.index('all') 58 | category_aps = np.mean( 59 | tao_eval['tao_eval'].eval['precision'][:, :, :, area_index, time_index], 60 | axis=(0, 1)) 61 | for i, cat in enumerate(tao_eval['tao_eval'].params.cat_ids): 62 | print(f'category_id: {cat}, ap:{category_aps[i]}') 63 | log_eval(tao_eval, cfg, output_dir=args.output_dir, log_path=log_path) 64 | 65 | 66 | if 
__name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /tao/scripts/trackers/sort/README.md: -------------------------------------------------------------------------------- 1 | SORT 2 | ===== 3 | 4 | A simple online and realtime tracking algorithm for 2D multiple object tracking in video sequences. 5 | See an example [video here](https://motchallenge.net/movies/ETH-Linthescher-SORT.mp4). 6 | 7 | By Alex Bewley 8 | 9 | ### Introduction 10 | 11 | SORT is a barebones implementation of a visual multiple object tracking framework based on rudimentary data association and state estimation techniques. It is designed for online tracking applications where only past and current frames are available and the method produces object identities on the fly. While this minimalistic tracker doesn't handle occlusion or re-entering objects its purpose is to serve as a baseline and testbed for the development of future trackers. 12 | 13 | SORT was initially described in an [arXiv tech report](http://arxiv.org/abs/1602.00763). At the time of the initial publication, SORT was ranked the best *open source* multiple object tracker on the [MOT benchmark](https://motchallenge.net/results/2D_MOT_2015/). 14 | 15 | This code has been tested on Mac OSX 10.10, and Ubuntu 14.04, with Python 2.7 (anaconda). 16 | 17 | **Note:** A significant proportion of SORT's accuracy is attributed to the detections. 18 | For your convenience, this repo also contains *Faster* RCNN detections for the MOT benchmark sequences in the [benchmark format](https://motchallenge.net/instructions/). To run the detector yourself please see the original [*Faster* RCNN project](https://github.com/ShaoqingRen/faster_rcnn) or the python reimplementation of [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn) by Ross Girshick. 19 | 20 | **Also see:** 21 | A new and improved version of SORT with a Deep Association Metric implemented in tensorflow is available at [https://github.com/nwojke/deep_sort](https://github.com/nwojke/deep_sort) . 22 | 23 | ### License 24 | 25 | SORT is released under the GPL License (refer to the LICENSE file for details) to promote the open use of the tracker and future improvements. If you require a permissive license contact Alex (alex@bewley.ai). 26 | 27 | ### Citing SORT 28 | 29 | If you find this repo useful in your research, please consider citing: 30 | 31 | @inproceedings{Bewley2016_sort, 32 | author={Bewley, Alex and Ge, Zongyuan and Ott, Lionel and Ramos, Fabio and Upcroft, Ben}, 33 | booktitle={2016 IEEE International Conference on Image Processing (ICIP)}, 34 | title={Simple online and realtime tracking}, 35 | year={2016}, 36 | pages={3464-3468}, 37 | keywords={Benchmark testing;Complexity theory;Detectors;Kalman filters;Target tracking;Visualization;Computer Vision;Data Association;Detection;Multiple Object Tracking}, 38 | doi={10.1109/ICIP.2016.7533003} 39 | } 40 | 41 | 42 | ### Dependencies: 43 | 44 | This code makes use of the following packages: 45 | 1. [`scikit-learn`](http://scikit-learn.org/stable/) 46 | 0. [`scikit-image`](http://scikit-image.org/download) 47 | 0. [`FilterPy`](https://github.com/rlabbe/filterpy) 48 | 49 | To install required dependencies run: 50 | ``` 51 | $ pip install -r requirements.txt 52 | ``` 53 | 54 | 55 | ### Demo: 56 | 57 | To run the tracker with the provided detections: 58 | 59 | ``` 60 | $ cd path/to/sort 61 | $ python sort.py 62 | ``` 63 | 64 | To display the results you need to: 65 | 66 | 0. 
Download the [2D MOT 2015 benchmark dataset](https://motchallenge.net/data/2D_MOT_2015/#download) 67 | 0. Create a symbolic link to the dataset 68 | ``` 69 | $ ln -s /path/to/MOT2015_challenge/data/2DMOT2015 mot_benchmark 70 | ``` 71 | 0. Run the demo with the ```--display``` flag 72 | ``` 73 | $ python sort.py --display 74 | ``` 75 | 76 | 77 | ### Main Results 78 | 79 | Using the [MOT challenge devkit](https://motchallenge.net/devkit/) the method produces the following results (as described in the paper). 80 | 81 | Sequence | Rcll | Prcn | FAR | GT MT PT ML| FP FN IDs FM| MOTA MOTP MOTAL 82 | --------------- |:----:|:----:|:----:|:-------------:|:-------------------:|:------------------: 83 | TUD-Campus | 68.5 | 94.3 | 0.21 | 8 6 2 0| 15 113 6 9| 62.7 73.7 64.1 84 | ETH-Sunnyday | 77.5 | 81.9 | 0.90 | 30 11 16 3| 319 418 22 54| 59.1 74.4 60.3 85 | ETH-Pedcross2 | 51.9 | 90.8 | 0.39 | 133 17 60 56| 330 3014 77 103| 45.4 74.8 46.6 86 | ADL-Rundle-8 | 44.3 | 75.8 | 1.47 | 28 6 16 6| 959 3781 103 211| 28.6 71.1 30.1 87 | Venice-2 | 42.5 | 64.8 | 2.75 | 26 7 9 10| 1650 4109 57 106| 18.6 73.4 19.3 88 | KITTI-17 | 67.1 | 92.3 | 0.26 | 9 1 8 0| 38 225 9 16| 60.2 72.3 61.3 89 | *Overall* | 49.5 | 77.5 | 1.24 | 234 48 111 75| 3311 11660 274 499| 34.0 73.3 35.1 90 | 91 | 92 | ### Using SORT in your own project 93 | 94 | Below is the gist of how to instantiate and update SORT. See the ['__main__'](https://github.com/abewley/sort/blob/master/sort.py#L239) section of [sort.py](https://github.com/abewley/sort/blob/master/sort.py#L239) for a complete example. 95 | 96 | from sort import * 97 | 98 | #create instance of SORT 99 | mot_tracker = Sort() 100 | 101 | # get detections 102 | ... 103 | 104 | # update SORT 105 | track_bbs_ids = mot_tracker.update(detections) 106 | 107 | # track_bbs_ids is a np array where each row contains a valid bounding box and track_id (last column) 108 | ... 
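A slightly more concrete sketch (not from the original README: the box coordinates are invented, and the `[x1, y1, x2, y2, score]` detection format is taken from the `sort_with_detection_id.py` docstring in this repo):

    import numpy as np
    from sort import Sort

    mot_tracker = Sort()
    detections = np.array([[10., 20., 50., 80., 0.9],
                           [200., 220., 260., 300., 0.8]])
    tracks = mot_tracker.update(detections)
    # each row of `tracks` is [x1, y1, x2, y2, track_id]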
109 | 110 | 111 | -------------------------------------------------------------------------------- /tao/scripts/trackers/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/tao/scripts/trackers/sort/__init__.py -------------------------------------------------------------------------------- /tao/scripts/trackers/sort/create_json_for_eval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import random 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | from natsort import natsorted 9 | from script_utils.common import common_setup 10 | from tqdm import tqdm 11 | 12 | 13 | def create_json(track_result, groundtruth, output_dir): 14 | # Image without extension -> image id 15 | image_stem_to_info = { 16 | x['file_name'].rsplit('.', 1)[0]: x for x in groundtruth['images'] 17 | } 18 | valid_videos = {x['name'] for x in groundtruth['videos']} 19 | 20 | all_annotations = [] 21 | found_predictions = {} 22 | for video in tqdm(valid_videos): 23 | video_npz = track_result / f'{video}.npz' 24 | if not video_npz.exists(): 25 | logging.error(f'Could not find video {video} at {video_npz}') 26 | continue 27 | video_result = np.load(video_npz) 28 | frame_names = [x for x in video_result.keys() if x != 'field_order'] 29 | video_found = {} 30 | for frame in natsorted(frame_names): 31 | # (x0, y0, x1, y1, class, score, box_index, track_id) 32 | frame_name = f'{video}/{frame}' 33 | if frame_name not in image_stem_to_info: 34 | continue 35 | video_found[frame_name] = True 36 | image_info = image_stem_to_info[frame_name] 37 | all_annotations.extend([{ 38 | # (x1, y1) -> (w, h) 39 | 'image_id': image_info['id'], 40 | 'video_id': image_info['video_id'], 41 | 'track_id': int(x[7]), 42 | 'bbox': [x[0], x[1], x[2] - x[0], x[3] - x[1]], 43 | 'category_id': x[4], 44 | 'score': x[5], 45 | } for x in video_result[frame]]) 46 | if not video_found: 47 | raise ValueError(f'Found no valid predictions for video {video}') 48 | found_predictions.update(video_found) 49 | if not found_predictions: 50 | raise ValueError('Found no valid predictions!') 51 | 52 | with_predictions = set(found_predictions.keys()) 53 | with_labels = set(image_stem_to_info.keys()) 54 | if with_predictions != with_labels: 55 | missing_videos = { 56 | x.rsplit('/', 1)[0] 57 | for x in with_labels - with_predictions 58 | } 59 | logging.warn( 60 | f'{len(with_labels - with_predictions)} images from ' 61 | f'{len(missing_videos)} videos did not have predictions!') 62 | 63 | with open(output_dir / 'results.json', 'w') as f: 64 | json.dump(all_annotations, f) 65 | 66 | 67 | def main(): 68 | # Use first line of file docstring as description if it exists. 
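    # Example invocation (hypothetical paths):
    #   python create_json_for_eval.py \
    #       --track-result outputs/sort/tracks \
    #       --annotations-json data/tao/annotations/val.json \
    #       --output-dir outputs/sort/json
    # --track-result is expected to hold one <video>.npz per TAO video, where
    # each frame key maps to rows of (x0, y0, x1, y1, class, score, box_index,
    # track_id); these are converted into a single COCO-style results.json.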
69 | parser = argparse.ArgumentParser( 70 | description=__doc__.split('\n')[0] if __doc__ else '', 71 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 72 | parser.add_argument('--track-result', required=True, type=Path) 73 | parser.add_argument('--annotations-json', 74 | type=Path, 75 | help='Annotations json') 76 | parser.add_argument('--output-dir', required=True, type=Path) 77 | 78 | args = parser.parse_args() 79 | args.output_dir.mkdir(exist_ok=True, parents=True) 80 | common_setup(__file__, args.output_dir, args) 81 | 82 | with open(args.annotations_json, 'r') as f: 83 | groundtruth = json.load(f) 84 | 85 | create_json(args.track_result, groundtruth, args.output_dir) 86 | 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /tao/scripts/trackers/sort/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | filterpy~=1.4.1 3 | numba~=0.38.1 4 | scikit-image~=0.14.0 5 | scikit-learn~=0.19.1 6 | lap~=0.4.0 7 | -------------------------------------------------------------------------------- /tao/scripts/trackers/sort/sort_with_detection_id.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sort import associate_detections_to_trackers, KalmanBoxTracker 4 | 5 | 6 | class SortWithDetectionId(object): 7 | def __init__(self, max_age=1, min_hits=3, iou_threshold=0.3): 8 | """ 9 | Sets key parameters for SORT 10 | """ 11 | self.max_age = max_age 12 | self.min_hits = min_hits 13 | self.trackers = [] 14 | self.frame_count = 0 15 | self.iou_threshold = iou_threshold 16 | 17 | def update(self, dets): 18 | """ 19 | Args: 20 | dets (np.array): Shape (num_boxes, 5), where each row contains 21 | [x1, y1, x2, y2, score] 22 | 23 | Retruns: 24 | tracks (np.array): Shape (num_boxes, 6), where each row contains 25 | [x1, y1, x2, y2, detection_index, track_id] 26 | """ 27 | self.frame_count += 1 28 | # get predicted locations from existing trackers. 
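        # Each existing KalmanBoxTracker predicts its box for the current
        # frame; predictions that come back as NaN are collected in `to_del`
        # and dropped before detections are associated to tracks below.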
29 | trks = np.zeros((len(self.trackers), 5)) 30 | to_del = [] 31 | ret = [] 32 | for t, trk in enumerate(trks): 33 | pos = self.trackers[t].predict()[0] 34 | trk[:] = [pos[0], pos[1], pos[2], pos[3], 0] 35 | if (np.any(np.isnan(pos))): 36 | to_del.append(t) 37 | 38 | trks = np.ma.compress_rows(np.ma.masked_invalid(trks)) 39 | for t in reversed(to_del): 40 | self.trackers.pop(t) 41 | matched, unmatched_dets, unmatched_trks = ( 42 | associate_detections_to_trackers(dets, trks, self.iou_threshold)) 43 | 44 | # update matched trackers with assigned detections 45 | track_to_det_index = {t: d for d, t in matched} 46 | # matched[i, 0] is matched to matched[i, 1] 47 | for t, trk in enumerate(self.trackers): 48 | if (t not in unmatched_trks): 49 | d = track_to_det_index[t] 50 | trk.update(dets[d, :]) 51 | 52 | # create and initialise new trackers for unmatched detections 53 | for i in unmatched_dets: 54 | trk = KalmanBoxTracker(dets[i, :]) 55 | self.trackers.append(trk) 56 | track_to_det_index[len(self.trackers) - 1] = i 57 | i = len(self.trackers) 58 | for t, trk in reversed(list(enumerate(self.trackers))): 59 | d = trk.get_state()[0] 60 | det_id = track_to_det_index.get(t, -1) 61 | if ((trk.time_since_update < 1) 62 | and (trk.hit_streak >= self.min_hits 63 | or self.frame_count <= self.min_hits)): 64 | ret.append( 65 | np.concatenate((d, [det_id, trk.id + 1])).reshape( 66 | 1, -1)) # +1 as MOT benchmark requires positive 67 | i -= 1 68 | # remove dead tracklet 69 | if (trk.time_since_update > self.max_age): 70 | self.trackers.pop(i) 71 | if (len(ret) > 0): 72 | return np.concatenate(ret) 73 | return np.empty((0, 6)) 74 | -------------------------------------------------------------------------------- /tao/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Note: To use the 'upload' functionality of this file, you must: 5 | # $ pipenv install twine --dev 6 | 7 | import io 8 | import os 9 | import sys 10 | from shutil import rmtree 11 | 12 | from setuptools import find_packages, setup, Command 13 | 14 | # Package meta-data. 15 | NAME = 'tao' 16 | DESCRIPTION = 'Track Any Object' 17 | URL = 'http://taodataset.org' 18 | EMAIL = 'achald@cs.cmu.edu' 19 | AUTHOR = 'Achal Dave' 20 | REQUIRES_PYTHON = '>=3.6.0' 21 | VERSION = '0.1.0' 22 | 23 | # What packages are required for this module to be executed? 24 | REQUIRED = [ 25 | 'script_utils @ git+https://github.com/achalddave/python-script-utils.git@v0.0.2#egg=script_utils', 26 | 'moviepy~=0.2', 'scipy', 'natsort', 'tqdm', 'yacs', 'boto3', 'youtube_dl', 27 | 'numba' 28 | # 'requests', 'maya', 'records', 29 | ] 30 | 31 | # What packages are optional? 32 | EXTRAS = { 33 | # 'fancy feature': ['django'], 34 | } 35 | 36 | # The rest you shouldn't have to touch too much :) 37 | # ------------------------------------------------ 38 | # Except, perhaps the License and Trove Classifiers! 39 | # If you do change the License, remember to change the Trove Classifier for that! 40 | 41 | here = os.path.abspath(os.path.dirname(__file__)) 42 | 43 | # Import the README and use it as the long-description. 44 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file! 45 | try: 46 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 47 | long_description = '\n' + f.read() 48 | except FileNotFoundError: 49 | long_description = DESCRIPTION 50 | 51 | # Load the package's __version__.py module as a dictionary. 
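# Since VERSION is set above, the __version__.py fallback below is skipped and
# about['__version__'] is taken directly from VERSION.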
52 | about = {} 53 | if not VERSION: 54 | project_slug = NAME.lower().replace("-", "_").replace(" ", "_") 55 | with open(os.path.join(here, project_slug, '__version__.py')) as f: 56 | exec(f.read(), about) 57 | else: 58 | about['__version__'] = VERSION 59 | 60 | 61 | class UploadCommand(Command): 62 | """Support setup.py upload.""" 63 | 64 | description = 'Build and publish the package.' 65 | user_options = [] 66 | 67 | @staticmethod 68 | def status(s): 69 | """Prints things in bold.""" 70 | print('\033[1m{0}\033[0m'.format(s)) 71 | 72 | def initialize_options(self): 73 | pass 74 | 75 | def finalize_options(self): 76 | pass 77 | 78 | def run(self): 79 | try: 80 | self.status('Removing previous builds…') 81 | rmtree(os.path.join(here, 'dist')) 82 | except OSError: 83 | pass 84 | 85 | self.status('Building Source and Wheel (universal) distribution…') 86 | os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) 87 | 88 | self.status('Uploading the package to PyPI via Twine…') 89 | os.system('twine upload dist/*') 90 | 91 | self.status('Pushing git tags…') 92 | os.system('git tag v{0}'.format(about['__version__'])) 93 | os.system('git push --tags') 94 | 95 | sys.exit() 96 | 97 | 98 | # Where the magic happens: 99 | setup( 100 | name=NAME, 101 | version=about['__version__'], 102 | description=DESCRIPTION, 103 | long_description=long_description, 104 | long_description_content_type='text/markdown', 105 | author=AUTHOR, 106 | author_email=EMAIL, 107 | python_requires=REQUIRES_PYTHON, 108 | url=URL, 109 | packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), 110 | # If your package is a single module, use this instead of 'packages': 111 | # py_modules=['tao'], 112 | 113 | # entry_points={ 114 | # 'console_scripts': ['mycli=mymodule:cli'], 115 | # }, 116 | install_requires=REQUIRED, 117 | extras_require=EXTRAS, 118 | include_package_data=True, 119 | license='MIT', 120 | classifiers=[ 121 | # Trove classifiers 122 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 123 | 'License :: OSI Approved :: MIT License', 124 | 'Programming Language :: Python', 125 | 'Programming Language :: Python :: 3', 126 | 'Programming Language :: Python :: 3.6', 127 | 'Programming Language :: Python :: Implementation :: CPython', 128 | 'Programming Language :: Python :: Implementation :: PyPy' 129 | ], 130 | # $ setup.py publish support. 
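    # Registering UploadCommand (defined above) under 'upload' lets
    # `python setup.py upload` build the sdist/wheel, push it to PyPI via
    # twine, and tag the release in git.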
131 | cmdclass={ 132 | 'upload': UploadCommand, 133 | }, 134 | ) 135 | -------------------------------------------------------------------------------- /tao/tao/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/tao/tao/__init__.py -------------------------------------------------------------------------------- /tao/tao/toolkit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/tao/tao/toolkit/__init__.py -------------------------------------------------------------------------------- /tao/tao/toolkit/tao/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from .tao import Tao 3 | from .results import TaoResults 4 | from .eval import TaoEval 5 | 6 | logging.basicConfig( 7 | format="[%(asctime)s] %(name)s %(levelname)s: %(message)s", 8 | datefmt="%m/%d %H:%M:%S", 9 | level=logging.WARN, 10 | ) 11 | 12 | __all__ = ["Tao", "TaoResults", "TaoEval"] 13 | -------------------------------------------------------------------------------- /tao/tao/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/tao/tao/utils/__init__.py -------------------------------------------------------------------------------- /tao/tao/utils/detectron2/datasets.py: -------------------------------------------------------------------------------- 1 | from detectron2.data.datasets import register_coco_instances 2 | 3 | 4 | def register_datasets(): 5 | register_coco_instances( 6 | "lvis_v0.5_coco_2017_train", {}, 7 | "data/detectron_datasets/lvis-coco/lvis-0.5_coco2017_train.json", 8 | "data/detectron_datasets/lvis-coco/train2017") 9 | -------------------------------------------------------------------------------- /tao/tao/utils/download.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from hashlib import md5 3 | from multiprocessing import Pool 4 | from pathlib import Path 5 | 6 | from tqdm import tqdm 7 | 8 | from tao.utils.video import dump_frames 9 | 10 | 11 | def dump_frames_star(task): 12 | return dump_frames(*task) 13 | 14 | 15 | def dump_tao_frames(videos, 16 | output_dirs, 17 | workers, 18 | tqdm_desc='Converting to frames'): 19 | fps = None 20 | extension = '.jpg' 21 | jpeg_qscale = 2 22 | 23 | for output_dir in output_dirs: 24 | Path(output_dir).mkdir(exist_ok=True, parents=True) 25 | 26 | dump_frames_tasks = [] 27 | for video_path, output_dir in zip(videos, output_dirs): 28 | dump_frames_tasks.append( 29 | (video_path, output_dir, fps, extension, jpeg_qscale)) 30 | 31 | # dump_frames code logs when, e.g., the expected number of frames does not 32 | # match the number of dumped frames. But these logs can have false 33 | # positives that are confusing, so we check that frames are correctly 34 | # dumped ourselves separately based on frames in TAO annotations. 
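    # Temporarily raise the root logger to ERROR while frames are dumped; the
    # original level is restored once the worker pool finishes.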
35 | _log_level = logging.root.level 36 | logging.root.setLevel(logging.ERROR) 37 | if workers > 1: 38 | pool = Pool(workers) 39 | try: 40 | list( 41 | tqdm(pool.imap_unordered(dump_frames_star, dump_frames_tasks), 42 | total=len(dump_frames_tasks), 43 | leave=False, 44 | desc=tqdm_desc)) 45 | except KeyboardInterrupt: 46 | print('Parent received control-c, exiting.') 47 | pool.terminate() 48 | else: 49 | for task in tqdm(dump_frames_tasks): 50 | dump_frames_star(task) 51 | logging.root.setLevel(_log_level) 52 | 53 | 54 | def frame_checksums_diff(frames_dir, checksums, early_exit=False): 55 | missing = [] 56 | mismatch = [] 57 | 58 | checksums = {k.replace('.jpeg', '.jpg'): v for k, v in checksums.items()} 59 | extra = [x for x in frames_dir.rglob('.jpg') if x.name not in checksums] 60 | 61 | for frame, cksum in checksums.items(): 62 | path = frames_dir / frame 63 | if not path.exists(): 64 | missing.append(path) 65 | if early_exit: 66 | break 67 | if cksum: 68 | with open(path, 'rb') as f: 69 | md5_digest = md5(f.read()).hexdigest() 70 | if md5_digest != cksum: 71 | # path, seen, expected 72 | mismatch.append((path, md5_digest, cksum)) 73 | if early_exit: 74 | break 75 | return missing, mismatch, extra 76 | 77 | 78 | def are_tao_frames_dumped(frames_dir, checksums, warn=True, allow_extra=True): 79 | missing, mismatch, extra = frame_checksums_diff(frames_dir, 80 | checksums, 81 | early_exit=True) 82 | if allow_extra: 83 | extra = [] 84 | if warn and extra: 85 | logging.warning(f'Unexpected frame at {extra[0]}!') 86 | if warn and missing: 87 | logging.warning(f'Could not find frame at {missing[0]}!') 88 | if warn and mismatch: 89 | path, seen, expected = mismatch[0] 90 | logging.warning( 91 | f'Checksum for {path} did not match! ' 92 | f'Expected: {expected}, saw: {seen}') 93 | return not mismatch and not missing and not extra 94 | 95 | 96 | def remove_non_tao_frames(frames_dir, keep_frames): 97 | frames = {x.split('.')[0] for x in keep_frames} 98 | extracted_frames = list(frames_dir.glob('*.jpg')) 99 | to_remove = [x for x in extracted_frames if x.stem not in frames] 100 | assert len(to_remove) != len(extracted_frames) 101 | for frame in to_remove: 102 | frame.unlink() 103 | -------------------------------------------------------------------------------- /tao/tao/utils/fs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm'] 5 | VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mkv', '.mov'] 6 | 7 | 8 | def dir_path(path): 9 | """Wrapper around Path that ensures this directory is created.""" 10 | if not isinstance(path, Path): 11 | path = Path(path) 12 | path.mkdir(exist_ok=True, parents=True) 13 | return path 14 | 15 | 16 | def file_path(path): 17 | """Wrapper around Path that ensures parent directories are created. 18 | 19 | x = mkdir_parents(dir / video_with_dir_prefix) 20 | is short-hand for 21 | x = Path(dir / video_with_dir_prefix) 22 | x.parent.mkdir(exist_ok=True, parents=True) 23 | """ 24 | if not isinstance(path, Path): 25 | path = Path(path) 26 | path.resolve().parent.mkdir(exist_ok=True, parents=True) 27 | return path 28 | 29 | 30 | def glob_ext(path, extensions, recursive=False): 31 | if not isinstance(path, Path): 32 | path = Path(path) 33 | if recursive: 34 | # Handle one level of symlinks. 
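        # rglob from `path` itself would not descend into children that are
        # symlinked directories, so list the immediate children first and then
        # recurse within each child directory separately.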
35 | path_children = list(path.glob('*')) 36 | all_files = list(path_children) 37 | for x in path_children: 38 | if x.is_dir(): 39 | all_files += x.rglob('*') 40 | else: 41 | all_files = path.glob('*') 42 | return [ 43 | x for x in all_files if any(x.name.endswith(y) for y in extensions) 44 | ] 45 | 46 | 47 | def find_file_extensions(folder, stem, possible_extensions): 48 | if not isinstance(folder, Path): 49 | folder = Path(folder) 50 | for ext in possible_extensions: 51 | if ext[0] != '.': 52 | ext = f'.{ext}' 53 | path = folder / f'{stem}{ext}' 54 | if path.exists(): 55 | return path 56 | return None 57 | 58 | 59 | def is_image_file(filename): 60 | """Checks if a file is an image. 61 | 62 | Args: 63 | filename (string): path to a file 64 | Returns: 65 | bool: True if the filename ends with a known image extension 66 | """ 67 | filename_lower = filename.lower() 68 | return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS) 69 | 70 | 71 | def simple_table(rows): 72 | lengths = [ 73 | max(len(row[i]) for row in rows) + 1 for i in range(len(rows[0])) 74 | ] 75 | row_format = ' '.join(('{:<%s}' % length) for length in lengths[:-1]) 76 | row_format += ' {}' # The last column can maintain its length. 77 | 78 | output = '' 79 | for i, row in enumerate(rows): 80 | if i > 0: 81 | output += '\n' 82 | output += row_format.format(*row) 83 | return output 84 | 85 | 86 | def parse_bool(arg): 87 | """Parse string to boolean. 88 | Using type=bool in argparse does not do the right thing. E.g. 89 | '--bool_flag False' will parse as True. See 90 | 91 | 92 | Usage: 93 | parser.add_argument( '--choice', type=parse_bool) 94 | """ 95 | if arg == 'True': 96 | return True 97 | elif arg == 'False': 98 | return False 99 | else: 100 | raise argparse.ArgumentTypeError("Expected 'True' or 'False'.") 101 | -------------------------------------------------------------------------------- /tao/tao/utils/misc.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import pickle 4 | 5 | from pathlib import Path 6 | from scipy.io import loadmat 7 | from tqdm import tqdm 8 | 9 | from tao.utils import misc 10 | 11 | 12 | def parse_bool(arg): 13 | """Parse string to boolean. 14 | 15 | Using type=bool in argparse does not do the right thing. E.g. 16 | '--bool_flag False' will parse as True. 
See 17 | 18 | """ 19 | if arg == 'True': 20 | return True 21 | elif arg == 'False': 22 | return False 23 | else: 24 | raise argparse.ArgumentTypeError("Expected 'True' or 'False'.") 25 | 26 | 27 | def load_detection_mat(mat): 28 | dictionary = {} 29 | f = loadmat(mat)['x'] 30 | result = {} 31 | # Assume mat files are of the format (x0, y0, x1, y1, label, score) 32 | if f.shape[1] == 6: 33 | result['pred_boxes'] = [[x[0], x[1], x[2], x[3]] for x in f[:, :4]] 34 | result['scores'] = [x for x in f[:, 5]] 35 | result['pred_classes'] = [x for x in f[:, 4]] 36 | elif f.shape[1] > 6: 37 | # Assume mat files are of the format 38 | # (x0, y0, x1, y1, label1_score, label2_score, ..., labeln_score) 39 | result['pred_boxes'] = [[x[0], x[1], x[2], x[3]] for x in f[:, :4]] 40 | result['scores'] = [] 41 | result['pred_classes'] = [] 42 | for box in f: 43 | label = box[4:].argmax() 44 | result['pred_classes'].append(label) 45 | result['scores'].append(box[label+4]) 46 | dictionary['instances'] = result 47 | return dictionary 48 | 49 | 50 | def load_detection_dir_as_results(root, 51 | annotations, 52 | detections_format='pickle', 53 | include_masks=False, 54 | score_threshold=None, 55 | max_dets_per_image=None, 56 | show_progress=False): 57 | """Load detections from dir as a results.json dict.""" 58 | if not isinstance(root, Path): 59 | root = Path(root) 60 | ext = { 61 | 'pickle': '.pickle', 62 | 'pkl': '.pkl', 63 | 'mat': '.mat' 64 | }[detections_format] 65 | bbox_annotations = [] 66 | if include_masks: 67 | segmentation_annotations = [] 68 | 69 | for image in tqdm(annotations['images'], 70 | desc='Collecting annotations', 71 | disable=not show_progress): 72 | path = (root / f'{image["file_name"]}').with_suffix(ext) 73 | if not path.exists(): 74 | logging.warn(f'Could not find detections for image ' 75 | f'{image["file_name"]} at {path}; skipping...') 76 | continue 77 | if detections_format in ('pickle', 'pkl'): 78 | with open(path, 'rb') as f: 79 | detections = pickle.load(f) 80 | else: 81 | detections = misc.load_detection_mat(path) 82 | 83 | num_detections = len(detections['instances']['scores']) 84 | indices = sorted(range(num_detections), 85 | key=lambda i: detections['instances']['scores'][i], 86 | reverse=True) 87 | 88 | if max_dets_per_image is not None: 89 | indices = indices[:max_dets_per_image] 90 | 91 | for idx in indices: 92 | entry = detections['instances']['pred_boxes'][idx] 93 | x1 = entry[0] 94 | y1 = entry[1] 95 | x2 = entry[2] 96 | y2 = entry[3] 97 | bbox = [int(x1), int(y1), int(x2-x1), int(y2-y1)] 98 | 99 | category = int(detections['instances']['pred_classes'][idx] + 1) 100 | score = detections['instances']['scores'][idx] 101 | if score_threshold is not None and score < score_threshold: 102 | continue 103 | 104 | try: 105 | score = score.item() 106 | except AttributeError: 107 | pass 108 | 109 | bbox_annotations.append({ 110 | 'image_id': image['id'], 111 | 'category_id': category, 112 | 'bbox': bbox, 113 | 'score': score, 114 | }) 115 | if include_masks: 116 | segmentation_annotations.append({ 117 | 'image_id': image['id'], 118 | 'category_id': category, 119 | 'segmentation': detections['instances']['pred_masks'][idx], 120 | 'score': score 121 | }) 122 | if include_masks: 123 | return bbox_annotations, segmentation_annotations 124 | else: 125 | return bbox_annotations 126 | -------------------------------------------------------------------------------- /tao/tao/utils/parallel/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TRI-ML/permatrack/db160887a7817acc563e09c4f5b47cd51eac5820/tao/tao/utils/parallel/__init__.py -------------------------------------------------------------------------------- /tao/tao/utils/parallel/fixed_gpu_pool.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | from tao.utils.parallel.pool_context import PoolWithContext 3 | 4 | 5 | class FixedGpuPool: 6 | """Pool where each process is attached to a specific GPU. 7 | 8 | Usage: 9 | def init(args, context): 10 | context['init_return'] = 'init' 11 | def run(args, context): 12 | return (context['gpu'], context['init_return'], args) 13 | p = FixedGpuPool([0, 1, 2, 3], init, None) 14 | print(p.map(run, ['task1', 'task2', 'task3'])) 15 | # [(0, 'init', 'task1'), (1, 'init', 'task2'), (2, 'hi', 'task3')] 16 | # NOTE: GPUs may be in different order 17 | """ 18 | 19 | def __init__(self, gpus, initializer=None, initargs=None): 20 | gpu_queue = mp.Manager().Queue() 21 | for gpu in gpus: 22 | gpu_queue.put(gpu) 23 | self.pool = PoolWithContext( 24 | len(gpus), _FixedGpuPool_init, (gpu_queue, initializer, initargs)) 25 | 26 | def map(self, task_fn, tasks): 27 | return self.pool.map(_FixedGpuPool_run, 28 | ((task_fn, task) for task in tasks)) 29 | 30 | def imap_unordered(self, task_fn, tasks): 31 | return self.pool.imap_unordered(_FixedGpuPool_run, 32 | ((task_fn, task) for task in tasks)) 33 | 34 | def close(self): 35 | self.pool.close() 36 | 37 | 38 | def _FixedGpuPool_init(args, context): 39 | gpu_queue, initializer, initargs = args 40 | context['gpu'] = gpu_queue.get() 41 | initializer(initargs, context=context) 42 | 43 | 44 | def _FixedGpuPool_run(args, context): 45 | task_fn, task_args = args 46 | return task_fn(task_args, context=context) 47 | 48 | 49 | if __name__ == "__main__": 50 | def _test_gpu_init(args, context): 51 | context['init_return'] = 'init' 52 | 53 | def _test_gpu_run(args, context): 54 | return (context['gpu'], context['init_return'], args) 55 | 56 | p = FixedGpuPool([0, 1, 2, 3], _test_gpu_init, 'init arg') 57 | print(p.map(_test_gpu_run, ['task1', 'task2', 'task3'])) 58 | -------------------------------------------------------------------------------- /tao/tao/utils/parallel/pool_context.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | from collections.abc import Iterable 3 | 4 | 5 | _PoolWithContext_context = None 6 | 7 | 8 | def _PoolWithContext_init(initializer, init_args): 9 | global _PoolWithContext_context 10 | _PoolWithContext_context = {} 11 | if init_args is None: 12 | initializer(context=_PoolWithContext_context) 13 | else: 14 | initializer(init_args, context=_PoolWithContext_context) 15 | 16 | 17 | def _PoolWithContext_run(args): 18 | task_fn, task_args = args 19 | return task_fn(task_args, context=_PoolWithContext_context) 20 | 21 | 22 | class PoolWithContext: 23 | """Like multiprocessing.Pool, but pass output of initializer to map fn. 
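    The `context` dict is created once per worker process and stored in a
    module-level global, so whatever the initializer puts into it (e.g. a GPU
    id in FixedGpuPool) is handed back to every task that worker runs.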
24 | 25 | Usage: 26 | def init(context): 27 | context['init_return'] = 'init' 28 | def run(args, context): 29 | return (context['init_return'], args) 30 | p = PoolWithContext(4, init) 31 | print(p.map(run, ['task1', 'task2', 'task3'])) 32 | # [('init', 'task1'), ('init', 'task2'), ('init', 'task3')] 33 | # NOTE: GPUs may be in different order 34 | """ 35 | def __init__(self, num_workers, initializer, initargs=None): 36 | self.pool = mp.Pool( 37 | num_workers, 38 | initializer=_PoolWithContext_init, 39 | initargs=(initializer, initargs)) 40 | 41 | def map(self, task_fn, tasks): 42 | return self.pool.map(_PoolWithContext_run, 43 | ((task_fn, task) for task in tasks)) 44 | 45 | def close(self): 46 | self.pool.close() 47 | 48 | def imap_unordered(self, task_fn, tasks): 49 | return self.pool.imap_unordered(_PoolWithContext_run, 50 | ((task_fn, task) for task in tasks)) 51 | 52 | 53 | if __name__ == "__main__": 54 | def _test_init(context): 55 | context['init_return'] = 'hi' 56 | 57 | def _test_init_2(context): 58 | context['hello'] = 2 59 | 60 | def _test_run(args, context): 61 | return (args, context['init_return']) 62 | 63 | def _test_run_2(args, context): 64 | return (args, context) 65 | 66 | p = PoolWithContext(4, _test_init) 67 | p2 = PoolWithContext(4, _test_init_2) 68 | print(p.map(_test_run, ['task1', 'task2', 'task3'])) 69 | print(p2.map(_test_run_2, ['task1', 'task2', 'task3'])) 70 | -------------------------------------------------------------------------------- /tao/tao/utils/yacs_util.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import yaml 4 | from typing import Any, Dict 5 | 6 | from yacs.config import CfgNode 7 | from yacs.config import _valid_type, _VALID_TYPES 8 | 9 | 10 | BASE_KEY = "_BASE_" 11 | 12 | 13 | def _load_yaml_with_base(filename: str, allow_unsafe: bool = False) -> CfgNode: 14 | """ 15 | Just like `yaml.load(open(filename))`, but inherit attributes from its 16 | `_BASE_`. 17 | 18 | Modified from 19 | https://github.com/facebookresearch/fvcore/blob/99cb965c67e675dc3259cd490c1dd78ab03a55ff/fvcore/common/config.py 20 | 21 | Args: 22 | filename (str): the file name of the current config. Will be used to 23 | find the base config file. 24 | allow_unsafe (bool): whether to allow loading the config file with 25 | `yaml.unsafe_load`. 26 | Returns: 27 | (dict): the loaded yaml 28 | """ 29 | with open(filename, "r") as f: 30 | try: 31 | cfg = yaml.safe_load(f) 32 | except yaml.constructor.ConstructorError: 33 | if not allow_unsafe: 34 | raise 35 | logger = logging.getLogger(__name__) 36 | logger.warning( 37 | "Loading config {} with yaml.unsafe_load. Your machine may " 38 | "be at risk if the file contains malicious content.".format( 39 | filename 40 | ) 41 | ) 42 | f.close() 43 | with open(filename, "r") as f: 44 | cfg = yaml.unsafe_load(f) # pyre-ignore 45 | 46 | if cfg is None: 47 | return cfg 48 | 49 | # pyre-ignore 50 | def merge_a_into_b(a: Dict[Any, Any], b: Dict[Any, Any]) -> None: 51 | # merge dict a into dict b. values in a will overwrite b. 
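        # Dictionaries are merged key-by-key and recursively, so a child
        # config only needs to list the keys it overrides; everything else is
        # inherited from its _BASE_ file.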
52 | for k, v in a.items(): 53 | if isinstance(v, dict) and k in b: 54 | assert isinstance( 55 | b[k], dict), "Cannot inherit key '{}' from base!".format(k) 56 | merge_a_into_b(v, b[k]) 57 | else: 58 | b[k] = v 59 | 60 | if BASE_KEY in cfg: 61 | base_cfg_file = cfg[BASE_KEY] 62 | if base_cfg_file.startswith("~"): 63 | base_cfg_file = os.path.expanduser(base_cfg_file) 64 | if not any(map(base_cfg_file.startswith, 65 | ["/", "https://", "http://"])): 66 | # the path to base cfg is relative to the config file itself. 67 | base_cfg_file = os.path.join(os.path.dirname(filename), 68 | base_cfg_file) 69 | base_cfg = _load_yaml_with_base(base_cfg_file, 70 | allow_unsafe=allow_unsafe) 71 | del cfg[BASE_KEY] 72 | if base_cfg is None: 73 | return cfg 74 | 75 | merge_a_into_b(cfg, base_cfg) # pyre-ignore 76 | return base_cfg 77 | return cfg 78 | 79 | 80 | def merge_from_file_with_base(cfg, 81 | cfg_filename: str, 82 | allow_unsafe: bool = False) -> None: 83 | """ 84 | Merge configs from a given yaml file. 85 | 86 | Modified from 87 | https://github.com/facebookresearch/fvcore/blob/99cb965c67e675dc3259cd490c1dd78ab03a55ff/fvcore/common/config.py 88 | 89 | Args: 90 | cfg_filename: the file name of the yaml config. 91 | allow_unsafe: whether to allow loading the config file with 92 | `yaml.unsafe_load`. 93 | """ 94 | loaded_cfg = _load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe) 95 | loaded_cfg = type(cfg)(loaded_cfg) 96 | cfg.merge_from_other_cfg(loaded_cfg) 97 | 98 | 99 | def cfg_to_dict(cfg_node, key_list=[]): 100 | if not isinstance(cfg_node, CfgNode): 101 | assert _valid_type(cfg_node), ( 102 | "Key {} with value {} is not a valid type; valid types: {}".format( 103 | ".".join(key_list), type(cfg_node), _VALID_TYPES)) 104 | return cfg_node 105 | else: 106 | cfg_dict = dict(cfg_node) 107 | for k, v in cfg_dict.items(): 108 | cfg_dict[k] = cfg_to_dict(v, key_list + [k]) 109 | return cfg_dict 110 | 111 | 112 | def cfg_to_flat_dict(cfg_node, key_list=[]): 113 | if not isinstance(cfg_node, CfgNode): 114 | assert _valid_type(cfg_node), ( 115 | "Key {} with value {} is not a valid type; valid types: {}".format( 116 | ".".join(key_list), type(cfg_node), _VALID_TYPES)) 117 | return cfg_node 118 | else: 119 | cfg_dict_flat = {} 120 | for k, v in dict(cfg_node).items(): 121 | updated = cfg_to_dict(v, key_list + [k]) 122 | if isinstance(updated, dict): 123 | for k1, v1 in updated.items(): 124 | cfg_dict_flat['.'.join(key_list + [k, k1])] = v1 125 | else: 126 | cfg_dict_flat['.'.join(key_list + [k])] = updated 127 | return cfg_dict_flat 128 | --------------------------------------------------------------------------------
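A minimal sketch of exercising the `_BASE_` inheritance implemented in tao/tao/utils/yacs_util.py above; the file names and config keys are hypothetical, and (as yacs requires) the default `cfg` must already define every key that the YAML files set:

from pathlib import Path
from yacs.config import CfgNode

from tao.utils.yacs_util import merge_from_file_with_base

# A base config and a child config that inherits it and overrides one value.
Path('base.yaml').write_text('EVAL:\n  IOU_THRESH: 0.5\n  AREA_RNG: all\n')
Path('child.yaml').write_text('_BASE_: base.yaml\nEVAL:\n  IOU_THRESH: 0.75\n')

# Defaults; nested plain dicts are converted to CfgNode automatically.
cfg = CfgNode({'EVAL': {'IOU_THRESH': 0.0, 'AREA_RNG': ''}})
merge_from_file_with_base(cfg, 'child.yaml')
print(cfg.EVAL.IOU_THRESH, cfg.EVAL.AREA_RNG)  # -> 0.75 all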