├── .gitattributes
├── .gitignore
├── LICENSE
├── Metrics.py
├── README.md
├── Visualization
    ├── Vis_Input.py
    ├── Vis_Res.py
    └── __init__.py
├── __init__.py
├── assets
    └── demo.gif
├── configs
    ├── TubeTK_resnet_50_FPN_8frame_1stride.yaml
    ├── __init__.py
    ├── default.py
    ├── get_MOT17_tube.yaml
    └── get_jta_tube.yaml
├── dataset
    ├── Parsers
    │   ├── JTA.py
    │   ├── MOT17.py
    │   ├── __init__.py
    │   └── structures.py
    ├── __init__.py
    ├── augmentation.py
    ├── dataLoader.py
    ├── jta.py
    ├── mot17.py
    └── mot17jta.py
├── demo.py
├── evaluate.py
├── fetch_models.sh
├── install
    └── compile_local.sh
├── launch.py
├── main.py
├── network
    ├── __init__.py
    ├── focal_loss.py
    ├── fpn.py
    ├── resnet.py
    ├── track_head.py
    ├── tubetk.py
    └── utils.py
├── optim
    ├── __init__.py
    ├── lr_scheduler.py
    └── solver.py
├── post_processing
    ├── __init__.py
    ├── nms
    │   ├── __init__.py
    │   ├── setup.py
    │   └── src
    │   │   ├── nms_cpu.cpp
    │   │   ├── nms_cuda.cpp
    │   │   ├── nms_kernel.cu
    │   │   └── soft_nms_cpu.pyx
    ├── tube_iou_matching.py
    ├── tube_iou_matching_old.py
    ├── tube_iou_matching_super_old.py
    └── tube_nms.py
├── pre_processing
    ├── __init__.py
    ├── get_tubes_MOT17.py
    └── get_tubes_jta.py
├── requirements.txt
├── seqmaps
    ├── AVG-TownCentre.txt
    ├── JTA_train_turning
    ├── MOT15_test.txt
    ├── MOT17-01-FRCNN.txt
    ├── MOT17-02-FRCNN.txt
    ├── MOT17-04-FRCNN.txt
    ├── MOT17-13-FRCNN.txt
    ├── MOT17-14-FRCNN.txt
    ├── MOT17_test.txt
    ├── MOT17_train.txt
    └── PETS09-S2L2.txt
└── utils
    ├── __init__.py
    ├── mem_track.py
    └── util.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### Example user template template
3 | ### Example user template
4 | 
5 | # IntelliJ project files
6 | .idea
7 | *.iml


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Bo Pang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # TubeTK
  2 | 
  3 | TubeTK is an one-step end-to-end multi-object tracking method, which is the **first end-to-end** open-source system that achieves **60+ MOTA** on MOT-16 (64 MOTA) and MOT-17 (63 MOTA) datasets. 
  4 | Our paper "[TubeTK: Adopting Tubes to Track Multi-Object in a One-Step Training Model](https://bopang1996.github.io/posts/2020/04/tubeTKpaper/)" is accepted as an oral paper on CVPR-2020.
  5 | 
  6 | 
  7 | 
  8 | # Contents
  9 | 
 10 | - [TubeTK](#TubeTK)
 11 | - [Contents](#Contents)
 12 | - [Results](#Results)
 13 |   - [MOT-16](#MOT-16)
 14 |   - [MOT-17](#MOT-17)
 15 | - [Installation](#Installation)
 16 | - [Quick Start](#Quick-Start)
 17 |   - [Demo](#Demo)
 18 |   - [Evaluation](#Evaluation-on-MOT-17-(16))
 19 |   - [Train](#Train-on-MOT-17-(16))
 20 | 
 21 | # Results
 22 | 
 23 | ![Demo Video](assets/demo.gif)
 24 | 
 25 | 
 26 | ## MOT-16
 27 | 
 28 | Results on MOT-16 dataset:
 29 | 
 30 | | Video         | MOTA | IDF1 | MT   | ML   | FP   | FN   | IDS  |
 31 | | ------------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
 32 | | MOT16-01 | 48.9 | 45.5 | 8 | 9 | 175 | 3052 | 40|
 33 | | MOT16-03 | 76.3 | 69.5 | 86 | 12 | 3741 | 20828 | 177|
 34 | | MOT16-06 | 51.2 | 55.7 | 87 | 39 | 1863 | 3542 | 231|
 35 | | MOT16-07 | 55.0 | 43.5 | 21 | 3 | 2225 | 4938 | 190|
 36 | | MOT16-08 | 46.9 | 37.3 | 18 | 3 | 1694 | 6952 | 234|
 37 | | MOT16-12 | 52.4 | 50.8 | 27 | 20 | 533 | 3366 | 51|
 38 | | MOT16-14 | 35.8 | 39.8 | 7 | 61 | 731 | 10948 | 194|
 39 | | TubeTK (Mean) | 64.0 | 59.4 | 33.5 | 19.4 | 10962   | 53626 | 1117 |
 40 | | RAN | 63.0 | 63.8 | 39.9 | 22.1   |   13663    |  53248 | 482 |
 41 | | Tracktor | 54.5 | 52.5 | 19.0 | 36.9 | 3280 | 79149 | 682 |
 42 | 
 43 | 
 44 | 
 45 | ## MOT-17
 46 | Results on MOT-17 dataset:
 47 | 
 48 | | Video         | MOTA | IDF1 | MT   | ML   | FP   | FN   | IDS  |
 49 | | ------------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
 50 | | MOT17-01 | 47.9 | 44.9 | 6 | 10 | 167 | 3154 | 41|
 51 | | MOT17-03 | 76.4 | 69.6 | 81 | 12 | 3181 | 21287 | 186|
 52 | | MOT17-06 | 52.4 | 54.8 | 85 | 36 | 1609 | 3699 | 307|
 53 | | MOT17-07 | 55.4 | 43.3 | 21 | 2 | 1944 | 5371 | 222|
 54 | | MOT17-08 | 42.3 | 34.1 | 18 | 12 | 970 | 10889 | 319|
 55 | | MOT17-12 | 50.3 | 49.4 | 28 | 23 | 494 | 3749 | 63|
 56 | | MOT17-14 | 35.6 | 39.5 | 6 | 61 | 655 | 11012 | 241|
 57 | | TubeTK (Mean) | 63.0 |58.6 | 31.2 | 19.9 | 27060 |177483 | 4137 |
 58 | | SCNet | 60.0 | 54.4 | 34.4 | 16.2 | 72230 | 145851 | 7611 |
 59 | | Tracktor | 53.5 | 52.3 | 19.5 | 36.3 | 12201 | 248047   |  2072 |
 60 | 
 61 | 
 62 | 
 63 | # Installation
 64 | 
 65 | 1. Get the code and build related modules:
 66 | 
 67 |     ```Shell
 68 |       git clone ...(TO BE CONFIRM)
 69 |       cd TubeTK/install
 70 |       ./compile.sh
 71 |       # if something wrong, try:
 72 |       # sudo ldconfig <path/to/cuda>/lib64
 73 |       cd ..
 74 |     ```
 75 | 
 76 | 2.  Install [pytorch 1.10]( https://pytorch.org/ ) and other dependencies:
 77 | 
 78 |    ```Shell
 79 |    pip install -r requirements.txt
 80 |    ```
 81 | 
 82 | 
 83 | 3. If the memory of your GPU < 16G, then you need [NVIDIA APEX]( https://github.com/nvidia/apex ) to conduct the mixed precision training. 
 84 | 
 85 |    1. Install Apex:
 86 | 
 87 |    ```Shell
 88 |    git clone https://github.com/NVIDIA/apex
 89 |    cd apex
 90 |    pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
 91 |    # if something wrong with the above pip install, try:
 92 |    # pip install -v --no-cache-dir ./
 93 |    ```
 94 | 
 95 |    2. We provide the `--apex` option to train with the APEX, see [Quick Start](#quick-start) for detail.
 96 |    
 97 | 4. Run `fetch_model.sh` to download our pre-trained models. Or download the models manually and put them in `./models`: 
 98 | 
 99 | 
100 |       1. 3DResNet50_original ([Baidu pan](https://pan.baidu.com/s/13GHBQlpugHmhMDG9pQ0_Sw) | [Google drive](https://drive.google.com/open?id=1jLgyNmiZ_c-m8Cw3NcZTEPTf6VESfIzK))
101 |       <!---2. 3DResNet50_small ([Baidu pan]() | [Google drive]())-->
102 | 
103 | 
104 | 
105 | # Quick Start
106 | 
107 | ## Demo
108 | 
109 | Run TubeTK for a video and visualization the results with:
110 | 
111 | ```Shell
112 | python launch.py --nproc_per <num of GPU> --training_script demo.py --batch_size=3 --config configs/TubeTK_resnet_50_FPN_8frame_1stride.yaml --video_url <folder/to/the/videos> --output_dir ./vis_video
113 | ```
114 | 
115 | 
116 | 
117 | ## Evaluation on MOT-17 (16)
118 | 
119 | 1. Download the data from [MOT Challenge](https://motchallenge.net/data/MOT17/  ), and put or link it to `./data`
120 | 
121 | 2. To get the tracking result with:
122 | 
123 |    ```Shell
124 |    python launch.py --nproc_per <num of GPU> --training_script evaluate.py --batch_size 3 --config configs/TubeTK_resnet_50_FPN_8frame_1stride.yaml --trainOrTest test
125 |    ```
126 | 
127 | 3. To get the visualization with: 
128 | 
129 |    ```Shell
130 |    python Visualization/Vis_Res.py --mode test
131 |    ```
132 | 
133 |    The visualization videos are stored in `./vis_video` .
134 | 
135 | 
136 | 
137 | ## Train on MOT-17 (16)
138 | 
139 | 1. Download the data from [MOT Challenge](https://motchallenge.net/data/MOT17/  ), and put or link it to `./data`
140 | 
141 | 2. Get the ground truth Btubes with:
142 | 
143 |    ```Shell
144 |    python ./pre_processing/get_tubes_MOT17.py
145 |    ```
146 | 
147 | 3. Train the model with:
148 | 
149 |    ```Shell
150 |    python launch.py --nproc_per <num of GPU> --training_script main.py --batch_size 1 --config ./configs/TubeTK_resnet_50_FPN_8frame_1stride.yaml
151 |    ```
152 | 
153 |    If out of memory, try:
154 | 
155 |    ```Shell
156 |    python launch.py --nproc_per <num of GPU> --training_script main.py --batch_size 1 --config ./configs/TubeTK_resnet_50_FPN_8frame_1stride.yaml --apex
157 |    ```
158 | 
159 |    If still out of memory, modify the configuration file: `TubeTK_resnet_50_FPN_8frame_1stride.yaml`:
160 | 
161 |    ```
162 |    tube_limit: 500  # or 300
163 |    ```
164 | 
165 | ## Citation
166 | 
167 |    ```
168 |    @inproceedings{pang2020tubetk,
169 |       title={TubeTK: Adopting Tubes to Track Multi-Object in a One-Step Training Model},
170 |       author={Pang, Bo and Li, Yizhuo and Zhang, Yifan and Li, Muchen and Lu, Cewu},
171 |       booktitle={CVPR},
172 |       year={2020}
173 |    }
174 |    ```
175 | 
176 | ## License
177 | 
178 | TubeTK is freely available for free non-commercial use, and may be redistributed under these conditions.
179 | 


--------------------------------------------------------------------------------
/Visualization/Vis_Input.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import dataset.augmentation as argument
  3 | import random
  4 | import cv2
  5 | import torch
  6 | 
  7 | 
  8 | class VisArgumentation(object):
  9 |     def __init__(self, size=896, mean=(104, 117, 123)):
 10 |         self.mean = mean
 11 |         self.size = size
 12 |         self.augment = argument.Compose([
 13 |             argument.AddMeans(self.mean),
 14 |             argument.ToCV2(),
 15 |             argument.ToAbsCoords()
 16 |         ])
 17 | 
 18 |     def __call__(self, images, img_meta, tubes, labels, start_frame):
 19 |         return self.augment(images, img_meta, tubes, labels, start_frame)
 20 | 
 21 | 
 22 | def get_color():
 23 |     colors = [[i for i in range(0, 250, 20)],
 24 |               [i for i in range(0, 250, 20)],
 25 |               [i for i in range(0, 250, 20)]]
 26 |     for i in range(3):
 27 |         random.shuffle(colors[i])
 28 |     colors = list(zip(*colors))
 29 |     return colors
 30 | 
 31 | 
 32 | def get_inter_box(start_box, end_box, inter_id, end_id):
 33 |     if end_id == 0:
 34 |         return start_box
 35 |     return start_box * (end_id - inter_id) / end_id + end_box * inter_id / end_id
 36 | 
 37 | 
 38 | def tubes2bbox(tubes, colors, tube_len, stride):
 39 |     bboxes = [[] for _ in range(tube_len)]
 40 |     for i, tube in enumerate(tubes):
 41 |         color = colors[i % len(colors)]
 42 |         mid_frame = tube[4] / stride
 43 |         back_frame = (tube[4] - tube[10]) / stride
 44 |         front_frame = (tube[4] + tube[5]) / stride
 45 | 
 46 |         mid_bbox = tube[0:4]
 47 |         back_bbox = tube[11: 15] + mid_bbox
 48 |         front_bbox = tube[6: 10] + mid_bbox
 49 | 
 50 |         for f in range(int(back_frame), int(mid_frame)):
 51 |             bboxes[f].append([get_inter_box(back_bbox, mid_bbox, f - back_frame, mid_frame - back_frame), color])
 52 |         for f in range(int(mid_frame), int(front_frame + 1)):
 53 |             bboxes[f].append([get_inter_box(mid_bbox, front_bbox, f - mid_frame, front_frame - mid_frame), color])
 54 |         # bboxes[int(front_frame)].append([front_bbox, color])
 55 |     return bboxes
 56 | 
 57 | 
 58 | def vis_input(imgs, img_metas, gt_bboxes, gt_labels, start_frame, stride, out_folder):
 59 |     imgs_v, img_metas_v, gt_bboxes_v, gt_labels_v, start_frame_v = \
 60 |         VisArgumentation()(imgs[0], img_metas[0], gt_bboxes[0], gt_labels[0], start_frame[0])
 61 | 
 62 |     for f in range(len(imgs_v)):
 63 |         imgs_c = imgs_v.copy()
 64 |         f_tubes = []
 65 |         for tube in gt_bboxes_v:
 66 |             if round(tube[4]) == f * stride:
 67 |                 f_tubes.append(tube)
 68 |         bboxes = tubes2bbox(f_tubes, get_color(), len(imgs_v), stride=stride)
 69 |         for i in range(len(imgs_c)):
 70 |             f_bboxes = bboxes[i]
 71 |             for bbox in f_bboxes:
 72 |                 cv2.rectangle(imgs_c[i], tuple(bbox[0][0:2]), tuple(bbox[0][2:4]), bbox[1], 1)
 73 |             cv2.imwrite(os.path.join(out_folder, str(i) + '.jpg'), imgs_c[i])
 74 | 
 75 | 
 76 | def tubes2bbox_out(tubes, colors, tube_len, stride):
 77 | 
 78 |     bboxes = [[] for _ in range(tube_len)]
 79 |     for i, tube in enumerate(tubes):
 80 |         color = colors[i % len(colors)]
 81 |         mid_frame = tube[0] / stride
 82 |         back_frame = tube[10] / stride
 83 |         front_frame = tube[5] / stride
 84 | 
 85 |         mid_bbox = tube[1:5]
 86 |         back_bbox = tube[11: 15]
 87 |         front_bbox = tube[6: 10]
 88 | 
 89 |         for f in range(int(back_frame), int(mid_frame)):
 90 |             bboxes[f].append([get_inter_box(back_bbox, mid_bbox, f - back_frame, mid_frame - back_frame), color])
 91 |         for f in range(int(mid_frame), int(front_frame + 1)):
 92 |             bboxes[f].append([get_inter_box(mid_bbox, front_bbox, f - mid_frame, front_frame - mid_frame), color])
 93 |     return bboxes
 94 | 
 95 | 
 96 | def vis_output(imgs, img_metas, gt_bboxes, stride, out_folder):
 97 |     no_use = torch.tensor(gt_bboxes)
 98 |     imgs, img_metas, _, gt_labels, start_frame = \
 99 |         VisArgumentation()(imgs, img_metas, no_use, torch.tensor(1), torch.tensor(1))
100 | 
101 |     gt_bboxes[:, [1, 3, 6, 8, 11, 13]] /= img_metas['img_shape'][2] / img_metas['pad_percent'][0] / img_metas['value_range'] / 1024
102 |     gt_bboxes[:, [2, 4, 7, 9, 12, 14]] /= img_metas['img_shape'][1] / img_metas['pad_percent'][1] / img_metas['value_range'] / 768
103 |     gt_bboxes = gt_bboxes.data.numpy()
104 |     for f in range(len(imgs)):
105 |         imgs_c = imgs.copy()
106 |         f_tubes = []
107 |         for tube in gt_bboxes:
108 |             if round(tube[0]) == f * stride:
109 |                 f_tubes.append(tube)
110 |         bboxes = tubes2bbox_out(f_tubes, get_color(), len(imgs), stride=stride)
111 |         write_folder = os.path.join(out_folder, img_metas['video_name'], str(img_metas['start_frame']), str(f))
112 |         os.makedirs(write_folder, exist_ok=True)
113 |         for i in range(len(imgs_c)):
114 |             f_bboxes = bboxes[i]
115 |             for bbox in f_bboxes:
116 |                 cv2.rectangle(imgs_c[i], tuple(bbox[0][0:2]), tuple(bbox[0][2:4]), bbox[1], 1)
117 |             cv2.imwrite(os.path.join(write_folder, str(i) + '.jpg'), imgs_c[i])
118 | 


--------------------------------------------------------------------------------
/Visualization/Vis_Res.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import os
  3 | import argparse
  4 | import configparser
  5 | import numpy as np
  6 | import pandas as pd
  7 | from tqdm import tqdm
  8 | from natsort import natsorted
  9 | try:
 10 |     import xml.etree.cElementTree as ET
 11 | except ImportError:
 12 |     import xml.etree.ElementTree as ET
 13 | 
 14 | 
 15 | def get_seq_info_from_file_mot(seqName, dataDir):
 16 |     seqFolder = os.path.join(dataDir, seqName)
 17 |     seqInfoFile = os.path.join(dataDir, seqName, 'seqinfo.ini')
 18 |     config = configparser.ConfigParser()
 19 |     config.read(seqInfoFile)
 20 | 
 21 |     imgFolder = config.get('Sequence', 'imDir')
 22 |     frameRate = config.getfloat('Sequence', 'frameRate')
 23 |     F = config.getint('Sequence', 'seqLength')
 24 |     imWidth = config.getint('Sequence', 'imWidth')
 25 |     imHeight = config.getint('Sequence', 'imHeight')
 26 |     imgExt = config.get('Sequence', 'imExt')
 27 | 
 28 |     return imgFolder, frameRate, imWidth, imHeight
 29 | 
 30 | 
 31 | def vis_one_video(res_file, frame_rate, img_width, img_height, img_dir, output_name):
 32 | 
 33 |     try:
 34 |         res = np.loadtxt(res_file, delimiter=',')
 35 |     except:
 36 |         res = np.loadtxt(res_file, delimiter=' ')
 37 |     res[:, 4:6] += res[:, 2:4]
 38 |     res = pd.DataFrame(res)
 39 |     res = res.replace([np.inf, -np.inf], np.nan)
 40 |     res = res.dropna()
 41 | 
 42 |     res_group = res.groupby(0)
 43 | 
 44 |     vid_writer = cv2.VideoWriter(output_name,
 45 |                                  cv2.VideoWriter_fourcc(*"MJPG"), frame_rate, (img_width, img_height))
 46 | 
 47 |     img_names = natsorted(os.listdir(img_dir))
 48 | 
 49 |     color_dict = {}
 50 |     for i, img_name in tqdm(enumerate(img_names), ncols=20):
 51 |         img = cv2.imread(os.path.join(img_dir, img_name))
 52 |         frame = int(os.path.splitext(img_name)[0])
 53 |         if frame not in res_group.groups.keys():
 54 |             vid_writer.write(img)
 55 |             continue
 56 |         bboxes = res_group.get_group(frame).values
 57 |         for bbox in bboxes:
 58 |             if bbox[1] in color_dict:
 59 |                 color = color_dict[bbox[1]]
 60 |             else:
 61 |                 color = np.round(np.random.rand(3) * 255)
 62 |                 color_dict[bbox[1]] = color
 63 |             cv2.rectangle(img, tuple(bbox[4:6].astype(int)), tuple(bbox[2:4].astype(int)), color=color, thickness=3)
 64 |             cv2.putText(img, str(bbox[6]) + ' ' + str(bbox[7])[0:5],
 65 |                         tuple(bbox[2:4].astype(int)), cv2.FONT_HERSHEY_COMPLEX, 1, color, 2)
 66 |         vid_writer.write(img)
 67 |     vid_writer.release()
 68 | 
 69 | 
 70 | def vis_res(args):
 71 |     try:
 72 |         os.makedirs(args.output_dir)
 73 |     except:
 74 |         pass
 75 | 
 76 |     if args.video_list is not None:
 77 |         video_list = open(args.video_list).readlines()
 78 |         video_list = [x.strip() for x in video_list]
 79 |     else:
 80 |         video_list = os.listdir(args.res_dir)
 81 |         video_list = [x for x in video_list if x.endswith('txt')]
 82 |         video_list = [os.path.splitext(x)[0] for x in video_list]
 83 |         video_list = [x for x in video_list if os.path.exists(os.path.join(args.data_dir, args.mode, x))]
 84 | 
 85 |     for vid in video_list:
 86 |         print('Processing {}'.format(vid))
 87 |         res_file = os.path.join(args.res_dir, vid + '.txt')
 88 |         if not os.path.exists(res_file):
 89 |             res_file = os.path.join(args.res_dir, vid, 'gt.txt')
 90 | 
 91 |         if 'JTA' not in args.output_dir:
 92 |             img_folder, frame_rate, img_width, img_height = get_seq_info_from_file_mot(vid, os.path.join(args.data_dir,
 93 |                                                                                                          args.mode))
 94 |         else:
 95 |             img_folder = ''
 96 |             frame_rate = 30
 97 |             img_width = 1920
 98 |             img_height = 1080
 99 | 
100 |         img_dir = os.path.join(args.data_dir, args.mode, vid, img_folder)
101 |         vis_one_video(res_file, frame_rate, img_width, img_height, img_dir, os.path.join(args.output_dir, vid + '.avi'))
102 | 
103 | 
104 | if __name__ == '__main__':
105 |     parser = argparse.ArgumentParser()
106 |     parser.add_argument('--res_dir', default='./link_res', type=str, help="path of the predicted tracks (saved as .txt)")
107 |     parser.add_argument('--output_dir', default='./vis_video', type=str, help='where to save the output video')
108 |     parser.add_argument('--data_dir', default='../data', type=str, help="input dataset path")
109 |     parser.add_argument('--mode', default='test', type=str, help='vis the train or test set')
110 |     parser.add_argument('--video_list', default='./seqmaps/MOT17_test.txt', type=str,
111 |                         help='List for videos to visualize, None for all in res_dir')
112 |     args = parser.parse_args()
113 |     vis_res(args)
114 | 


--------------------------------------------------------------------------------
/Visualization/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BoPang1996/TubeTK/bcca334c5348f9ae33e04595e1af93cf8351e50e/Visualization/__init__.py


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BoPang1996/TubeTK/bcca334c5348f9ae33e04595e1af93cf8351e50e/__init__.py


--------------------------------------------------------------------------------
/assets/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BoPang1996/TubeTK/bcca334c5348f9ae33e04595e1af93cf8351e50e/assets/demo.gif


--------------------------------------------------------------------------------
/configs/TubeTK_resnet_50_FPN_8frame_1stride.yaml:
--------------------------------------------------------------------------------
1 | min_visibility: -0.1
2 | forward_frames: 4
3 | frame_stride: 1
4 | # pretrain_model_path: '/home/pb/models/3DTracking/3DFCOS_JTA_iou_withoutRegLoss'
5 | pretrain_model_path: ''
6 | backbone: 'res50'
7 | img_size: [896, 1152]
8 | tube_limit: 700  # if out of memory, you can reduce it, but the performance may be effected


--------------------------------------------------------------------------------
/configs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BoPang1996/TubeTK/bcca334c5348f9ae33e04595e1af93cf8351e50e/configs/__init__.py


--------------------------------------------------------------------------------
/configs/default.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import os.path as osp
 3 | import numpy as np
 4 | from easydict import EasyDict as edict
 5 | INF = 1e8
 6 | 
 7 | __C = edict()
 8 | cfg = __C
 9 | 
10 | # for generating the tubes
11 | __C.min_visibility = -0.1
12 | __C.tube_thre = 0.8
13 | __C.forward_frames = 4
14 | __C.frame_stride = 1
15 | __C.value_range = 1
16 | __C.img_size = [896, 1152]
17 | 
18 | # pretrain
19 | __C.pretrain_model_path = ''
20 | 
21 | # for ResNet
22 | __C.freeze_stages = -1
23 | __C.backbone = 'res50'
24 | 
25 | # for FPN
26 | __C.fpn_features_n = 256
27 | __C.fpn_outs_n = 5
28 | 
29 | # for FCOS head
30 | __C.tube_points = 14
31 | __C.heads_features_n = 256
32 | __C.heads_layers_n = 4
33 | __C.withoutThickCenterness = False
34 | __C.model_stride = [[2, 8],
35 |                     [4, 16],
36 |                     [8, 32],
37 |                     [8, 64],
38 |                     [8, 128]]
39 | __C.regress_range = ([(-1, 0.25), (-1, 0.0714)],
40 |                      [(0.25, 0.5), (0.0714, 0.1428)],
41 |                      [(0.5, 0.75), (0.1428, 0.2857)],
42 |                      [(0.75, INF), (0.2857, 0.5714)],
43 |                      [(0.75, INF), (0.5714, INF)])
44 | 
45 | 
46 | # for loss
47 | __C.reg_loss = 'giou'
48 | __C.tube_limit = 1000
49 | __C.test_nms_pre = 1000
50 | __C.test_nms_max_per_img = 500
51 | __C.test_nms_score_thre = 0.5
52 | __C.test_nms_iou_thre = 0.5
53 | __C.linking_min_iou = 0.4
54 | __C.cos_weight = 0.2
55 | 
56 | 
57 | def _merge_a_into_b(a, b):
58 |     """Merge config dictionary a into config dictionary b, clobbering the
59 |     options in b whenever they are also specified in a.
60 |     """
61 |     if type(a) is not edict:
62 |         return
63 | 
64 |     for k, v in a.items():
65 |         # a must specify keys that are in b
66 |         if k not in b:
67 |             raise KeyError('{} is not a valid config key'.format(k))
68 | 
69 |         # the types must match, too
70 |         old_type = type(b[k])
71 |         if old_type is not type(v):
72 |             if isinstance(b[k], np.ndarray):
73 |                 v = np.array(v, dtype=b[k].dtype)
74 |             else:
75 |                 raise ValueError(('Type mismatch ({} vs. {}) '
76 |                                 'for config key: {}').format(type(b[k]),
77 |                                                             type(v), k))
78 | 
79 |         # recursively merge dicts
80 |         if type(v) is edict:
81 |             try:
82 |                 _merge_a_into_b(a[k], b[k])
83 |             except:
84 |                 print('Error under config key: {}'.format(k))
85 |                 raise
86 |         else:
87 |             b[k] = v
88 | 
89 | 
90 | def cfg_from_file(filename):
91 |     """Load a config file and merge it into the default options."""
92 |     import yaml
93 |     with open(filename, 'r') as f:
94 |         yaml_cfg = edict(yaml.load(f))
95 | 
96 |     _merge_a_into_b(yaml_cfg, __C)
97 | 


--------------------------------------------------------------------------------
/configs/get_MOT17_tube.yaml:
--------------------------------------------------------------------------------
1 | min_visibility: -0.1
2 | tube_thre: 0.8
3 | forward_frames: 4
4 | frame_stride: 1
5 | value_range: 1


--------------------------------------------------------------------------------
/configs/get_jta_tube.yaml:
--------------------------------------------------------------------------------
1 | min_visibility: 0.9
2 | tube_thre: 0.8
3 | forward_frames: 4
4 | frame_stride: 1
5 | value_range: 1


--------------------------------------------------------------------------------
/dataset/Parsers/JTA.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import os.path
  3 | import cv2
  4 | import numpy as np
  5 | import random
  6 | import pickle
  7 | from PIL import Image, ImageFile
  8 | 
  9 | ImageFile.LOAD_TRUNCATED_IMAGES = True
 10 | 
 11 | 
 12 | class GTSingleParser_JTA:
 13 |     def __init__(self, folder,
 14 |                  forward_frames=8,
 15 |                  frame_stride=1,
 16 |                  min_vis=-0.1,
 17 |                  value_range=1,
 18 |                  type='train'):
 19 |         self.type = type
 20 |         self.frame_stride = frame_stride
 21 |         self.value_range = value_range
 22 |         self.img_folder = folder
 23 | 
 24 |         self.forward_frames = forward_frames
 25 |         self.max_frame_index = len(os.listdir(os.path.join(self.img_folder))) - (
 26 |                     self.forward_frames * 2 - 1) * self.frame_stride
 27 | 
 28 |         split_path = folder.split('/')
 29 |         if folder[0] == '/':
 30 |             jta_root = '/' + os.path.join(*split_path[:-3])
 31 |         else:
 32 |             jta_root = os.path.join(*split_path[:-3])
 33 |         type = split_path[-2]
 34 |         video_name = split_path[-1]
 35 | 
 36 |         self.min_vis = min_vis
 37 |         self.jta_root = jta_root
 38 |         self.video_name = video_name
 39 |         if frame_stride != -1:
 40 |             tube_path = os.path.join(jta_root,
 41 |                                      'tubes_' + str(forward_frames) + '_' + str(frame_stride) + '_' + str(min_vis),
 42 |                                      type, video_name)
 43 |             self.tube_folder = tube_path
 44 |             if 's3:' in self.tube_folder:
 45 |                 self.tube_folder = self.tube_folder[:3] + '/' + self.tube_folder[3:]
 46 | 
 47 |             if type == 'train':
 48 |                 assert os.path.exists(os.path.join(self.tube_folder)), 'Tube folder does not exist: ' + str(os.path.join(self.tube_folder))
 49 | 
 50 |     def _getimage(self, frame_index):
 51 |         image_file = os.path.join(self.img_folder, '{}.jpg'.format(frame_index + 1))
 52 |         # return cv2.imread(image_file)
 53 |         for i in range(10):
 54 |             try:
 55 |                 assert os.path.exists(image_file), 'Image does not exist: {}'.format(image_file)
 56 |                 img = cv2.cvtColor(np.asarray(Image.open(image_file).convert("RGB")), cv2.COLOR_RGB2BGR)
 57 |                 break
 58 |             except:
 59 |                 print('READ IMAGE ERROR: ' + str(image_file))
 60 |                 print("IMAGE EXIST: " + str(os.path.exists(image_file)))
 61 |         return img
 62 | 
 63 |     def get_item(self, frame_index):
 64 |         if self.frame_stride == -1:
 65 |             strides = [1, 2, 4]
 66 |             frame_stride = strides[random.randint(0, 2)]
 67 |             tube_path = os.path.join(self.jta_root,
 68 |                                      'tubes_' + str(self.forward_frames) + '_' + str(frame_stride) + '_' + str(self.min_vis),
 69 |                                      self.type, self.video_name)
 70 |             self.tube_folder = tube_path
 71 |             if 's3:' in self.tube_folder:
 72 |                 self.tube_folder = self.tube_folder[:3] + '/' + self.tube_folder[3:]
 73 |             if self.type == 'train':
 74 |                 assert os.path.exists(os.path.join(self.tube_folder)), 'Tube folder does not exist: ' + str(os.path.join(self.tube_folder))
 75 |         else:
 76 |             frame_stride = self.frame_stride
 77 | 
 78 |         start_frame = frame_index
 79 |         max_len = self.forward_frames * 2 * frame_stride
 80 |         tube_file = os.path.join(self.tube_folder, str(start_frame))
 81 |         if self.type == 'train':
 82 |             if not os.path.exists(tube_file):
 83 |                 print(tube_file)
 84 |                 return None, None, None, None, None
 85 | 
 86 |         # get image meta
 87 |         img_meta = {}
 88 |         image = self._getimage(frame_index)
 89 |         if image is None:
 90 |             print(os.path.join(self.img_folder, 'img1/{}.jpg'.format(frame_index + 1)))
 91 |         img_meta['img_shape'] = [max_len, image.shape[0], image.shape[1]]
 92 |         img_meta['value_range'] = self.value_range
 93 |         img_meta['pad_percent'] = [1, 1]  # prepared for padding
 94 |         img_meta['video_name'] = os.path.basename(self.img_folder)
 95 |         img_meta['start_frame'] = start_frame
 96 | 
 97 |         # get image
 98 |         imgs = []
 99 |         for i in range(self.forward_frames * 2):
100 |             frame_index = start_frame + i * frame_stride
101 |             image = self._getimage(frame_index)  # h, w, c
102 |             imgs.append(image)
103 | 
104 |         # get_tube
105 |         tubes = np.zeros((1, 15))
106 |         if self.type == 'train':
107 |             tubes = pickle.load(open(tube_file, 'rb'))
108 | 
109 |         num_dets = len(tubes)
110 |         labels = np.ones((num_dets, 1))  # only human class
111 | 
112 |         tubes = np.array(tubes)
113 |         imgs = np.array(imgs)
114 | 
115 |         return imgs, img_meta, tubes, labels, start_frame
116 | 
117 |     def __len__(self):
118 |         return self.max_frame_index
119 | 
120 | 
121 | class GTParser_JTA:
122 |     def __init__(self, jta_root,
123 |                  type='train',
124 |                  forward_frames=4,
125 |                  frame_stride=1,
126 |                  min_vis=-0.1,
127 |                  value_range=1):
128 |         # analsis all the folder in mot_root
129 |         # 1. get all the folders
130 |         jta_root = os.path.join(jta_root, 'imgs', type)
131 |         all_folders = sorted(
132 |             [os.path.join(jta_root, i) for i in os.listdir(jta_root)
133 |              if os.path.isdir(os.path.join(jta_root, i))
134 |              ]
135 |         )
136 |         # 2. create single parser
137 |         self.parsers = [GTSingleParser_JTA(folder, forward_frames=forward_frames, frame_stride=frame_stride,
138 |                                        min_vis=min_vis, value_range=value_range, type=type) for folder in all_folders]
139 | 
140 |         # 3. get some basic information
141 |         self.lens = [len(p) for p in self.parsers]
142 |         self.len = sum(self.lens)
143 | 
144 |     def __len__(self):
145 |         # get the length of all the matching frame
146 |         return self.len
147 | 
148 |     def __getitem__(self, item):
149 |         # 1. find the parser
150 |         total_len = 0
151 |         index = 0
152 |         current_item = item
153 |         for l in self.lens:
154 |             total_len += l
155 |             if item < total_len:
156 |                 break
157 |             else:
158 |                 index += 1
159 |                 current_item -= l
160 | 
161 |         # 2. get items
162 |         if index >= len(self.parsers):
163 |             return None, None, None, None, None
164 |         return self.parsers[index].get_item(current_item)
165 | 
166 | 


--------------------------------------------------------------------------------
/dataset/Parsers/MOT17.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import os.path
  3 | import cv2
  4 | import numpy as np
  5 | import random
  6 | import pickle
  7 | from PIL import Image, ImageFile
  8 | 
  9 | ImageFile.LOAD_TRUNCATED_IMAGES = True
 10 | 
 11 | 
 12 | class GTSingleParser_MOT_17:
 13 |     def __init__(self, folder,
 14 |                  forward_frames=8,
 15 |                  frame_stride=1,
 16 |                  min_vis=-0.1,
 17 |                  value_range=1,
 18 |                  type='train'):
 19 |         self.type = type
 20 |         self.frame_stride = frame_stride
 21 |         self.value_range = value_range
 22 |         self.folder = folder
 23 | 
 24 |         self.min_vis = min_vis
 25 |         self.forward_frames = forward_frames
 26 |         self.max_frame_index = len(os.listdir(os.path.join(self.folder, 'img1'))) - (
 27 |                     self.forward_frames * 2 - 1) * self.frame_stride
 28 | 
 29 |         if frame_stride != -1:
 30 |             self.tube_folder = 'tubes_' + str(forward_frames) + '_' + str(frame_stride) + '_' + str(min_vis)
 31 | 
 32 |             if type == 'train':
 33 |                 assert os.path.exists(os.path.join(self.folder, self.tube_folder)), 'Tube folder does not exist: ' + str(os.path.join(self.folder, self.tube_folder))
 34 | 
 35 |     def _getimage(self, frame_index):
 36 |         image_file = os.path.join(self.folder, 'img1/{0:06}.jpg'.format(frame_index + 1))
 37 |         # return cv2.imread(image_file)
 38 |         for i in range(10):
 39 |             try:
 40 |                 assert os.path.exists(image_file), 'Image does not exist: {}'.format(image_file)
 41 |                 img = cv2.cvtColor(np.asarray(Image.open(image_file).convert("RGB")), cv2.COLOR_RGB2BGR)
 42 |                 break
 43 |             except:
 44 |                 print('READ IMAGE ERROR: ' + str(image_file))
 45 |                 print("IMAGE EXIST: " + str(os.path.exists(image_file)))
 46 |         return img
 47 | 
 48 |     def get_item(self, frame_index):
 49 |         if self.frame_stride == -1:
 50 |             strides = [1,2,4]
 51 |             frame_stride = strides[random.randint(0, 2)]
 52 |             self.tube_folder = 'tubes_' + str(self.forward_frames) + '_' + str(frame_stride) + '_' + str(self.min_vis)
 53 |             if type == 'train':
 54 |                 assert os.path.exists(
 55 |                     os.path.join(self.folder, self.tube_folder)), 'Tube folder does not exist: ' + str(
 56 |                     os.path.join(self.folder, self.tube_folder))
 57 |         else:
 58 |             frame_stride = self.frame_stride
 59 | 
 60 |         start_frame = frame_index
 61 |         max_len = self.forward_frames * 2 * frame_stride
 62 |         tube_file = os.path.join(self.folder, self.tube_folder, str(start_frame))
 63 |         if self.type == 'train':
 64 |             if not os.path.exists(tube_file):
 65 |                 print(tube_file)
 66 |                 return None, None, None, None, None
 67 | 
 68 |         # get image meta
 69 |         img_meta = {}
 70 |         image = self._getimage(frame_index)
 71 |         if image is None:
 72 |             print(os.path.join(self.folder, 'img1/{0:06}.jpg'.format(frame_index + 1)))
 73 |         img_meta['img_shape'] = [max_len, image.shape[0], image.shape[1]]
 74 |         img_meta['value_range'] = self.value_range
 75 |         img_meta['pad_percent'] = [1, 1]  # prepared for padding
 76 |         img_meta['video_name'] = os.path.basename(self.folder)
 77 |         img_meta['start_frame'] = start_frame
 78 | 
 79 |         # get image
 80 |         imgs = []
 81 |         for i in range(self.forward_frames * 2):
 82 |             frame_index = start_frame + i * frame_stride
 83 |             image = self._getimage(frame_index)  # h, w, c
 84 |             imgs.append(image)
 85 | 
 86 |         # get_tube
 87 |         tubes = np.zeros((1, 15))
 88 |         if self.type == 'train':
 89 |             tubes = pickle.load(open(tube_file, 'rb'))
 90 | 
 91 |         num_dets = len(tubes)
 92 |         labels = np.ones((num_dets, 1))  # only human class
 93 | 
 94 |         tubes = np.array(tubes)
 95 |         imgs = np.array(imgs)
 96 | 
 97 |         return imgs, img_meta, tubes, labels, start_frame
 98 | 
 99 |     def __len__(self):
100 |         return self.max_frame_index
101 | 
102 | 
103 | class GTParser_MOT_17:
104 |     def __init__(self, mot_root,
105 |                  type='train',
106 |                  test_seq=None,
107 |                  forward_frames=4,
108 |                  frame_stride=1,
109 |                  min_vis=-0.1,
110 |                  value_range=1):
111 |         # analsis all the folder in mot_root
112 |         # 1. get all the folders
113 |         mot_root = os.path.join(mot_root, type)
114 |         if test_seq is None:
115 |             all_folders = sorted(
116 |                 [os.path.join(mot_root, i) for i in os.listdir(mot_root)
117 |                  if os.path.isdir(os.path.join(mot_root, i))
118 |                  and i.find('FRCNN') != -1]
119 |             )
120 |         else:
121 |             all_folders = sorted(
122 |                 [os.path.join(mot_root, i) for i in os.listdir(mot_root)
123 |                  if os.path.isdir(os.path.join(mot_root, i))
124 |                  and i.find('FRCNN') != -1
125 |                  and i in test_seq]
126 |             )
127 |         # 2. create single parser
128 |         self.parsers = [GTSingleParser_MOT_17(folder, forward_frames=forward_frames, frame_stride=frame_stride,
129 |                                               min_vis=min_vis, value_range=value_range, type=type) for folder in all_folders]
130 | 
131 |         # 3. get some basic information
132 |         self.lens = [len(p) for p in self.parsers]
133 |         self.len = sum(self.lens)
134 | 
135 |     def __len__(self):
136 |         # get the length of all the matching frame
137 |         return self.len
138 | 
139 |     def __getitem__(self, item):
140 |         # 1. find the parser
141 |         total_len = 0
142 |         index = 0
143 |         current_item = item
144 |         for l in self.lens:
145 |             total_len += l
146 |             if item < total_len:
147 |                 break
148 |             else:
149 |                 index += 1
150 |                 current_item -= l
151 | 
152 |         # 2. get items
153 |         if index >= len(self.parsers):
154 |             return None, None, None, None, None
155 |         return self.parsers[index].get_item(current_item)
156 | 


--------------------------------------------------------------------------------
/dataset/Parsers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BoPang1996/TubeTK/bcca334c5348f9ae33e04595e1af93cf8351e50e/dataset/Parsers/__init__.py


--------------------------------------------------------------------------------
/dataset/Parsers/structures.py:
--------------------------------------------------------------------------------
 1 | class Node:
 2 |     def __init__(self, box, frame_id, next_fram_id=-1):
 3 |         self.box = box
 4 |         self.frame_id = frame_id
 5 |         self.next_frame_id = next_fram_id
 6 | 
 7 | 
 8 | class Track:
 9 |     def __init__(self, id):
10 |         self.nodes = list()
11 |         self.id = id
12 | 
13 |     def add_node(self, n):
14 |         if len(self.nodes) > 0:
15 |             self.nodes[-1].next_frame_id = n.frame_id
16 |         self.nodes.append(n)
17 | 
18 |     def get_node_by_index(self, index):
19 |         return self.nodes[index]
20 | 
21 | 
22 | class Tracks:
23 |     def __init__(self):
24 |         self.tracks = list()
25 | 
26 |     def add_node(self, node, id):
27 |         node_added = False
28 |         track_index = 0
29 |         node_index = 0
30 |         for t in self.tracks:
31 |             if t.id == id:
32 |                 t.add_node(node)
33 |                 node_added = True
34 |                 track_index = self.tracks.index(t)
35 |                 node_index = t.nodes.index(node)
36 |                 break
37 |         if not node_added:
38 |             t = Track(id)
39 |             t.add_node(node)
40 |             self.tracks.append(t)
41 |             track_index = self.tracks.index(t)
42 |             node_index = t.nodes.index(node)
43 | 
44 |         return track_index, node_index
45 | 
46 |     def get_track_by_index(self, index):
47 |         return self.tracks[index]
48 | 


--------------------------------------------------------------------------------
/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BoPang1996/TubeTK/bcca334c5348f9ae33e04595e1af93cf8351e50e/dataset/__init__.py


--------------------------------------------------------------------------------
/dataset/dataLoader.py:
--------------------------------------------------------------------------------
 1 | from dataset.mot17 import MOT17TrainDataset, MOT17TestDataset
 2 | from dataset.jta import JTATrainDataset
 3 | from dataset.mot17jta import MOT17JTATrainDataset
 4 | import torch
 5 | try:
 6 |     import moxing.pytorch as mox
 7 | except:
 8 |     pass
 9 | import os
10 | 
11 | 
12 | class Data_Loader_MOT():
13 |     def __init__(self,
14 |                  batch_size,
15 |                  num_workers,
16 |                  input_path,
17 |                  model_arg,
18 |                  train_epoch,
19 |                  test_epoch,
20 |                  dataset,
21 |                  test_type='test',
22 |                  test_seq=None):
23 | 
24 |         self.BATCH_SIZE = batch_size
25 |         self.num_workers = num_workers
26 | 
27 |         def my_collate(batch):
28 |             imgs = torch.stack([torch.tensor(item[0]) for item in batch], 0)
29 |             img_meta = [item[1] for item in batch]
30 |             tubes = [item[2] for item in batch]
31 |             labels = [item[3] for item in batch]
32 |             start_frame = [item[4] for item in batch]
33 |             return imgs, img_meta, tubes, labels, start_frame
34 | 
35 |         if dataset == 'MOT17':
36 |             print('MOT17 data')
37 |             self.training_set = MOT17TrainDataset(mot_root=input_path, epoch=train_epoch, arg=model_arg)
38 |             self.validation_set = MOT17TestDataset(mot_root=input_path, type=test_type, test_seq=test_seq,
39 |                                                    epoch=test_epoch, arg=model_arg)
40 |         elif dataset == 'JTA':
41 |             print('JTA data')
42 |             self.training_set = JTATrainDataset(jta_root=input_path, epoch=train_epoch, arg=model_arg)
43 |             self.validation_set = None
44 |         elif dataset == 'MOT17JTA':
45 |             print('MOT17JTA data')
46 |             self.training_set = MOT17JTATrainDataset(mot17_root=input_path[0], mot15_root=input_path[1],
47 |                                                      jta_root=input_path[2], epoch=train_epoch, arg=model_arg)
48 |             self.validation_set = None
49 |         else:
50 |             raise NotImplementedError
51 | 
52 |         # train loader
53 |         if int(os.environ["RANK"]) == 0:
54 |             print('==> Training data :', len(self.training_set))
55 |         train_sampler = torch.utils.data.distributed.DistributedSampler(self.training_set)
56 |         self.train_loader = torch.utils.data.DataLoader(
57 |             dataset=self.training_set,
58 |             batch_size=self.BATCH_SIZE,
59 |             collate_fn=my_collate,
60 |             num_workers=self.num_workers,
61 |             pin_memory=True, sampler=train_sampler)
62 | 
63 |         # val loader
64 |         if self.validation_set is not None:
65 |             if int(os.environ["RANK"]) == 0:
66 |                 print('==> Validation data :', len(self.validation_set))
67 |             val_sampler = torch.utils.data.distributed.DistributedSampler(self.validation_set)
68 |             self.test_loader = torch.utils.data.DataLoader(
69 |                 dataset=self.validation_set,
70 |                 batch_size=self.BATCH_SIZE,
71 |                 collate_fn=my_collate,
72 |                 num_workers=self.num_workers,
73 |                 pin_memory=True, sampler=val_sampler)
74 | 
75 | 


--------------------------------------------------------------------------------
/dataset/jta.py:
--------------------------------------------------------------------------------
 1 | import torch.utils.data as data
 2 | from PIL import Image, ImageFile
 3 | 
 4 | from dataset.augmentation import SSJAugmentation
 5 | from dataset.Parsers.JTA import GTParser_JTA
 6 | ImageFile.LOAD_TRUNCATED_IMAGES = True
 7 | 
 8 | 
 9 | class JTATrainDataset(data.Dataset):
10 |     '''
11 |     The class is the dataset for train, which read gt.txt file and rearrange them as the tracks set.
12 |     it can be selected from the specified frame
13 |     '''
14 |     def __init__(self,
15 |                  jta_root,
16 |                  epoch,
17 |                  arg,
18 |                  transform=SSJAugmentation,
19 |                  ):
20 |         # 1. init all the variables
21 |         self.jta_root = jta_root
22 |         self.transform = transform(size=arg.img_size, type='train')
23 |         self.epoch = epoch
24 | 
25 |         # 2. init GTParser
26 |         self.parser = GTParser_JTA(self.jta_root, 'train', forward_frames=arg.forward_frames,
27 |                                    frame_stride=arg.frame_stride, min_vis=arg.min_visibility,
28 |                                    value_range=arg.value_range)
29 | 
30 |     def __getitem__(self, item):
31 |         item = item % len(self.parser)
32 |         image, img_meta, tubes, labels, start_frame = self.parser[item]
33 | 
34 |         while image is None:
35 |             item += 100
36 |             image, img_meta, tubes, labels, start_frame = self.parser[item % len(self.parser)]
37 | 
38 |             print('None processing.')
39 | 
40 |         if self.transform is None:
41 |             return image, img_meta, tubes, labels, start_frame
42 |         else:
43 |             image, img_meta, tubes, labels, start_frame = self.transform(image, img_meta, tubes, labels, start_frame)
44 |             return image, img_meta, tubes, labels, start_frame
45 | 
46 |     def __len__(self):
47 |         return len(self.parser) * self.epoch
48 | 
49 | 


--------------------------------------------------------------------------------
/dataset/mot17.py:
--------------------------------------------------------------------------------
 1 | import torch.utils.data as data
 2 | import random
 3 | from PIL import ImageFile
 4 | from dataset.augmentation import SSJAugmentation
 5 | from dataset.Parsers.MOT17 import GTParser_MOT_17
 6 | 
 7 | ImageFile.LOAD_TRUNCATED_IMAGES = True
 8 | 
 9 | 
10 | class MOT17TrainDataset(data.Dataset):
11 |     '''
12 |     The class is the dataset for train, which read gt.txt file and rearrange them as the tracks set.
13 |     it can be selected from the specified frame
14 |     '''
15 |     def __init__(self,
16 |                  mot_root,
17 |                  epoch,
18 |                  arg,
19 |                  transform=SSJAugmentation,
20 |                  ):
21 |         # 1. init all the variables
22 |         self.mot_root = mot_root
23 |         self.transform = transform(size=arg.img_size, type='train')
24 |         self.epoch = epoch
25 | 
26 |         # 2. init GTParser
27 |         self.parser = GTParser_MOT_17(self.mot_root, 'train', forward_frames=arg.forward_frames,
28 |                                       frame_stride=arg.frame_stride, min_vis=arg.min_visibility,
29 |                                       value_range=arg.value_range)
30 | 
31 |     def __getitem__(self, item):
32 |         item = item % len(self.parser)
33 |         image, img_meta, tubes, labels, start_frame = self.parser[item]
34 | 
35 |         while image is None:
36 |             item = item + 50
37 |             image, img_meta, tubes, labels, start_frame = self.parser[item % len(self.parser)]
38 |             print('None processing.')
39 | 
40 |         if self.transform is None:
41 |             return image, img_meta, tubes, labels, start_frame
42 |         else:
43 |             image, img_meta, tubes, labels, start_frame = self.transform(image, img_meta, tubes, labels, start_frame)
44 |             return image, img_meta, tubes, labels, start_frame
45 | 
46 |     def __len__(self):
47 |         return len(self.parser) * self.epoch
48 | 
49 | 
50 | class MOT17TestDataset(data.Dataset):
51 |     '''
52 |     The class is the dataset for train, which read gt.txt file and rearrange them as the tracks set.
53 |     it can be selected from the specified frame
54 |     '''
55 |     def __init__(self,
56 |                  mot_root,
57 |                  epoch,
58 |                  type,
59 |                  test_seq,
60 |                  arg,
61 |                  transform=SSJAugmentation,
62 |                  ):
63 |         # 1. init all the variables
64 |         self.mot_root = mot_root
65 |         self.transform = transform(size=arg.img_size, type='test')
66 |         self.epoch = epoch
67 | 
68 |         # 2. init GTParser
69 |         self.parser = GTParser_MOT_17(self.mot_root, type, test_seq=test_seq, forward_frames=arg.forward_frames,
70 |                                       frame_stride=arg.frame_stride, min_vis=arg.min_visibility,
71 |                                       value_range=arg.value_range)
72 | 
73 |     def __getitem__(self, item):
74 |         item = item % len(self.parser)
75 |         image, img_meta, tubes, labels, start_frame = self.parser[item]
76 | 
77 |         while image is None:
78 |             image, img_meta, tubes, labels, start_frame = self.parser[(item+random.randint(-10, 10)) % len(self.parser)]
79 |             print('None processing.')
80 | 
81 |         if self.transform is None:
82 |             return image, img_meta, tubes, labels, start_frame
83 |         else:
84 |             image, img_meta, tubes, labels, start_frame = self.transform(image, img_meta, tubes, labels, start_frame)
85 |             return image, img_meta, tubes, labels, start_frame
86 | 
87 |     def __len__(self):
88 |         return len(self.parser) * self.epoch
89 | 
90 | 


--------------------------------------------------------------------------------
/dataset/mot17jta.py:
--------------------------------------------------------------------------------
 1 | import torch.utils.data as data
 2 | from PIL import ImageFile
 3 | from dataset.Parsers.MOT17 import GTParser_MOT_17
 4 | from dataset.Parsers.JTA import GTParser_JTA
 5 | from dataset.augmentation import SSJAugmentation
 6 | 
 7 | ImageFile.LOAD_TRUNCATED_IMAGES = True
 8 | 
 9 | 
10 | class MOT17JTATrainDataset(data.Dataset):
11 |     '''
12 |     The class is the dataset for train, which read gt.txt file and rearrange them as the tracks set.
13 |     it can be selected from the specified frame
14 |     '''
15 |     def __init__(self,
16 |                  mot17_root,
17 |                  mot15_root,
18 |                  jta_root,
19 |                  epoch,
20 |                  arg,
21 |                  transform=SSJAugmentation,
22 |                  ):
23 |         # 1. init all the variables
24 |         self.mot17_root = mot17_root
25 |         self.mot15_root = mot15_root
26 |         self.jta_root = jta_root
27 |         self.transform = transform(size=arg.img_size, type='train')
28 |         self.epoch = epoch
29 | 
30 |         self.parsers = {}
31 |         # 2. init GTParser
32 |         self.parser_MOT17 = GTParser_MOT_17(self.mot17_root, 'train', forward_frames=arg.forward_frames,
33 |                                             frame_stride=arg.frame_stride, min_vis=arg.min_visibility,
34 |                                             value_range=arg.value_range)
35 |         self.parsers['MOT17'] = self.parser_MOT17
36 | 
37 |         self.parser_JTA = GTParser_JTA(self.jta_root, 'train', forward_frames=arg.forward_frames,
38 |                                        frame_stride=arg.frame_stride, min_vis=0.3,
39 |                                        value_range=arg.value_range)
40 |         self.parsers['JTA'] = self.parser_JTA
41 | 
42 |     def __getitem__(self, item):
43 | 
44 |         mot17 = True if item < len(self.parser_MOT17) * self.epoch else False
45 |         if mot17:
46 |             parser = self.parsers['MOT17']
47 |             item = item % len(self.parser_MOT17)
48 | 
49 |         if not mot17:
50 |             parser = self.parsers['JTA']
51 |             item = (item - len(self.parser_MOT17) * self.epoch) % len(self.parser_JTA)
52 | 
53 |         image, img_meta, tubes, labels, start_frame = parser[item]
54 |         while image is None:
55 |             print('None processing.')
56 |             item += 100
57 |             image, img_meta, tubes, labels, start_frame = parser[item % len(parser)]
58 | 
59 |         if self.transform is None:
60 |             return image, img_meta, tubes, labels, start_frame
61 |         else:
62 |             image, img_meta, tubes, labels, start_frame = self.transform(image, img_meta, tubes, labels, start_frame)
63 |             return image, img_meta, tubes, labels, start_frame
64 | 
65 |     def __len__(self):
66 |         return len(self.parser_MOT17) * self.epoch * 2
67 | 
68 | 
69 | 


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
  1 | import pickle
  2 | import os
  3 | import numpy as np
  4 | import torch
  5 | import warnings
  6 | from tqdm import tqdm
  7 | from network.tubetk import TubeTK
  8 | from apex import amp
  9 | import argparse
 10 | import multiprocessing
 11 | from configs.default import __C, cfg_from_file
 12 | from post_processing.tube_iou_matching import matching
 13 | warnings.filterwarnings('ignore')
 14 | import shutil
 15 | from Visualization.Vis_Res import vis_one_video
 16 | import cv2
 17 | import torch.utils.data as data
 18 | import random
 19 | from dataset.augmentation import SSJAugmentation
 20 | 
 21 | 
 22 | class GTSingleParser:
 23 |     def __init__(self, video,
 24 |                  forward_frames=4,
 25 |                  frame_stride=1,
 26 |                  min_vis=-0.1,
 27 |                  value_range=1):
 28 |         self.frame_stride = frame_stride
 29 |         self.value_range = value_range
 30 |         self.video_name = video
 31 |         self.min_vis = min_vis
 32 |         self.forward_frames = forward_frames
 33 | 
 34 |         self.cap = cv2.VideoCapture(video)
 35 |         fps = int(round(self.cap.get(cv2.CAP_PROP_FPS)))
 36 |         width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
 37 |         height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 38 |         frame_counter = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
 39 |         self.video_frames = np.zeros((frame_counter, height, width, 3), dtype='float16')
 40 |         cnt = 0
 41 |         if int(os.environ["RANK"]) == 0:
 42 |             print('reading video...')
 43 |             pbar = tqdm(total=frame_counter)
 44 | 
 45 |         os.makedirs(video + '_imgs', exist_ok=True)
 46 |         while self.cap.isOpened():
 47 |             _, frame = self.cap.read()
 48 |             if cnt >= frame_counter:
 49 |                 break
 50 |             if frame is not None:
 51 |                 frame_ok = frame  # .astype('float16')
 52 |             else:
 53 |                 if int(os.environ['RANK']) == 0:
 54 |                     print('cannot read frame')
 55 |             self.video_frames[cnt] = frame_ok
 56 |             cv2.imwrite(filename=os.path.join(video + '_imgs', str(cnt + 1) + '.jpg'), img=frame_ok)  #.astype('int8'))
 57 |             cnt += 1
 58 |             if int(os.environ["RANK"]) == 0:
 59 |                 pbar.update(1)
 60 |         if int(os.environ["RANK"]) == 0:
 61 |             print('finish_reading')
 62 |             pbar.close()
 63 | 
 64 |         self.max_frame_index = frame_counter - (
 65 |                     self.forward_frames * 2 - 1) * self.frame_stride
 66 | 
 67 |     def _getimage(self, frame_index):
 68 |         img = self.video_frames[frame_index]
 69 |         return img
 70 | 
 71 |     def get_item(self, frame_index):
 72 |         frame_stride = self.frame_stride
 73 | 
 74 |         start_frame = frame_index
 75 |         max_len = self.forward_frames * 2 * frame_stride
 76 | 
 77 |         # get image meta
 78 |         img_meta = {}
 79 |         image = self._getimage(frame_index)
 80 |         img_meta['img_shape'] = [max_len, image.shape[0], image.shape[1]]
 81 |         img_meta['value_range'] = self.value_range
 82 |         img_meta['pad_percent'] = [1, 1]  # prepared for padding
 83 |         img_meta['video_name'] = os.path.basename(self.video_name)
 84 |         img_meta['start_frame'] = start_frame
 85 | 
 86 |         # get image
 87 |         imgs = []
 88 |         for i in range(self.forward_frames * 2):
 89 |             frame_index = start_frame + i * frame_stride
 90 |             image = self._getimage(frame_index)  # h, w, c
 91 |             imgs.append(image)
 92 | 
 93 |         # get_tube
 94 |         tubes = np.zeros((1, 15))
 95 |         num_dets = len(tubes)
 96 |         labels = np.ones((num_dets, 1))  # only human class
 97 | 
 98 |         tubes = np.array(tubes)
 99 |         imgs = np.array(imgs)
100 | 
101 |         return imgs, img_meta, tubes, labels, start_frame
102 | 
103 |     def __len__(self):
104 |         return self.max_frame_index
105 | 
106 | 
107 | class GTParser:
108 |     def __init__(self, data_root,
109 |                  forward_frames=4,
110 |                  frame_stride=1,
111 |                  min_vis=-0.1,
112 |                  value_range=1):
113 |         # analsis all the folder in mot_root
114 |         # 1. get all the folders
115 |         all_videos = sorted([os.path.join(data_root, i) for i in os.listdir(data_root)
116 |                              if '_imgs' not in i])
117 |         # 2. create single parser
118 |         self.parsers = [GTSingleParser(video, forward_frames=forward_frames, frame_stride=frame_stride,
119 |                                        min_vis=min_vis, value_range=value_range) for video in all_videos]
120 | 
121 |         # 3. get some basic information
122 |         self.lens = [len(p) for p in self.parsers]
123 |         self.len = sum(self.lens)
124 | 
125 |     def __len__(self):
126 |         # get the length of all the matching frame
127 |         return self.len
128 | 
129 |     def __getitem__(self, item):
130 |         # 1. find the parser
131 |         total_len = 0
132 |         index = 0
133 |         current_item = item
134 |         for l in self.lens:
135 |             total_len += l
136 |             if item < total_len:
137 |                 break
138 |             else:
139 |                 index += 1
140 |                 current_item -= l
141 | 
142 |         # 2. get items
143 |         if index >= len(self.parsers):
144 |             return None, None, None, None, None
145 |         return self.parsers[index].get_item(current_item)
146 | 
147 | 
148 | class DemoDataset(data.Dataset):
149 |     '''
150 |     The class is the dataset for train, which read gt.txt file and rearrange them as the tracks set.
151 |     it can be selected from the specified frame
152 |     '''
153 |     def __init__(self,
154 |                  data_root,
155 |                  arg,
156 |                  transform=SSJAugmentation,
157 |                  ):
158 |         # 1. init all the variables
159 |         self.data_root = data_root
160 |         self.transform = transform(size=arg.img_size, type='test')
161 | 
162 |         # 2. init GTParser
163 |         self.parser = GTParser(self.data_root, forward_frames=arg.forward_frames,
164 |                                frame_stride=arg.frame_stride, min_vis=arg.min_visibility,
165 |                                value_range=arg.value_range)
166 | 
167 |     def __getitem__(self, item):
168 |         item = item % len(self.parser)
169 |         image, img_meta, tubes, labels, start_frame = self.parser[item]
170 | 
171 |         while image is None:
172 |             image, img_meta, tubes, labels, start_frame = self.parser[(item+random.randint(-10, 10)) % len(self.parser)]
173 |             print('None processing.')
174 | 
175 |         if self.transform is None:
176 |             return image, img_meta, tubes, labels, start_frame
177 |         else:
178 |             image, img_meta, tubes, labels, start_frame = self.transform(image, img_meta, tubes, labels, start_frame)
179 |             return image, img_meta, tubes, labels, start_frame
180 | 
181 |     def __len__(self):
182 |         return len(self.parser)
183 | 
184 | 
185 | class Data_Loader():
186 |     def __init__(self,
187 |                  batch_size,
188 |                  num_workers,
189 |                  input_path,
190 |                  model_arg):
191 |         self.num_workers = num_workers
192 |         self.BATCH_SIZE = batch_size
193 | 
194 |         def my_collate(batch):
195 |             imgs = torch.stack([torch.tensor(item[0]) for item in batch], 0)
196 |             img_meta = [item[1] for item in batch]
197 |             tubes = [item[2] for item in batch]
198 |             labels = [item[3] for item in batch]
199 |             start_frame = [item[4] for item in batch]
200 |             return imgs, img_meta, tubes, labels, start_frame
201 | 
202 |         self.demo_set = DemoDataset(data_root=input_path, arg=model_arg)
203 | 
204 |         if int(os.environ["RANK"]) == 0:
205 |             print('==> Validation data :', len(self.demo_set))
206 |         val_sampler = torch.utils.data.distributed.DistributedSampler(self.demo_set)
207 |         self.loader = torch.utils.data.DataLoader(
208 |             dataset=self.demo_set,
209 |             batch_size=self.BATCH_SIZE,
210 |             collate_fn=my_collate,
211 |             num_workers=self.num_workers,
212 |             pin_memory=True, sampler=val_sampler)
213 | 
214 | 
215 | def synchronize():
216 |     """
217 |     Helper function to synchronize (barrier) among all processes when
218 |     using distributed training
219 |     """
220 |     if not torch.distributed.is_available():
221 |         return
222 |     if not torch.distributed.is_initialized():
223 |         return
224 |     world_size = torch.distributed.get_world_size()
225 |     if world_size == 1:
226 |         return
227 |     torch.distributed.barrier()
228 | 
229 | 
230 | def match_video(video_name, tmp_dir, output_dir, model_arg):
231 |     tubes_path = os.path.join(tmp_dir, video_name)
232 |     tubes = []
233 |     frames = sorted([int(x) for x in os.listdir(tubes_path)])
234 |     for f in frames:
235 |         tube = pickle.load(open(os.path.join(tubes_path, str(f)), 'rb'))
236 |         tubes.append(tube)
237 | 
238 |     tubes = np.concatenate(tubes)
239 |     matching(tubes, save_path=os.path.join(output_dir, video_name + '.txt'), verbose=True, arg=model_arg)
240 | 
241 | 
242 | def evaluate(model, loader, test_arg, model_arg, output_dir='output'):
243 |     if not os.path.exists(output_dir):
244 |         os.makedirs(output_dir)
245 | 
246 |     tmp_dir = os.path.join(output_dir, 'tmp')
247 |     try:
248 |         shutil.rmtree(tmp_dir)
249 |     except:
250 |         pass
251 |     os.makedirs(tmp_dir, exist_ok=True)
252 | 
253 |     if test_arg.rank == 0:
254 |         loader = tqdm(loader, ncols=20)
255 | 
256 |     for i, data in enumerate(loader):
257 |         imgs, img_metas = data[:2]
258 |         imgs = imgs.cuda()
259 |         with torch.no_grad():
260 |             tubes, _, _ = zip(*model(imgs, img_metas, return_loss=False))
261 | 
262 |         for img, tube, img_meta in zip(imgs, tubes, img_metas):
263 |             tube[:, [0, 5, 10]] += img_meta['start_frame']
264 | 
265 |             os.makedirs(os.path.join(tmp_dir, img_meta['video_name']), exist_ok=True)
266 | 
267 |             tube = tube.cpu().data.numpy()
268 |             pickle.dump(tube, open(os.path.join(tmp_dir, img_meta['video_name'], str(img_meta['start_frame'])), 'wb'))
269 | 
270 |     synchronize()
271 |     if test_arg.rank == 0:
272 |         print('Finish prediction, Start matching')
273 |         video_names = os.listdir(tmp_dir)
274 |         pool = multiprocessing.Pool(processes=20)
275 |         pool_list = []
276 |         for vid in video_names:
277 |             pool_list.append(pool.apply_async(match_video, (vid, tmp_dir, os.path.join(output_dir, 'res'), model_arg,)))
278 |         for p in tqdm(pool_list, ncols=20):
279 |             p.get()
280 |         pool.close()
281 |         pool.join()
282 |         shutil.rmtree(tmp_dir)
283 | 
284 |         print('Finish matching, Start writing to video')
285 |         for vid in os.listdir(os.path.join(output_dir, 'res')):
286 |             cap = cv2.VideoCapture(os.path.join(test_arg.video_url, vid[0: -4]))
287 |             frame_rate = int(round(cap.get(cv2.CAP_PROP_FPS)))
288 |             img_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
289 |             img_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
290 |             res_file = os.path.join(output_dir, 'res', vid)
291 |             img_dir = os.path.join(test_arg.video_url, vid[0: -4] + '_imgs')
292 |             output_name = os.path.join(test_arg.output_dir, vid + '.avi')
293 |             vis_one_video(res_file, frame_rate, img_width, img_height, img_dir, output_name)
294 |             try:
295 |                 shutil.rmtree(img_dir)
296 |             except:
297 |                 pass
298 | 
299 | 
300 | def main(test_arg, model_arg):
301 |     torch.distributed.init_process_group(backend="nccl", init_method='env://')
302 | 
303 |     local_rank = int(os.environ["LOCAL_RANK"])
304 |     print('Rank: ' + str(test_arg.rank) + " Start!")
305 |     torch.cuda.set_device(local_rank)
306 |     if local_rank == 0:
307 |         print("Building TubeTK Model")
308 | 
309 |     model = TubeTK(num_classes=1, arg=model_arg, pretrained=False)
310 | 
311 |     data_loader = Data_Loader(
312 |         batch_size=test_arg.batch_size,
313 |         num_workers=8,
314 |         input_path=test_arg.video_url,
315 |         model_arg=model_arg,
316 |     )
317 | 
318 |     model = model.cuda(local_rank)
319 | 
320 |     model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
321 |     if test_arg.apex:
322 |         model = amp.initialize(model, opt_level='O1')
323 | 
324 |     model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[local_rank],
325 |                                                       output_device=local_rank,
326 |                                                       find_unused_parameters=True)
327 | 
328 |     if test_arg.local_rank == 0:
329 |         print("Loading Model")
330 |     checkpoint = torch.load(test_arg.model_path + '/' + test_arg.model_name, map_location=
331 |                             {'cuda:0': 'cuda:' + str(test_arg.local_rank),
332 |                              'cuda:1': 'cuda:' + str(test_arg.local_rank),
333 |                              'cuda:2': 'cuda:' + str(test_arg.local_rank),
334 |                              'cuda:3': 'cuda:' + str(test_arg.local_rank),
335 |                              'cuda:4': 'cuda:' + str(test_arg.local_rank),
336 |                              'cuda:5': 'cuda:' + str(test_arg.local_rank),
337 |                              'cuda:6': 'cuda:' + str(test_arg.local_rank),
338 |                              'cuda:7': 'cuda:' + str(test_arg.local_rank)})
339 |     model.load_state_dict(checkpoint['state'], strict=False)
340 |     if test_arg.local_rank == 0:
341 |         print("Finish Loading")
342 |     del checkpoint
343 | 
344 |     model.eval()
345 |     loader = data_loader.loader
346 | 
347 |     evaluate(model, loader, test_arg, model_arg, output_dir=test_arg.output_dir)
348 | 
349 | 
350 | if __name__ == '__main__':
351 |     parser = argparse.ArgumentParser()
352 |     parser.add_argument('--batch_size', default=3, type=int)
353 |     parser.add_argument('--model_path', default='./models', type=str, help='model path')
354 |     parser.add_argument('--model_name', default='TubeTK', type=str, help='model name')
355 |     parser.add_argument('--video_url', type=str, default='./data', help='video path')
356 |     parser.add_argument('--output_dir', default='./vis_video', type=str, help='output path')
357 |     parser.add_argument('--apex', action='store_true', help='whether use apex')
358 |     parser.add_argument('--config', default='./configs/TubeTK_resnet_50_FPN_8frame_1stride.yaml', type=str, help='config file')
359 | 
360 |     parser.add_argument('--local_rank', type=int, help='gpus')
361 | 
362 |     test_arg, unparsed = parser.parse_known_args()
363 | 
364 |     model_arg = __C
365 |     if test_arg.config is not None:
366 |         cfg_from_file(test_arg.config)
367 | 
368 |     test_arg.rank = int(os.environ["RANK"])
369 | 
370 |     main(test_arg, model_arg)
371 | 
372 | 


--------------------------------------------------------------------------------
/evaluate.py:
--------------------------------------------------------------------------------
  1 | import pickle
  2 | import os
  3 | import numpy as np
  4 | import torch
  5 | import warnings
  6 | from tqdm import tqdm
  7 | from Metrics import evaluateTracking
  8 | from dataset.dataLoader import Data_Loader_MOT
  9 | from network.tubetk import TubeTK
 10 | from post_processing.tube_nms import multiclass_nms
 11 | from apex import amp
 12 | import argparse
 13 | import multiprocessing
 14 | from configs.default import __C, cfg_from_file
 15 | from post_processing.tube_iou_matching import matching
 16 | warnings.filterwarnings('ignore')
 17 | import shutil
 18 | 
 19 | 
 20 | def synchronize():
 21 |     """
 22 |     Helper function to synchronize (barrier) among all processes when
 23 |     using distributed training
 24 |     """
 25 |     if not torch.distributed.is_available():
 26 |         return
 27 |     if not torch.distributed.is_initialized():
 28 |         return
 29 |     world_size = torch.distributed.get_world_size()
 30 |     if world_size == 1:
 31 |         return
 32 |     torch.distributed.barrier()
 33 | 
 34 | 
 35 | def match_video(video_name, tmp_dir, output_dir, model_arg):
 36 |     tubes_path = os.path.join(tmp_dir, video_name)
 37 |     tubes = []
 38 |     frames = sorted([int(x) for x in os.listdir(tubes_path)])
 39 |     for f in frames:
 40 |         tube = pickle.load(open(os.path.join(tubes_path, str(f)), 'rb'))
 41 |         tubes.append(tube)
 42 | 
 43 |     tubes = np.concatenate(tubes)
 44 |     matching(tubes, save_path=os.path.join(output_dir, video_name + '.txt'), verbose=True, arg=model_arg)
 45 | 
 46 | 
 47 | def evaluate(model, loader, test_arg, model_arg, output_dir='output'):
 48 |     if not os.path.exists(output_dir):
 49 |         os.makedirs(output_dir)
 50 | 
 51 |     tmp_dir = os.path.join(output_dir, 'tmp')
 52 |     try:
 53 |         shutil.rmtree(tmp_dir)
 54 |     except:
 55 |         pass
 56 |     os.makedirs(tmp_dir, exist_ok=True)
 57 | 
 58 |     if test_arg.rank == 0:
 59 |         loader = tqdm(loader, ncols=20)
 60 | 
 61 |     for i, data in enumerate(loader):
 62 |         imgs, img_metas = data[:2]
 63 |         imgs = imgs.cuda()
 64 |         with torch.no_grad():
 65 |             tubes, _, _ = zip(*model(imgs, img_metas, return_loss=False))
 66 | 
 67 |         for img, tube, img_meta in zip(imgs, tubes, img_metas):
 68 |             # ===========================================VIS OUTPUT====================================================
 69 |             # if img is not None:
 70 |             #     vis_output(img.cpu(), img_meta, bbox.cpu(), stride=model_arg.frame_stride, out_folder='/home/pb/results/')
 71 |             # =========================================================================================================
 72 |             tube[:, [0, 5, 10]] += img_meta['start_frame']
 73 | 
 74 |             os.makedirs(os.path.join(tmp_dir, img_meta['video_name']), exist_ok=True)
 75 | 
 76 |             tube = tube.cpu().data.numpy()
 77 |             pickle.dump(tube, open(os.path.join(tmp_dir, img_meta['video_name'], str(img_meta['start_frame'])), 'wb'))
 78 | 
 79 |     synchronize()
 80 |     if test_arg.rank == 0:
 81 |         print('Finish prediction, Start matching')
 82 |         video_names = os.listdir(tmp_dir)
 83 |         pool = multiprocessing.Pool(processes=20)
 84 |         pool_list = []
 85 |         for vid in video_names:
 86 |             pool_list.append(pool.apply_async(match_video, (vid, tmp_dir, os.path.join(output_dir, 'res'), model_arg,)))
 87 |         for p in tqdm(pool_list, ncols=20):
 88 |             p.get()
 89 |         pool.close()
 90 |         pool.join()
 91 |         shutil.rmtree(tmp_dir)
 92 | 
 93 |         if test_arg.trainOrTest == 'train' and test_arg.dataset == 'MOT17':
 94 |             print("FINISH MATCHING, START EVALUATE")
 95 |             seq_map = 'MOT17_train.txt'
 96 |             evaluateTracking(seq_map, os.path.join(output_dir, 'res'),
 97 |                              os.path.join(test_arg.data_url, 'train'), 'MOT17')
 98 |         # elif test_arg.trainOrTest == 'train' and test_arg.dataset == 'MOT15':
 99 |         #     print("FINISH MATCHING, START EVALUATE")
100 |         #     seq_map = 'MOT15_train.txt'
101 |         #     evaluateTracking(seq_map, os.path.join(output_dir, 'res'),
102 |         #                      os.path.join(test_arg.data_url[3], 'train'), 'MOT15')
103 | 
104 | 
105 | def main(test_arg, model_arg):
106 |     torch.distributed.init_process_group(backend="nccl", init_method='env://')
107 | 
108 |     local_rank = int(os.environ["LOCAL_RANK"])
109 |     print('Rank: ' + str(test_arg.rank) + " Start!")
110 |     torch.cuda.set_device(local_rank)
111 |     if local_rank == 0:
112 |         print("Building TubeTK Model")
113 | 
114 |     model = TubeTK(num_classes=1, arg=model_arg, pretrained=False)
115 | 
116 |     data_loader = Data_Loader_MOT(
117 |         batch_size=test_arg.batch_size,
118 |         num_workers=8,
119 |         input_path=test_arg.data_url,
120 |         train_epoch=1,
121 |         test_epoch=1,
122 |         model_arg=model_arg,
123 |         dataset=test_arg.dataset,
124 |         test_seq=None,
125 |         test_type=test_arg.trainOrTest,
126 |     )
127 | 
128 |     model = model.cuda(local_rank)
129 | 
130 |     model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
131 |     if test_arg.apex:
132 |         model = amp.initialize(model, opt_level='O1')
133 | 
134 |     model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[local_rank],
135 |                                                       output_device=local_rank,
136 |                                                       find_unused_parameters=True)
137 | 
138 |     if test_arg.local_rank == 0:
139 |         print("Loading Model")
140 |     checkpoint = torch.load(test_arg.model_path + '/' + test_arg.model_name, map_location=
141 |                             {'cuda:0': 'cuda:' + str(test_arg.local_rank),
142 |                              'cuda:1': 'cuda:' + str(test_arg.local_rank),
143 |                              'cuda:2': 'cuda:' + str(test_arg.local_rank),
144 |                              'cuda:3': 'cuda:' + str(test_arg.local_rank),
145 |                              'cuda:4': 'cuda:' + str(test_arg.local_rank),
146 |                              'cuda:5': 'cuda:' + str(test_arg.local_rank),
147 |                              'cuda:6': 'cuda:' + str(test_arg.local_rank),
148 |                              'cuda:7': 'cuda:' + str(test_arg.local_rank)})
149 |     model.load_state_dict(checkpoint['state'], strict=False)
150 |     if test_arg.local_rank == 0:
151 |         print("Finish Loading")
152 |     del checkpoint
153 | 
154 |     model.eval()
155 |     loader = data_loader.test_loader
156 | 
157 |     evaluate(model, loader, test_arg, model_arg, output_dir=test_arg.output_dir)
158 | 
159 | 
160 | if __name__ == '__main__':
161 |     parser = argparse.ArgumentParser()
162 |     parser.add_argument('--batch_size', default=1, type=int)
163 |     parser.add_argument('--model_path', default='./models', type=str, help='model path')
164 |     parser.add_argument('--model_name', default='TubeTK', type=str, help='model name')
165 |     parser.add_argument('--data_url', default='./data/', type=str, help='model path')
166 |     parser.add_argument('--output_dir', default='./link_res', type=str, help='output path')
167 |     parser.add_argument('--apex', action='store_true', help='whether use apex')
168 |     parser.add_argument('--config', default='./configs/TubeTK_resnet_50_FPN_8frame_1stride.yaml', type=str, help='config file')
169 |     parser.add_argument('--dataset', default='MOT17', type=str, help='test which dataset: MOT17, MOT15')
170 |     parser.add_argument('--trainOrTest', default='test', type=str, help='evaluate train or test set')
171 | 
172 |     parser.add_argument('--local_rank', type=int, help='gpus')
173 | 
174 |     test_arg, unparsed = parser.parse_known_args()
175 | 
176 |     model_arg = __C
177 |     if test_arg.config is not None:
178 |         cfg_from_file(test_arg.config)
179 | 
180 |     test_arg.rank = int(os.environ["RANK"])
181 | 
182 |     main(test_arg, model_arg)
183 | 
184 | 


--------------------------------------------------------------------------------
/fetch_models.sh:
--------------------------------------------------------------------------------
 1 | DIR='./models'
 2 | URL='https://drive.google.com/uc?id=1jLgyNmiZ_c-m8Cw3NcZTEPTf6VESfIzK&export=download'
 3 | 
 4 | mkdir -p $DIR
 5 | 
 6 | echo "Downloading pre-trained TubeTK..."
 7 | FILE="$(curl -sc /tmp/gcokie "${URL}" | grep -o '="uc-name.*</span>' | sed 's/.*">//;s/<.a> .*//')"
 8 | curl -Lb /tmp/gcokie "${URL}&confirm=$(awk '/_warning_/ {print $NF}' /tmp/gcokie)" -o "$DIR/${FILE}"
 9 | 
10 | echo "Download success."
11 | 


--------------------------------------------------------------------------------
/install/compile_local.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | PYTHON=${PYTHON:-"python"}
 3 | echo "Building nms op..."
 4 | cd ../post_processing/nms
 5 | if [ -d "build" ]; then
 6 |     rm -r build
 7 | fi
 8 | $PYTHON setup.py build_ext --inplace
 9 | 
10 | 


--------------------------------------------------------------------------------
/launch.py:
--------------------------------------------------------------------------------
 1 | import os, sys, stat, subprocess
 2 | from argparse import ArgumentParser
 3 | 
 4 | os.environ['NCCL_LL_THRESHOLD'] = '0'
 5 | 
 6 | 
 7 | def parse_args():
 8 |     parser = ArgumentParser(description="PyTorch distributed training launch "
 9 |                                         "helper utilty that will spawn up "
10 |                                         "multiple distributed processes")
11 | 
12 |     # Optional arguments for the launch helper
13 |     parser.add_argument("--world_size", type=int, default=1,
14 |                         help="The number of nodes to use for distributed "
15 |                              "training")
16 |     parser.add_argument("--rank", type=int, default=0,
17 |                         help="The rank of the node for multi-node distributed "
18 |                              "training")
19 |     parser.add_argument("--nproc_per_node", type=int, default=1,
20 |                         help="The number of processes to launch on each node, "
21 |                              "for GPU training, this is recommended to be set "
22 |                              "to the number of GPUs in your system so that "
23 |                              "each process can be bound to a single GPU.")
24 |     parser.add_argument("--init_method", default="tcp://127.0.0.1:29000", type=str,
25 |                         help="Init method of distributed system.")
26 |     parser.add_argument("--use_env", default=False, action="store_true",
27 |                         help="Use environment variable to pass "
28 |                              "'local rank'. For legacy reasons, the default value is False. "
29 |                              "If set to True, the script will not pass "
30 |                              "--local_rank as argument, and will instead set LOCAL_RANK.")
31 | 
32 |     # positional
33 |     parser.add_argument("--training_script", type=str,
34 |                         help="The full path to the single GPU training "
35 |                              "program/script to be launched in parallel, "
36 |                              "followed by all the arguments for the "
37 |                              "training script")
38 | 
39 |     return parser.parse_known_args()
40 | 
41 | 
42 | def main():
43 |     args, script_args = parse_args()
44 | 
45 |     # world size in terms of number of processes
46 |     dist_world_size = args.nproc_per_node * args.world_size
47 | 
48 |     # set PyTorch distributed related environmental variables
49 |     current_env = os.environ.copy()
50 |     assert args.init_method.startswith("tcp://"), "init_method should start with \"tcp://\"."
51 |     master_addr, master_port = args.init_method[6:].split(":")
52 |     current_env["MASTER_ADDR"] = master_addr
53 |     current_env["MASTER_PORT"] = str(master_port)
54 |     current_env["WORLD_SIZE"] = str(dist_world_size)
55 | 
56 |     processes = []
57 | 
58 |     for local_rank in range(0, args.nproc_per_node):
59 |         # each process's rank
60 |         dist_rank = args.nproc_per_node * args.rank + local_rank
61 |         current_env["RANK"] = str(dist_rank)
62 |         current_env["LOCAL_RANK"] = str(local_rank)
63 | 
64 |         # For some store true args.
65 |         new_script_args = []
66 |         for script_arg in script_args:
67 |             if script_arg.endswith("=True"):
68 |                 new_script_args.append(script_arg[:-5])
69 |             elif script_arg.endswith("=False"):
70 |                 pass
71 |             else:
72 |                 new_script_args.append(script_arg)
73 |         script_args = new_script_args
74 | 
75 |         # spawn the processes
76 |         if args.use_env:
77 |             cmd = [sys.executable, "-u",
78 |                    args.training_script] + script_args
79 |         else:
80 |             cmd = [sys.executable,
81 |                    "-u",
82 |                    args.training_script,
83 |                    "--local_rank={}".format(local_rank)] + script_args
84 |         process = subprocess.Popen(cmd, env=current_env)
85 |         processes.append(process)
86 | 
87 |     for process in processes:
88 |         process.wait()
89 |         if process.returncode != 0:
90 |             raise subprocess.CalledProcessError(returncode=process.returncode,
91 |                                                 cmd=process.args)
92 | 
93 | 
94 | if __name__ == "__main__":
95 |     main()
96 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import torch
  4 | from tensorboardX import SummaryWriter
  5 | from network.tubetk import TubeTK
  6 | from dataset.dataLoader import Data_Loader_MOT
  7 | from optim.solver import make_optimizer as makeOpt
  8 | from configs.default import __C, cfg_from_file
  9 | from utils.util import AverageMeter
 10 | from tqdm import tqdm
 11 | from optim.lr_scheduler import WarmupMultiStepLR
 12 | import warnings
 13 | import numpy as np
 14 | try:
 15 |     from apex import amp
 16 |     import apex
 17 | except:
 18 |     pass
 19 | warnings.filterwarnings('ignore')
 20 | 
 21 | 
 22 | def fix_bn(m):
 23 |     classname = m.__class__.__name__
 24 |     if classname.find('BatchNorm') != -1:
 25 |         m.half()
 26 | 
 27 | 
 28 | def synchronize():
 29 |     """
 30 |     Helper function to synchronize (barrier) among all processes when
 31 |     using distributed training
 32 |     """
 33 |     if not torch.distributed.is_available():
 34 |         return
 35 |     if not torch.distributed.is_initialized():
 36 |         return
 37 |     world_size = torch.distributed.get_world_size()
 38 |     if world_size == 1:
 39 |         return
 40 |     torch.distributed.barrier()
 41 | 
 42 | 
 43 | def print_dict(string, rank):
 44 |     if rank == 0:
 45 |         print(string)
 46 | 
 47 | 
 48 | def run_one_iter(model, optimizer, data, scheduler, test):
 49 |     imgs, img_metas, gt_tubes, gt_labels, start_frame = data
 50 | 
 51 |     # =================================Visualization================================================
 52 |     # vis_input(imgs, img_metas, gt_bboxes, gt_labels, start_frame, stride=model_arg.frame_stride, out_folder='/home/pb/results/')
 53 |     # ==============================================================================================
 54 | 
 55 |     # Get Input
 56 |     imgs = imgs.cuda()
 57 |     for i in range(len(gt_tubes)):
 58 |         gt_tubes[i] = gt_tubes[i].cuda()
 59 |         gt_labels[i] = gt_labels[i].cuda()
 60 | 
 61 |     if not test:
 62 |         scheduler.step()
 63 | 
 64 |     # Forward
 65 |     if not test:
 66 |         losses = model(imgs, img_metas, return_loss=True, gt_tubes=gt_tubes, gt_labels=gt_labels)
 67 |         res = losses
 68 |     else:
 69 |         with torch.no_grad():
 70 |             bbox_list = model(imgs, img_metas, return_loss=False, gt_tubes=gt_tubes, gt_labels=gt_labels)
 71 |         bbox_list[:, :, 0] += start_frame
 72 |         res = bbox_list
 73 | 
 74 |     # Backward
 75 |     if not test:
 76 |         if losses:
 77 |             optimizer.zero_grad()
 78 |             loss = torch.zeros(1).cuda()
 79 |             for l in losses:
 80 |                 if 'loss_cls' in l:
 81 |                     loss += 1e3 * losses[l]
 82 |                 else:
 83 |                     loss += losses[l]
 84 |             if not train_arg.apex:
 85 |                 loss.backward()
 86 |             else:
 87 |                 with amp.scale_loss(loss, optimizer) as scaled_loss:
 88 |                     scaled_loss.backward()
 89 |             optimizer.step()
 90 | 
 91 |     return res
 92 | 
 93 | 
 94 | def train(model, optimizer, data_loader, scheduler, writer, max_acc=0, step_start=0):
 95 |     loss_cls_accumulate = AverageMeter()
 96 |     loss_reg_accumulate = AverageMeter()
 97 |     loss_center_accumulate = AverageMeter()
 98 |     max_acc = max_acc
 99 | 
100 |     loader = data_loader.train_loader
101 |     model.train()
102 |     if train_arg.apex:
103 |         model.apply(fix_bn)
104 |     if train_arg.local_rank == 0:
105 |         loader = tqdm(loader, ncols=20)
106 | 
107 |     loader_len = len(loader)
108 |     for step, data in enumerate(loader):
109 |         # Input
110 |         if step > loader_len - step_start:
111 |             break
112 |         step += step_start
113 |         losses = run_one_iter(model, optimizer, data, scheduler, False)
114 | 
115 |         # Loss and results
116 |         if losses:
117 |             if not np.isnan(losses['loss_cls'].data.cpu().numpy()):
118 |                 loss_cls_accumulate.update(val=losses['loss_cls'].data.cpu().numpy())
119 |             if not np.isnan(losses['loss_reg'].data.cpu().numpy()):
120 |                 loss_reg_accumulate.update(val=losses['loss_reg'].data.cpu().numpy())
121 |             if not np.isnan(losses['loss_centerness'].data.cpu().numpy()):
122 |                 loss_center_accumulate.update(val=losses['loss_centerness'].data.cpu().numpy())
123 | 
124 |         if train_arg.rank == 0:
125 |             writer.add_scalar('train/loss_cls', loss_cls_accumulate.avg, step)
126 |             writer.add_scalar('train/loss_reg', loss_reg_accumulate.avg, step)
127 |             writer.add_scalar('train/loss_center', loss_center_accumulate.avg, step)
128 |             writer.add_scalar('train/lr', optimizer.param_groups[0]["lr"], step)
129 | 
130 |         if step % 1000 == 999:
131 |             if train_arg.rank == 0:
132 |                 print('save model')
133 |                 torch.save({'state': model.state_dict(),
134 |                             'max_acc': max_acc,
135 |                             'step': step,
136 |                             'opt': optimizer.state_dict(),
137 |                             'sched': scheduler.state_dict()},
138 |                            train_arg.model_path + '/' + train_arg.model_name)
139 | 
140 |         if step % train_arg.reset_iter == train_arg.reset_iter - 1:
141 |             loss_cls_accumulate.reset()
142 |             loss_reg_accumulate.reset()
143 |             loss_center_accumulate.reset()
144 | 
145 |         if train_arg.local_rank == 0:
146 |             loader.set_description('Loss_cls: ' + str(loss_cls_accumulate.avg)[0:6] +
147 |                                    ',\tLoss_reg: ' + str(loss_reg_accumulate.avg)[0:6] +
148 |                                    ',\tLoss_center: ' + str(loss_center_accumulate.avg)[0:6], refresh=False)
149 | 
150 | 
151 | def main(train_arg, model_arg):
152 |     torch.distributed.init_process_group(backend="nccl", init_method='env://')
153 |     local_rank = int(os.environ["LOCAL_RANK"])
154 |     print('Rank: ' + str(train_arg.rank) + " Start!")
155 |     torch.cuda.set_device(local_rank)
156 | 
157 |     print_dict("Building TubeTK Model", train_arg.local_rank)
158 |     model = TubeTK(num_classes=1, arg=model_arg, pretrained=True)
159 | 
160 |     data_loader = Data_Loader_MOT(
161 |         batch_size=train_arg.batch_size,
162 |         num_workers=8,
163 |         input_path=train_arg.data_url,
164 |         train_epoch=train_arg.epochs,
165 |         model_arg=model_arg,
166 |         dataset=train_arg.dataset,
167 |         test_epoch=1
168 |     )
169 |     # =================================Visualization================================================
170 |     # loader = data_loader.train_loader
171 |     # for step, data in enumerate(loader):
172 |     #     imgs, img_metas, gt_bboxes, gt_labels, start_frame = data
173 |     #
174 |     #     vis_input(imgs, img_metas, gt_bboxes, gt_labels, start_frame, stride=model_arg.frame_stride,
175 |     #               out_folder='/home/pb/results/')
176 |     # ==============================================================================================
177 | 
178 |     model = model.cuda(local_rank)
179 |     optimizer = makeOpt(train_arg, model)
180 | 
181 |     model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
182 |     if train_arg.apex:
183 |         model, optimizer = amp.initialize(model, optimizer,
184 |                                           opt_level='O1',
185 |                                           # loss_scale='dynamic',
186 |                                           # keep_batchnorm_fp32=False
187 |                                           )
188 | 
189 |     model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[local_rank],
190 |                                                       output_device=local_rank,
191 |                                                       find_unused_parameters=True)
192 | 
193 |     sched = WarmupMultiStepLR(
194 |         optimizer,
195 |         milestones=train_arg.mileStone,
196 |         warmup_factor=0.1,
197 |         warmup_iters=0,
198 |         warmup_method='linear')
199 | 
200 |     max_acc = 0
201 |     step = 0
202 | 
203 |     if train_arg.resume:
204 |         print_dict("Loading Model", train_arg.local_rank)
205 |         checkpoint = torch.load(train_arg.model_path + '/' + train_arg.model_name, map_location=
206 |                                 {'cuda:0': 'cuda:' + str(train_arg.local_rank),
207 |                                  'cuda:1': 'cuda:' + str(train_arg.local_rank),
208 |                                  'cuda:2': 'cuda:' + str(train_arg.local_rank),
209 |                                  'cuda:3': 'cuda:' + str(train_arg.local_rank),
210 |                                  'cuda:4': 'cuda:' + str(train_arg.local_rank),
211 |                                  'cuda:5': 'cuda:' + str(train_arg.local_rank),
212 |                                  'cuda:6': 'cuda:' + str(train_arg.local_rank),
213 |                                  'cuda:7': 'cuda:' + str(train_arg.local_rank)})
214 |         model.load_state_dict(checkpoint['state'], strict=False)
215 |         optimizer.load_state_dict(checkpoint['opt'])
216 |         sched.load_state_dict(checkpoint['sched'])
217 |         sched.milestones = train_arg.mileStone
218 |         step = checkpoint['step'] + 1
219 |         sched.last_epoch = step
220 |         max_acc = checkpoint['max_acc']
221 |         print_dict("Finish Loading", train_arg.local_rank)
222 |         del checkpoint
223 | 
224 |     if train_arg.rank == 0:
225 |         tensorboard_writer = SummaryWriter(train_arg.logName, purge_step=step)
226 |     else:
227 |         tensorboard_writer = None
228 | 
229 |     print_dict("Training", train_arg.local_rank)
230 |     train(model, optimizer, data_loader, sched, tensorboard_writer, max_acc=max_acc, step_start=step)
231 | 
232 | 
233 | if __name__ == '__main__':
234 |     parser = argparse.ArgumentParser(description='PyTorch Sub-JHMDB rgb frame training')
235 |     parser.add_argument('--epochs', default=120, type=int, metavar='N', help='number of total epochs')
236 |     parser.add_argument('--batch_size', default=1, type=int, metavar='N', help='mini-batch size (default: 64)')
237 |     parser.add_argument('--lr', default=0.001, type=float, metavar='LR', help='initial learning rate')
238 |     parser.add_argument('--weight_decay', default=1e-5, type=float, help='weight decay')
239 |     parser.add_argument('--mileStone', nargs='+', type=int, default=[7500, 15000], help='mileStone for lr Sched')
240 |     parser.add_argument('--reset_iter', default=200, type=list, help='test iter')
241 |     parser.add_argument('--model_path', default='./models', type=str, help='model path')
242 |     parser.add_argument('--model_name', default='TubeTK', type=str, help='model name')
243 |     parser.add_argument('--data_url', default='./data/', type=str, help='data path')
244 |     parser.add_argument('--dataset', default='MOT17', type=str, help='MOT17, JTA, MOTJTA')
245 | 
246 |     parser.add_argument('--config', default=None, type=str, help='config file')
247 | 
248 |     parser.add_argument('--logName', type=str,
249 |                         default='./logs/TubeTK_log', help='log dir name')
250 | 
251 |     parser.add_argument('--local_rank', type=int, help='gpus')
252 | 
253 |     parser.add_argument('--resume', action='store_true', help='whether resume')
254 | 
255 |     parser.add_argument('--apex', action='store_true', help='whether use apex')
256 | 
257 |     train_arg, unparsed = parser.parse_known_args()
258 | 
259 |     model_arg = __C
260 |     if train_arg.config is not None:
261 |         cfg_from_file(train_arg.config)
262 | 
263 |     train_arg.rank = int(os.environ["RANK"])
264 |     if train_arg.rank == 0:
265 |         try:
266 |             os.makedirs(train_arg.model_path)
267 |         except:
268 |             pass
269 | 
270 |     main(train_arg, model_arg)
271 | 


--------------------------------------------------------------------------------
/network/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BoPang1996/TubeTK/bcca334c5348f9ae33e04595e1af93cf8351e50e/network/__init__.py


--------------------------------------------------------------------------------
/network/focal_loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Variable
 3 | import torch.nn.functional as F
 4 | 
 5 | 
 6 | def one_hot_embedding(labels, num_classes):
 7 |     '''
 8 |     Embedding labels to one-hot form.
 9 |     Args:
10 |       labels: (LongTensor) class labels, sized [N,].
11 |       num_classes: (int) number of classes.
12 |     Returns:
13 |       (tensor) encoded labels, sized [N,#classes].
14 |     '''
15 |     y = torch.eye(num_classes)
16 |     return y[labels]
17 | 
18 | 
19 | def focal_loss(x, y):
20 |     '''
21 |     Focal loss.
22 |     Args:
23 |       x: (tensor) sized [N,D].
24 |       y: (tensor) sized [N,].
25 |     Return:
26 |       (tensor) focal loss.
27 |     '''
28 |     alpha = 0.25
29 |     gamma = 2
30 | 
31 |     t = one_hot_embedding(y, x.shape[1] + 1)
32 | 
33 |     # exclude background
34 |     t = t[:, 1:]
35 | 
36 |     t = Variable(t).cuda()
37 |     p = x.sigmoid().float()
38 | 
39 |     # pt = p if t > 0 else 1-p
40 |     pt = p * t + (1 - p) * (1 - t)
41 | 
42 |     # w = alpha if t > 0 else 1-alpha
43 |     w = alpha * t + (1 - alpha) * (1 - t)
44 | 
45 |     w = w * (1 - pt).pow(gamma)
46 |     return F.binary_cross_entropy_with_logits(x.float(), t, w.detach(), size_average=True)
47 | 


--------------------------------------------------------------------------------
/network/fpn.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.nn.functional as F
 3 | 
 4 | 
 5 | class FPN(nn.Module):
 6 |     def __init__(self,
 7 |                  in_channels,  # [512, 1024, 2048]
 8 |                  arg,
 9 |                  ):
10 |         super(FPN, self).__init__()
11 |         assert isinstance(in_channels, list)
12 |         self.in_channels = in_channels
13 |         self.out_channels = arg.fpn_features_n
14 |         self.num_ins = len(in_channels)
15 |         self.num_outs = arg.fpn_outs_n
16 | 
17 |         self.lateral_convs = nn.ModuleList()
18 |         self.fpn_convs = nn.ModuleList()
19 | 
20 |         for i in range(self.num_ins):
21 |             l_conv = nn.Conv3d(in_channels[i], self.out_channels, kernel_size=1, stride=1)
22 |             fpn_conv = nn.Conv3d(self.out_channels, self.out_channels,
23 |                                  kernel_size=3, stride=1, padding=1)
24 | 
25 |             self.lateral_convs.append(l_conv)
26 |             self.fpn_convs.append(fpn_conv)
27 | 
28 |         # add extra conv layers (e.g., RetinaNet)
29 |         extra_levels = self.num_outs - self.num_ins
30 |         if extra_levels >= 1:
31 |             for i in range(extra_levels):
32 |                 in_channels = self.out_channels
33 |                 extra_fpn_conv = nn.Conv3d(in_channels, self.out_channels, kernel_size=3, stride=(1, 2, 2), padding=1)
34 |                 self.fpn_convs.append(extra_fpn_conv)
35 | 
36 |         # default init_weights for conv(msra) and norm in ConvModule
37 |         for m in self.modules():
38 |             if isinstance(m, nn.Conv3d):
39 |                 nn.init.xavier_uniform_(m.weight, gain=1)
40 |                 if hasattr(m, 'bias') and m.bias is not None:
41 |                     nn.init.constant_(m.bias, 0)
42 | 
43 |     def forward(self, inputs):
44 |         assert len(inputs) == len(self.in_channels)
45 | 
46 |         # build laterals
47 |         laterals = [
48 |             lateral_conv(inputs[i])
49 |             for i, lateral_conv in enumerate(self.lateral_convs)
50 |         ]
51 | 
52 |         # build top-down path
53 |         used_backbone_levels = len(laterals)
54 |         for i in range(used_backbone_levels - 1, 0, -1):
55 |             laterals[i - 1] += F.interpolate(
56 |                 laterals[i], scale_factor=2, mode='nearest')
57 | 
58 |         # build outputs
59 |         # part 1: from original levels
60 |         outs = [
61 |             self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)
62 |         ]
63 |         # part 2: add extra levels
64 |         if self.num_outs > len(outs):
65 |             # add conv layers on top of original feature maps (RetinaNet)
66 |             outs.append(self.fpn_convs[used_backbone_levels](outs[-1]))
67 |             for i in range(used_backbone_levels + 1, self.num_outs):
68 |                 outs.append(self.fpn_convs[i](F.relu(outs[-1])))
69 | 
70 |         return tuple(outs)
71 | 


--------------------------------------------------------------------------------
/network/resnet.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import math
  4 | from torch.nn.modules.batchnorm import _BatchNorm
  5 | 
  6 | __all__ = [
  7 |     'ResNet', 'resnet50', 'resnet101', 'resnext101'
  8 | ]
  9 | 
 10 | 
 11 | class Bottleneck(nn.Module):
 12 |     expansion = 4
 13 | 
 14 |     def __init__(self, inplanes, planes, stride=1, kernel=3, downsample=None, groups=1, base_width=64):
 15 |         super(Bottleneck, self).__init__()
 16 | 
 17 |         width = int(planes * (base_width / 64.)) * groups
 18 | 
 19 |         self.conv1 = nn.Conv3d(inplanes, width, kernel_size=1, bias=False)
 20 |         self.bn1 = nn.BatchNorm3d(width)
 21 |         self.conv2 = nn.Conv3d(
 22 |             width, width, kernel_size=(kernel, 3, 3), stride=stride, padding=(kernel//2, 1, 1), groups=groups, bias=False)
 23 |         self.bn2 = nn.BatchNorm3d(width)
 24 |         self.conv3 = nn.Conv3d(width, planes * 4, kernel_size=1, bias=False)
 25 |         self.bn3 = nn.BatchNorm3d(planes * 4)
 26 |         self.relu = nn.ReLU(inplace=True)
 27 |         self.downsample = downsample
 28 |         self.stride = stride
 29 | 
 30 |     def forward(self, x):
 31 |         residual = x
 32 | 
 33 |         out = self.conv1(x)
 34 |         out = self.bn1(out)
 35 |         out = self.relu(out)
 36 | 
 37 |         out = self.conv2(out)
 38 |         out = self.bn2(out)
 39 |         out = self.relu(out)
 40 | 
 41 |         out = self.conv3(out)
 42 |         out = self.bn3(out)
 43 | 
 44 |         if self.downsample is not None:
 45 |             residual = self.downsample(x)
 46 | 
 47 |         out += residual
 48 |         out = self.relu(out)
 49 | 
 50 |         return out
 51 | 
 52 | 
 53 | class ResNet(nn.Module):
 54 | 
 55 |     def __init__(self,
 56 |                  block,
 57 |                  layers,
 58 |                  kernels,
 59 |                  groups=1,
 60 |                  width_per_group=64,
 61 |                  freeze_bn=False,
 62 |                  freeze_stages=-1,
 63 |                  fst_l_stride=2):
 64 |         self.freeze_bn = freeze_bn
 65 |         self.freeze_stages = freeze_stages
 66 |         self.groups = groups
 67 |         self.base_width = width_per_group
 68 |         self.inplanes = 64
 69 | 
 70 |         super(ResNet, self).__init__()
 71 |         self.conv1 = nn.Conv3d(
 72 |             3,
 73 |             64,
 74 |             kernel_size=(kernels[0][0], 7, 7),
 75 |             stride=(1, 2, 2),
 76 |             padding=(kernels[0][0]//2, 3, 3),
 77 |             bias=False)
 78 |         self.bn1 = nn.BatchNorm3d(64)
 79 |         self.relu = nn.ReLU(inplace=True)
 80 |         if kernels[0][0] == 7:
 81 |             self.maxpool = nn.MaxPool3d(kernel_size=3, stride=(1, 2, 2), padding=1)
 82 |         else:
 83 |             self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 4), stride=(1, 2, 2), padding=(0, 1, 1))
 84 |         self.layer1 = self._make_layer(block, 64, layers[0], kernels[1])
 85 |         self.layer2 = self._make_layer(
 86 |             block, 128, layers[1], kernels[2], stride=(1, 2, 2) if fst_l_stride < 2 else 2)
 87 |         self.layer3 = self._make_layer(
 88 |             block, 256, layers[2], kernels[3], stride=2)
 89 |         self.layer4 = self._make_layer(
 90 |             block, 512, layers[3], kernels[4], stride=2)
 91 | 
 92 |         for m in self.modules():
 93 |             if isinstance(m, nn.Conv3d):
 94 |                 m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out')
 95 |             elif isinstance(m, nn.BatchNorm3d):
 96 |                 m.weight.data.fill_(1)
 97 |                 m.bias.data.zero_()
 98 | 
 99 |         self._freeze_stages()
100 |         if self.freeze_bn:
101 |             self._freeze_bn()
102 | 
103 |     def _make_layer(self, block, planes, blocks, kernel, stride=1):
104 |         downsample = None
105 |         if stride != 1 or self.inplanes != planes * block.expansion:
106 |             downsample = nn.Sequential(
107 |                 nn.Conv3d(
108 |                     self.inplanes,
109 |                     planes * block.expansion,
110 |                     kernel_size=1,
111 |                     stride=stride,
112 |                     bias=False), nn.BatchNorm3d(planes * block.expansion))
113 | 
114 |         layers = []
115 |         layers.append(block(self.inplanes, planes, stride, kernel[0], downsample, self.groups, self.base_width))
116 |         self.inplanes = planes * block.expansion
117 |         for i in range(1, blocks):
118 |             layers.append(block(self.inplanes, planes, kernel=kernel[i], groups=self.groups, base_width=self.base_width))
119 | 
120 |         return nn.Sequential(*layers)
121 | 
122 |     def forward(self, x):
123 |         x = self.conv1(x)
124 |         x = self.bn1(x)
125 |         x = self.relu(x)
126 |         x = self.maxpool(x)
127 | 
128 |         outs = []
129 |         x = self.layer1(x)
130 |         x = self.layer2(x)
131 |         outs.append(x)
132 |         x = self.layer3(x)
133 |         outs.append(x)
134 |         x = self.layer4(x)
135 |         outs.append(x)
136 | 
137 |         return tuple(outs)
138 | 
139 |     def _freeze_stages(self):
140 |         if self.freeze_stages >= 0:
141 |             print('Freeze Stage: 0')
142 |             self.bn1.eval()
143 |             for m in [self.conv1, self.bn1]:
144 |                 for param in m.parameters():
145 |                     param.requires_grad = False
146 | 
147 |         for i in range(1, self.freeze_stages + 1):
148 |             print('Freeze Stage: ' + str(i))
149 |             m = getattr(self, 'layer{}'.format(i))
150 |             m.eval()
151 |             for param in m.parameters():
152 |                 param.requires_grad = False
153 | 
154 |     def _freeze_bn(self):
155 |         print('Freeze BN')
156 |         for m in self.modules():
157 |             if isinstance(m, _BatchNorm):
158 |                 m.eval()
159 | 
160 |     def train(self, mode=True):
161 |         super(ResNet, self).train(mode)
162 |         self._freeze_stages()
163 | 
164 |         if mode and self.freeze_bn:
165 |             for m in self.modules():
166 |                 if isinstance(m, _BatchNorm):
167 |                     m.eval()
168 | 
169 | 
170 | def resnet50(**kwargs):
171 |     """Constructs a ResNet-50 model.
172 |     """
173 |     kernel = [[7], [3, 3, 3], [3, 3, 3, 3], [3, 3, 3, 3, 3, 3], [3, 3, 3]]
174 |     # kernel = [[5], [3, 3, 3], [3, 1, 3, 1], [3, 1, 3, 1, 3, 1], [3, 1, 3]]
175 |     model = ResNet(Bottleneck, [3, 4, 6, 3],
176 |                    kernels=kernel,
177 |                    groups=1,
178 |                    width_per_group=64,
179 |                    **kwargs)
180 |     return model
181 | 
182 | 
183 | def resnet101(**kwargs):
184 |     """Constructs a ResNet-101 model.
185 |     """
186 |     # kernel = [[7], [3, 3, 3], [3, 1, 3, 1], [(-1 * (i % 2) + 1)*2 + 1 for i in range(23)], [1, 3, 1]]
187 |     kernel = [[7], [3, 3, 3], [3, 3, 3, 3], [3 for _ in range(23)], [3, 3, 3]]
188 |     model = ResNet(Bottleneck, [3, 4, 23, 3],
189 |                    kernels=kernel,
190 |                    groups=1,
191 |                    width_per_group=64,
192 |                    **kwargs)
193 |     return model
194 | 
195 | 
196 | def resnext101(**kwargs):
197 |     """Constructs a ResNet-101 model.
198 |     """
199 |     # kernel = [[5], [3, 3, 3], [3, 1, 3, 1], [(-1 * (i % 2) + 1)*2 + 1 for i in range(23)], [1, 3, 1]]
200 |     kernel = [[7], [3, 3, 3], [3, 3, 3, 3], [3 for _ in range(23)], [3, 3, 3]]
201 |     model = ResNet(Bottleneck, [3, 4, 23, 3],
202 |                    kernels=kernel,
203 |                    groups=32,
204 |                    width_per_group=4,
205 |                    **kwargs)
206 |     return model
207 | 


--------------------------------------------------------------------------------
/network/tubetk.py:
--------------------------------------------------------------------------------
 1 | import time, os
 2 | import torch
 3 | import torch.nn as nn
 4 | from network.resnet import resnet101, resnet50, resnext101
 5 | from network.fpn import FPN
 6 | from network.track_head import TrackHead
 7 | 
 8 | 
 9 | class TubeTK(nn.Module):
10 | 
11 |     def __init__(self,
12 |                  num_classes,
13 |                  arg,
14 |                  pretrained=True
15 |                  ):
16 |         super(TubeTK, self).__init__()
17 |         self.arg = arg
18 |         if arg.backbone == 'res50':
19 |             self.backbone = resnet50(freeze_stages=arg.freeze_stages, fst_l_stride=arg.model_stride[0][0])
20 |         elif arg.backbone == 'res101':
21 |             self.backbone = resnet101(freeze_stages=arg.freeze_stages, fst_l_stride=arg.model_stride[0][0])
22 |         elif arg.backbone == 'resx101':
23 |             self.backbone = resnext101(freeze_stages=arg.freeze_stages, fst_l_stride=arg.model_stride[0][0])
24 |         else:
25 |             raise NotImplementedError
26 |         self.neck = FPN(in_channels=[512, 1024, 2048], arg=arg)
27 |         self.tube_head = TrackHead(arg=arg,
28 |                                    num_classes=num_classes,
29 |                                    in_channels=self.neck.out_channels,
30 |                                    strides=[[arg.model_stride[i][0]/(arg.forward_frames * 2) * arg.value_range,
31 |                                             arg.model_stride[i][1]/arg.img_size[0] * arg.value_range,
32 |                                             arg.model_stride[i][1]/arg.img_size[1] * arg.value_range] for i in range(5)]
33 |                                    )
34 | 
35 |         if pretrained and arg.pretrain_model_path != '':
36 |             self.load_pretrain(model_path=arg.pretrain_model_path)
37 |         torch.cuda.empty_cache()
38 | 
39 |     def load_pretrain(self, model_path):
40 |         if int(os.environ["RANK"]) == 0:
41 |             print('loading JTA Pretrain: ' + str(model_path))
42 | 
43 |         pre_model = torch.load(model_path, map_location={'cuda:0': 'cpu',
44 |                                                          'cuda:1': 'cpu',
45 |                                                          'cuda:2': 'cpu',
46 |                                                          'cuda:3': 'cpu',
47 |                                                          'cuda:4': 'cpu',
48 |                                                          'cuda:5': 'cpu',
49 |                                                          'cuda:6': 'cpu',
50 |                                                          'cuda:7': 'cpu'})['state']
51 |         model_dict = self.state_dict()
52 |         for key in model_dict:
53 |             if model_dict[key].shape != pre_model['module.' + key].shape:
54 |                 p_shape = model_dict[key].shape
55 |                 pre_model['module.' + key] = pre_model['module.' + key].repeat(1, 1, p_shape[2], 1, 1) / p_shape[2]
56 |             else:
57 |                 model_dict[key] = pre_model['module.' + key]
58 |         self.load_state_dict(model_dict)
59 |         del pre_model, model_dict
60 | 
61 |     def extract_feat(self, x):
62 |         x = self.backbone(x)
63 |         x = self.neck(x)
64 |         return x
65 | 
66 |     def forward_train(self,
67 |                       img,
68 |                       img_metas,
69 |                       gt_tubes,
70 |                       gt_labels):
71 |         x = self.extract_feat(img)
72 |         outs = self.tube_head(x)
73 |         loss_inputs = outs + (gt_tubes, gt_labels, img_metas)
74 |         losses = self.tube_head.loss(*loss_inputs)
75 |         return losses
76 | 
77 |     def forward_test(self, img, img_meta):
78 |         x = self.extract_feat(img)
79 |         outs = self.tube_head(x)
80 |         tube_inputs = outs + (img_meta, self.arg)
81 |         tube_list = self.tube_head.get_tubes(*tube_inputs)
82 |         return tube_list
83 | 
84 |     def forward(self, img, img_meta, return_loss=True, **kwargs):
85 |         if return_loss:
86 |             return self.forward_train(img, img_meta, **kwargs)
87 |         else:
88 |             return self.forward_test(img, img_meta)
89 | 


--------------------------------------------------------------------------------
/network/utils.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | 
  4 | 
  5 | def distance2bbox(points, distance, max_shape=None):
  6 |     """Decode distance prediction to bounding box.
  7 | 
  8 |     Args:
  9 |         points (Tensor): Shape (n, 3), [t, x, y].
 10 |         distance (Tensor): Distance from the given point to 4
 11 |             boundaries (left, top, right, bottom, frDis, 4point, bkDis, 4point).
 12 |         max_shape (list): Shape of the image.
 13 | 
 14 |     Returns:
 15 |         Tensor: Decoded bboxes.
 16 |     """
 17 | 
 18 |     mid_t = points[:, 0]
 19 |     mid_x1 = points[:, 1] - distance[:, 0]
 20 |     mid_y1 = points[:, 2] - distance[:, 1]
 21 |     mid_x2 = points[:, 1] + distance[:, 2]
 22 |     mid_y2 = points[:, 2] + distance[:, 3]
 23 | 
 24 |     fr_t = points[:, 0] + distance[:, 4]
 25 |     fr_x1 = mid_x1 + distance[:, 5]
 26 |     fr_y1 = mid_y1 + distance[:, 6]
 27 |     fr_x2 = mid_x2 + distance[:, 7]
 28 |     fr_y2 = mid_y2 + distance[:, 8]
 29 | 
 30 |     bk_t = points[:, 0] - distance[:, 9]
 31 |     bk_x1 = mid_x1 + distance[:, 10]
 32 |     bk_y1 = mid_y1 + distance[:, 11]
 33 |     bk_x2 = mid_x2 + distance[:, 12]
 34 |     bk_y2 = mid_y2 + distance[:, 13]
 35 | 
 36 |     if max_shape is not None:
 37 |         mid_x1 = mid_x1.clamp(min=0, max=max_shape[2])
 38 |         mid_y1 = mid_y1.clamp(min=0, max=max_shape[1])
 39 |         mid_x2 = mid_x2.clamp(min=0, max=max_shape[2])
 40 |         mid_y2 = mid_y2.clamp(min=0, max=max_shape[1])
 41 | 
 42 |         fr_t = fr_t.clamp(min=0, max=max_shape[0])
 43 |         fr_x1 = fr_x1.clamp(min=0, max=max_shape[2])
 44 |         fr_y1 = fr_y1.clamp(min=0, max=max_shape[1])
 45 |         fr_x2 = fr_x2.clamp(min=0, max=max_shape[2])
 46 |         fr_y2 = fr_y2.clamp(min=0, max=max_shape[1])
 47 | 
 48 |         bk_t = bk_t.clamp(min=0, max=max_shape[0])
 49 |         bk_x1 = bk_x1.clamp(min=0, max=max_shape[2])
 50 |         bk_y1 = bk_y1.clamp(min=0, max=max_shape[1])
 51 |         bk_x2 = bk_x2.clamp(min=0, max=max_shape[2])
 52 |         bk_y2 = bk_y2.clamp(min=0, max=max_shape[1])
 53 | 
 54 |     return torch.stack([mid_t, mid_x1, mid_y1, mid_x2, mid_y2,
 55 |                         fr_t, fr_x1, fr_y1, fr_x2, fr_y2,
 56 |                         bk_t, bk_x1, bk_y1, bk_x2, bk_y2], -1)
 57 | 
 58 | 
 59 | def iou_loss(pred_tubes, target_tubes):
 60 |     ious = tube_iou(pred_tubes, target_tubes)
 61 |     loss = 1 - ious
 62 |     return loss
 63 | 
 64 | 
 65 | def giou_loss(pred_tubes, target_tubes):
 66 |     gious = tube_giou(pred_tubes, target_tubes)
 67 |     loss = 1 - gious
 68 |     loss = loss.clamp(min=0, max=2)
 69 |     return loss
 70 | 
 71 | 
 72 | def tube_giou(pred_tubes, target_tubes):
 73 |     mid_t_pred, mid_bboxes_pred, fr_t_pred, fr_bboxes_pred, bk_t_pred, bk_bboxes_pred = get3bboxes_from_tube(pred_tubes)
 74 |     mid_t_gt, mid_bboxes_gt, fr_t_gt, fr_bboxes_gt, bk_t_gt, bk_bboxes_gt = get3bboxes_from_tube(target_tubes)
 75 | 
 76 |     # get giou of mid_frame
 77 |     tube_vol_pred = volume(area(mid_bboxes_pred), area(fr_bboxes_pred), fr_t_pred - mid_t_pred) + \
 78 |                     volume(area(mid_bboxes_pred), area(bk_bboxes_pred), mid_t_pred - bk_t_pred)
 79 |     tube_vol_gt = volume(area(mid_bboxes_gt), area(fr_bboxes_gt), fr_t_gt - mid_t_gt) + \
 80 |                   volume(area(mid_bboxes_gt), area(bk_bboxes_gt), mid_t_gt - bk_t_gt)
 81 | 
 82 |     mid_intersect = bbox_overlaps(mid_bboxes_pred, mid_bboxes_gt)
 83 |     mid_enclose = bbox_enclose(mid_bboxes_pred, mid_bboxes_gt)
 84 | 
 85 |     iou = mid_intersect / (area(mid_bboxes_gt) + area(mid_bboxes_pred) - mid_intersect)
 86 |     giou = iou - (mid_enclose - (area(mid_bboxes_gt) + area(mid_bboxes_pred) - mid_intersect)) / mid_enclose
 87 | 
 88 |     # get intersect of front and back frame
 89 |     dis_fr_min, fr_bboxes_pred_align_min, fr_bboxes_gt_align_min = \
 90 |         align_bbox_on_frame(mid_bboxes_pred, fr_bboxes_pred, fr_t_pred - mid_t_pred,
 91 |                             mid_bboxes_gt, fr_bboxes_gt, fr_t_gt - mid_t_gt)
 92 |     fr_intersect = bbox_overlaps(fr_bboxes_pred_align_min, fr_bboxes_gt_align_min)
 93 | 
 94 |     dis_bk_min, bk_bboxes_pred_align_min, bk_bboxes_gt_align_min = \
 95 |         align_bbox_on_frame(mid_bboxes_pred, bk_bboxes_pred, mid_t_pred - bk_t_pred,
 96 |                             mid_bboxes_gt, bk_bboxes_gt, mid_t_gt - bk_t_gt)
 97 |     bk_intersect = bbox_overlaps(bk_bboxes_pred_align_min, bk_bboxes_gt_align_min)
 98 | 
 99 |     #  get enclose of front and back frame
100 |     dis_fr_max, fr_bboxes_pred_align_max, fr_bboxes_gt_align_max = \
101 |         align_bbox_on_frame(mid_bboxes_pred, fr_bboxes_pred, fr_t_pred - mid_t_pred,
102 |                             mid_bboxes_gt, fr_bboxes_gt, fr_t_gt - mid_t_gt, mode='max')
103 |     fr_enclose = bbox_enclose(fr_bboxes_pred_align_max, fr_bboxes_gt_align_max)
104 | 
105 |     dis_bk_max, bk_bboxes_pred_align_max, bk_bboxes_gt_align_max = \
106 |         align_bbox_on_frame(mid_bboxes_pred, bk_bboxes_pred, mid_t_pred - bk_t_pred,
107 |                             mid_bboxes_gt, bk_bboxes_gt, mid_t_gt - bk_t_gt, mode='max')
108 |     bk_enclose = bbox_enclose(bk_bboxes_pred_align_max, bk_bboxes_gt_align_max)
109 | 
110 |     isTube = dis_fr_min + dis_bk_min != 0
111 |     intersect = volume(mid_intersect[isTube], fr_intersect[isTube], dis_fr_min[isTube]) + \
112 |                 volume(mid_intersect[isTube], bk_intersect[isTube], dis_bk_min[isTube])
113 |     iou[isTube] = intersect / (tube_vol_pred[isTube] + tube_vol_gt[isTube] - intersect)
114 | 
115 |     enclose = volume(mid_enclose[isTube], fr_enclose[isTube], dis_fr_max[isTube]) + \
116 |               volume(mid_enclose[isTube], bk_enclose[isTube], dis_bk_max[isTube])
117 | 
118 |     giou[isTube] = iou[isTube] - (enclose - (tube_vol_pred[isTube] + tube_vol_gt[isTube] - intersect)) / enclose
119 | 
120 |     return giou
121 | 
122 | 
123 | def tube_iou(pred_tubes, target_tubes):
124 |     mid_t_pred, mid_bboxes_pred, fr_t_pred, fr_bboxes_pred, bk_t_pred, bk_bboxes_pred = get3bboxes_from_tube(pred_tubes)
125 |     mid_t_gt, mid_bboxes_gt, fr_t_gt, fr_bboxes_gt, bk_t_gt, bk_bboxes_gt = get3bboxes_from_tube(target_tubes)
126 | 
127 |     # get the tubes volume
128 |     tube_vol_pred = volume(area(mid_bboxes_pred), area(fr_bboxes_pred), fr_t_pred - mid_t_pred) + \
129 |                     volume(area(mid_bboxes_pred), area(bk_bboxes_pred), mid_t_pred - bk_t_pred)
130 |     tube_vol_gt = volume(area(mid_bboxes_gt), area(fr_bboxes_gt), fr_t_gt - mid_t_gt) + \
131 |                   volume(area(mid_bboxes_gt), area(bk_bboxes_gt), mid_t_gt - bk_t_gt)
132 | 
133 |     # overlap area on mid bbox
134 |     mid_overlap = bbox_overlaps(mid_bboxes_pred, mid_bboxes_gt)
135 | 
136 |     # overlap area on front bbox
137 |     dis_fr, fr_bboxes_pred_align, fr_bboxes_gt_align = \
138 |         align_bbox_on_frame(mid_bboxes_pred, fr_bboxes_pred, fr_t_pred - mid_t_pred,
139 |                             mid_bboxes_gt, fr_bboxes_gt, fr_t_gt - mid_t_gt)
140 |     fr_overlap = bbox_overlaps(fr_bboxes_pred_align, fr_bboxes_gt_align)
141 | 
142 |     # overlap area on back bbox
143 |     dis_bk, bk_bboxes_pred_align, bk_bboxes_gt_align = \
144 |         align_bbox_on_frame(mid_bboxes_pred, bk_bboxes_pred, mid_t_pred - bk_t_pred,
145 |                             mid_bboxes_gt, bk_bboxes_gt, mid_t_gt - bk_t_gt)
146 |     bk_overlap = bbox_overlaps(bk_bboxes_pred_align, bk_bboxes_gt_align)
147 | 
148 |     # overlap volume
149 |     res = mid_overlap / (area(mid_bboxes_gt) + area(mid_bboxes_pred) - mid_overlap)
150 |     isTube = dis_fr + dis_bk != 0
151 |     overlap = volume(mid_overlap[isTube], fr_overlap[isTube], dis_fr[isTube]) + \
152 |               volume(mid_overlap[isTube], bk_overlap[isTube], dis_bk[isTube])
153 |     res[isTube] = overlap / (tube_vol_pred[isTube] + tube_vol_gt[isTube] - overlap)
154 | 
155 |     res = res.clamp(min=1e-5, max=1)
156 |     return res
157 | 
158 | 
159 | def get3bboxes_from_tube(tubes):
160 |     mid_t = tubes[:, 0]
161 |     mid_bboxes = tubes[:, 1:5]
162 |     fr_t = tubes[:, 5]
163 |     fr_bboxes = tubes[:, 6:10]
164 |     bk_t = tubes[:, 10]
165 |     bk_bboxes = tubes[:, 11:15]
166 |     return mid_t, mid_bboxes, fr_t, fr_bboxes, bk_t, bk_bboxes
167 | 
168 | 
169 | def area(bboxes):
170 |     a = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])
171 |     if isinstance(a, np.ndarray):
172 |         return np.abs(a)
173 |     else:
174 |         return torch.abs(a)
175 | 
176 | 
177 | def volume(bbox1_area, bbox2_area, dis):
178 |     return (bbox1_area + bbox2_area + torch.sqrt(bbox1_area + 1e-5) * torch.sqrt(bbox2_area + 1e-5)) * dis
179 | 
180 | 
181 | def align_bbox_on_frame(mid1, bbox1, t1, mid2, bbox2, t2, mode='min'):
182 |     if mode == 'min':
183 |         t = torch.min(t1, t2)
184 |     else:
185 |         t = torch.max(t1, t2)
186 | 
187 |     t1_zero_ind = t1 == 0
188 |     t1_notzero_ind = t1 != 0
189 |     bbox1_aligned = torch.zeros(mid1.shape, device=mid1.device)
190 |     bbox1_aligned[t1_zero_ind] = mid1[t1_zero_ind]
191 |     bbox1_aligned[t1_notzero_ind] = mid1[t1_notzero_ind] * ((t1[t1_notzero_ind]-t[t1_notzero_ind])/(t1[t1_notzero_ind]+1e-4)).unsqueeze(1).repeat(1, 4) + \
192 |                                     bbox1[t1_notzero_ind] * (t[t1_notzero_ind]/(t1[t1_notzero_ind]+1e-4)).unsqueeze(1).repeat(1, 4)
193 | 
194 |     t2_zero_ind = t2 == 0
195 |     t2_notzero_ind = t2 != 0
196 |     bbox2_aligned = torch.zeros(mid2.shape, device=mid2.device)
197 |     bbox2_aligned[t2_zero_ind] = mid2[t2_zero_ind]
198 |     bbox2_aligned[t2_notzero_ind] = mid2[t2_notzero_ind] * ((t2[t2_notzero_ind]-t[t2_notzero_ind])/(t2[t2_notzero_ind]+1e-4)).unsqueeze(1).repeat(1, 4) + \
199 |                                     bbox2[t2_notzero_ind] * (t[t2_notzero_ind]/(t2[t2_notzero_ind]+1e-4)).unsqueeze(1).repeat(1, 4)
200 | 
201 |     return t, bbox1_aligned, bbox2_aligned
202 | 
203 | 
204 | def bbox_overlaps(bboxes1, bboxes2):
205 |     rows = bboxes1.shape[0]
206 |     cols = bboxes2.shape[0]
207 | 
208 |     if rows * cols == 0:
209 |         return bboxes1.new(rows, 1)
210 | 
211 |     if isinstance(bboxes1, np.ndarray):
212 |         # To avoid wrong pred bbox which is not left top cord and right bottom cord
213 |         lt = np.maximum(np.minimum(bboxes1[:, :2], bboxes1[:, 2:]), np.minimum(bboxes2[:, :2], bboxes2[:, 2:]))
214 |         rb = np.minimum(np.maximum(bboxes1[:, 2:], bboxes1[:, :2]), np.maximum(bboxes2[:, 2:], bboxes2[:, :2]))
215 |         wh = np.clip(rb - lt, 0, None)
216 |     else:
217 |         lt = torch.max(torch.min(bboxes1[:, :2], bboxes1[:, 2:]), torch.min(bboxes2[:, :2], bboxes2[:, 2:]))
218 |         rb = torch.min(torch.max(bboxes1[:, 2:], bboxes1[:, :2]), torch.max(bboxes2[:, 2:], bboxes2[:, :2]))
219 |         wh = (rb - lt).clamp(min=0)
220 |     overlap = wh[:, 0] * wh[:, 1]
221 | 
222 |     return overlap
223 | 
224 | 
225 | def bbox_enclose(bboxes1, bboxes2):
226 |     rows = bboxes1.shape[0]
227 |     cols = bboxes2.shape[0]
228 | 
229 |     if rows * cols == 0:
230 |         return bboxes1.new(rows, 1)
231 | 
232 |     if isinstance(bboxes1, np.ndarray):
233 |         # To avoid wrong pred bbox which is not left top cord and right bottom cord
234 |         lt = np.minimum(np.minimum(bboxes1[:, :2], bboxes1[:, 2:]),
235 |                         np.minimum(bboxes2[:, :2], bboxes2[:, 2:]))
236 |         rb = np.maximum(np.maximum(bboxes1[:, 2:], bboxes1[:, :2]),
237 |                         np.maximum(bboxes2[:, 2:], bboxes2[:, :2]))
238 |         wh = np.clip(rb - lt, 0, None)
239 |     else:
240 |         lt = torch.min(torch.min(bboxes1[:, :2], bboxes1[:, 2:]),
241 |                        torch.min(bboxes2[:, :2], bboxes2[:, 2:]))
242 |         rb = torch.max(torch.max(bboxes1[:, 2:], bboxes1[:, :2]),
243 |                        torch.max(bboxes2[:, 2:], bboxes2[:, :2]))
244 |         wh = (rb - lt).clamp(min=0)
245 |     overlap = wh[:, 0] * wh[:, 1]
246 | 
247 |     return overlap
248 | 
249 | 
250 | def bbox_iou_loss(bboxes1, bboxes2):
251 |     iou = bbox_iou(bboxes1, bboxes2)
252 |     return 1 - iou
253 | 
254 | 
255 | def bbox_iou(bboxes1, bboxes2):
256 | 
257 |     overlap = bbox_overlaps(bboxes1, bboxes2)
258 | 
259 |     area1 = area(bboxes1)
260 |     area2 = area(bboxes2)
261 | 
262 |     ious = overlap / (area1 + area2 - overlap)
263 | 
264 |     return ious
265 | 


--------------------------------------------------------------------------------
/optim/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BoPang1996/TubeTK/bcca334c5348f9ae33e04595e1af93cf8351e50e/optim/__init__.py


--------------------------------------------------------------------------------
/optim/lr_scheduler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from bisect import bisect_right
 3 | import torch
 4 | import numpy as np
 5 | 
 6 | 
 7 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
 8 |     def __init__(
 9 |         self,
10 |         optimizer,
11 |         milestones,
12 |         gamma=0.1,
13 |         warmup_factor=1.0 / 3,
14 |         warmup_iters=500,
15 |         warmup_method="linear",
16 |         last_epoch=-1,
17 |     ):
18 |         if not list(milestones) == sorted(milestones):
19 |             raise ValueError(
20 |                 "Milestones should be a list of" " increasing integers. Got {}",
21 |                 milestones,
22 |             )
23 | 
24 |         if warmup_method not in ("constant", "linear"):
25 |             raise ValueError(
26 |                 "Only 'constant' or 'linear' warmup_method accepted"
27 |                 "got {}".format(warmup_method)
28 |             )
29 |         self.milestones = milestones
30 |         self.gamma = gamma
31 |         self.warmup_factor = warmup_factor
32 |         self.warmup_iters = warmup_iters
33 |         self.warmup_method = warmup_method
34 |         super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch)
35 | 
36 |     def get_lr(self):
37 |         warmup_factor = 1
38 |         if self.last_epoch < self.warmup_iters:
39 |             if self.warmup_method == "constant":
40 |                 warmup_factor = self.warmup_factor
41 |             elif self.warmup_method == "linear":
42 |                 alpha = float(self.last_epoch) / self.warmup_iters
43 |                 warmup_factor = self.warmup_factor * (1 - alpha) + alpha
44 |         return [
45 |             base_lr
46 |             * warmup_factor
47 |             * self.gamma ** bisect_right(self.milestones, self.last_epoch)
48 |             for base_lr in self.base_lrs
49 |         ]
50 | 
51 | 
52 | class HalfPeriodCosStepLR(torch.optim.lr_scheduler._LRScheduler):
53 |     def __init__(
54 |         self,
55 |         optimizer,
56 |         warmup_factor=1.0 / 3,
57 |         warmup_iters=8000,
58 |         max_iters=93750,
59 |         warmup_method="linear",
60 |         last_epoch=-1,
61 |     ):
62 |         if warmup_method not in ("constant", "linear"):
63 |             raise ValueError(
64 |                 "Only 'constant' or 'linear' warmup_method accepted"
65 |                 "got {}".format(warmup_method)
66 |             )
67 |         self.warmup_factor = warmup_factor
68 |         self.warmup_iters = warmup_iters
69 |         self.max_iters = max_iters
70 |         self.warmup_method = warmup_method
71 |         super(HalfPeriodCosStepLR, self).__init__(optimizer, last_epoch)
72 | 
73 |     def get_lr(self):
74 |         warmup_factor = 1
75 |         if self.last_epoch < self.warmup_iters:
76 |             if self.warmup_method == "constant":
77 |                 warmup_factor = self.warmup_factor
78 |             elif self.warmup_method == "linear":
79 |                 alpha = float(self.last_epoch) / self.warmup_iters
80 |                 warmup_factor = self.warmup_factor * (1 - alpha) + alpha
81 |         else:
82 |             warmup_factor = 0.5 * (np.cos(self.last_epoch / self.max_iters * np.pi) + 1)
83 |         return [
84 |             base_lr
85 |             * warmup_factor
86 |             for base_lr in self.base_lrs
87 |         ]
88 | 


--------------------------------------------------------------------------------
/optim/solver.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch
 3 | 
 4 | 
 5 | def make_optimizer(arg, model):
 6 |     params = []
 7 |     bn_param_set = set()
 8 |     for name, module in model.named_modules():
 9 |         if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
10 |             bn_param_set.add(name+".weight")
11 |             bn_param_set.add(name+".bias")
12 |     for key, value in model.named_parameters():
13 |         if not value.requires_grad:
14 |             continue
15 |         lr = arg.lr
16 |         weight_decay = arg.weight_decay
17 |         if key in bn_param_set:
18 |             weight_decay = arg.weight_decay * 0
19 |         elif "bias" in key:
20 |             lr = arg.lr * 1
21 |             weight_decay = arg.weight_decay
22 |         params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}]
23 | 
24 |     optimizer = torch.optim.SGD(params, arg.lr, momentum=0.9)
25 |     return optimizer
26 | 


--------------------------------------------------------------------------------
/post_processing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BoPang1996/TubeTK/bcca334c5348f9ae33e04595e1af93cf8351e50e/post_processing/__init__.py


--------------------------------------------------------------------------------
/post_processing/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BoPang1996/TubeTK/bcca334c5348f9ae33e04595e1af93cf8351e50e/post_processing/nms/__init__.py


--------------------------------------------------------------------------------
/post_processing/nms/setup.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | from setuptools import setup, Extension
 3 | 
 4 | import numpy as np
 5 | from Cython.Build import cythonize
 6 | from Cython.Distutils import build_ext
 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
 8 | 
 9 | ext_args = dict(
10 |     include_dirs=[np.get_include()],
11 |     language='c++',
12 |     extra_compile_args={
13 |         'cc': ['-Wno-unused-function', '-Wno-write-strings'],
14 |         'nvcc': ['-c', '--compiler-options', '-fPIC'],
15 |     },
16 | )
17 | 
18 | extensions = [
19 |     Extension('soft_nms_cpu', ['src/soft_nms_cpu.pyx'], **ext_args),
20 | ]
21 | 
22 | 
23 | def customize_compiler_for_nvcc(self):
24 |     """inject deep into distutils to customize how the dispatch
25 |     to cc/nvcc works.
26 |     If you subclass UnixCCompiler, it's not trivial to get your subclass
27 |     injected in, and still have the right customizations (i.e.
28 |     distutils.sysconfig.customize_compiler) run on it. So instead of going
29 |     the OO route, I have this. Note, it's kindof like a wierd functional
30 |     subclassing going on."""
31 | 
32 |     # tell the compiler it can processes .cu
33 |     self.src_extensions.append('.cu')
34 | 
35 |     # save references to the default compiler_so and _comple methods
36 |     default_compiler_so = self.compiler_so
37 |     super = self._compile
38 | 
39 |     # now redefine the _compile method. This gets executed for each
40 |     # object but distutils doesn't have the ability to change compilers
41 |     # based on source extension: we add it.
42 |     def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
43 |         if osp.splitext(src)[1] == '.cu':
44 |             # use the cuda for .cu files
45 |             self.set_executable('compiler_so', 'nvcc')
46 |             # use only a subset of the extra_postargs, which are 1-1 translated
47 |             # from the extra_compile_args in the Extension class
48 |             postargs = extra_postargs['nvcc']
49 |         else:
50 |             postargs = extra_postargs['cc']
51 | 
52 |         super(obj, src, ext, cc_args, postargs, pp_opts)
53 |         # reset the default compiler_so, which we might have changed for cuda
54 |         self.compiler_so = default_compiler_so
55 | 
56 |     # inject our redefined _compile method into the class
57 |     self._compile = _compile
58 | 
59 | 
60 | class custom_build_ext(build_ext):
61 | 
62 |     def build_extensions(self):
63 |         customize_compiler_for_nvcc(self.compiler)
64 |         build_ext.build_extensions(self)
65 | 
66 | 
67 | setup(
68 |     name='soft_nms',
69 |     cmdclass={'build_ext': custom_build_ext},
70 |     ext_modules=cythonize(extensions),
71 | )
72 | 
73 | setup(
74 |     name='nms_cuda',
75 |     ext_modules=[
76 |         CUDAExtension('nms_cuda', [
77 |             'src/nms_cuda.cpp',
78 |             'src/nms_kernel.cu',
79 |         ]),
80 |         CUDAExtension('nms_cpu', [
81 |             'src/nms_cpu.cpp',
82 |         ]),
83 |     ],
84 |     cmdclass={'build_ext': BuildExtension})
85 | 


--------------------------------------------------------------------------------
/post_processing/nms/src/nms_cpu.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include <torch/extension.h>
 3 | 
 4 | template <typename scalar_t>
 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
 6 |   AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
 7 | 
 8 |   if (dets.numel() == 0) {
 9 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
10 |   }
11 | 
12 |   auto x1_t = dets.select(1, 0).contiguous();
13 |   auto y1_t = dets.select(1, 1).contiguous();
14 |   auto x2_t = dets.select(1, 2).contiguous();
15 |   auto y2_t = dets.select(1, 3).contiguous();
16 |   auto scores = dets.select(1, 4).contiguous();
17 | 
18 |   at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
19 | 
20 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
21 | 
22 |   auto ndets = dets.size(0);
23 |   at::Tensor suppressed_t =
24 |       at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
25 | 
26 |   auto suppressed = suppressed_t.data<uint8_t>();
27 |   auto order = order_t.data<int64_t>();
28 |   auto x1 = x1_t.data<scalar_t>();
29 |   auto y1 = y1_t.data<scalar_t>();
30 |   auto x2 = x2_t.data<scalar_t>();
31 |   auto y2 = y2_t.data<scalar_t>();
32 |   auto areas = areas_t.data<scalar_t>();
33 | 
34 |   for (int64_t _i = 0; _i < ndets; _i++) {
35 |     auto i = order[_i];
36 |     if (suppressed[i] == 1) continue;
37 |     auto ix1 = x1[i];
38 |     auto iy1 = y1[i];
39 |     auto ix2 = x2[i];
40 |     auto iy2 = y2[i];
41 |     auto iarea = areas[i];
42 | 
43 |     for (int64_t _j = _i + 1; _j < ndets; _j++) {
44 |       auto j = order[_j];
45 |       if (suppressed[j] == 1) continue;
46 |       auto xx1 = std::max(ix1, x1[j]);
47 |       auto yy1 = std::max(iy1, y1[j]);
48 |       auto xx2 = std::min(ix2, x2[j]);
49 |       auto yy2 = std::min(iy2, y2[j]);
50 | 
51 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
52 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
53 |       auto inter = w * h;
54 |       auto ovr = inter / (iarea + areas[j] - inter);
55 |       if (ovr >= threshold) suppressed[j] = 1;
56 |     }
57 |   }
58 |   return at::nonzero(suppressed_t == 0).squeeze(1);
59 | }
60 | 
61 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
62 |   at::Tensor result;
63 |   AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
64 |     result = nms_cpu_kernel<scalar_t>(dets, threshold);
65 |   });
66 |   return result;
67 | }
68 | 
69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
70 |   m.def("nms", &nms, "non-maximum suppression");
71 | }


--------------------------------------------------------------------------------
/post_processing/nms/src/nms_cuda.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include <torch/extension.h>
 3 | 
 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
 5 | 
 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh, float side_nms_overlap_thresh);
 7 | 
 8 | at::Tensor nms(const at::Tensor& dets, const float threshold, const float side_threshold) {
 9 |   CHECK_CUDA(dets);
10 |   if (dets.numel() == 0)
11 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
12 |   return nms_cuda(dets, threshold, side_threshold);
13 | }
14 | 
15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
16 |   m.def("nms", &nms, "non-maximum suppression");
17 | }


--------------------------------------------------------------------------------
/post_processing/nms/src/nms_kernel.cu:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
  2 | #include <ATen/ATen.h>
  3 | #include <ATen/cuda/CUDAContext.h>
  4 | 
  5 | #include <THC/THC.h>
  6 | #include <THC/THCDeviceUtils.cuh>
  7 | 
  8 | #include <vector>
  9 | #include <iostream>
 10 | 
 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
 12 | 
 13 | int const bbox_dim = 15;
 14 | 
 15 | 
 16 | struct res
 17 | {
 18 |      float* bbox1_aligned;
 19 |      float* bbox2_aligned;
 20 | };
 21 | 
 22 | __device__ inline res align_bbox_on_frame(float const * const mid1, float const *const  bbox1, float const* t1, \
 23 | 
 24 |                                              float const * const mid2, float const * const bbox2, float const* t2, float const* mid_t){
 25 |   float d1 = abs(*t1 - *mid_t), d2 = abs(*t2 - *mid_t);
 26 |   float t = min(d1, d2);
 27 |   float bbox1_aligned[4], bbox2_aligned[4];
 28 |   if (d1 != 0){
 29 |     for (int i=0; i< 4; i++){
 30 |       bbox1_aligned[i] = mid1[i] * ((d1-t)/ d1) + bbox1[i] * (t/ d1);
 31 |     }
 32 |   }else{
 33 |     for (int i=0; i< 4; i++){
 34 |       bbox1_aligned[i] = mid1[i];
 35 |     }
 36 |   }
 37 |   if (d2 != 0){
 38 |     for (int i=0; i< 4; i++){
 39 |       bbox2_aligned[i] = mid2[i] * ((d2-t)/ d2) + bbox2[i] * (t/ d2);
 40 |     }
 41 |   }else{
 42 |     for (int i=0; i< 4; i++){
 43 |       bbox2_aligned[i] = mid2[i];
 44 |     }
 45 |   }
 46 |   res aligned_bbox = {bbox1_aligned, bbox2_aligned};
 47 |   return aligned_bbox;
 48 | }
 49 | 
 50 | __device__ inline float devIoU(float const * const a, float const * const b) {
 51 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 52 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 53 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 54 |   float interS = width * height;
 55 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 56 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 57 |   return interS / (Sa + Sb - interS);
 58 | }
 59 | 
 60 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, const float side_nms_overlap_thresh,
 61 |                            const float *dev_boxes, unsigned long long *dev_mask) {
 62 |   const int row_start = blockIdx.y;
 63 |   const int col_start = blockIdx.x;
 64 | 
 65 |   // if (row_start > col_start) return;
 66 | 
 67 |   const int row_size =
 68 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 69 |   const int col_size =
 70 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 71 | 
 72 |   __shared__ float block_boxes[threadsPerBlock * (bbox_dim + 1)];
 73 |   if (threadIdx.x < col_size) {
 74 |     int d = 0;
 75 |     for (;d <= bbox_dim; d ++){
 76 |       block_boxes[threadIdx.x * (bbox_dim + 1) + d] =
 77 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * (bbox_dim + 1) + d];
 78 |     }
 79 |   }
 80 |   __syncthreads();
 81 | 
 82 |   if (threadIdx.x < row_size) {
 83 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 84 |     const float *cur_box = dev_boxes + cur_box_idx * (bbox_dim + 1);
 85 |     int i = 0;
 86 |     unsigned long long t = 0;
 87 |     int start = 0;
 88 |     if (row_start == col_start) {
 89 |       start = threadIdx.x + 1;
 90 |     }
 91 |     for (i = start; i < col_size; i++) {
 92 |       if (bbox_dim == 4){
 93 |           if (devIoU(cur_box, block_boxes + i * (bbox_dim + 1)) > nms_overlap_thresh) {
 94 |             t |= 1ULL << i;
 95 |           }
 96 |       }
 97 |       else if(bbox_dim == 15){
 98 |           const float *cur_box_mid = cur_box + 1;
 99 |           const float *cur_box_fr = cur_box + 6;
100 |           const float *cur_box_bk = cur_box + 11;
101 | 
102 |           const float *block_boxes_mid = block_boxes + i * (bbox_dim + 1) + 1;
103 |           const float *block_boxes_fr = block_boxes + i * (bbox_dim + 1) + 6;
104 |           const float *block_boxes_bk = block_boxes + i * (bbox_dim + 1) + 11;
105 | 
106 |           res aligned_bbox_fr = align_bbox_on_frame(cur_box_mid, cur_box_fr, cur_box + 5,
107 |                                            block_boxes_mid, block_boxes_fr, block_boxes + i * (bbox_dim + 1) + 5, cur_box);
108 |           const float * cur_box_fr_aligned = aligned_bbox_fr.bbox1_aligned;
109 |           const float * block_boxes_fr_aligned = aligned_bbox_fr.bbox2_aligned;
110 | 
111 |           res aligned_bbox_bk = align_bbox_on_frame(cur_box_mid, cur_box_bk, cur_box + 10,
112 |                                            block_boxes_mid, block_boxes_bk, block_boxes + i * (bbox_dim + 1) + 10, cur_box);
113 |           const float * cur_box_bk_aligned = aligned_bbox_bk.bbox1_aligned;
114 |           const float * block_boxes_bk_aligned = aligned_bbox_bk.bbox2_aligned;
115 | 
116 |           if (devIoU(cur_box_mid, block_boxes_mid) > nms_overlap_thresh &&
117 |               devIoU(cur_box_fr_aligned, block_boxes_fr_aligned) > side_nms_overlap_thresh &&
118 |               devIoU(cur_box_bk_aligned, block_boxes_bk_aligned) > side_nms_overlap_thresh) {
119 |             t |= 1ULL << i;
120 |           }
121 |       }
122 | 
123 | 
124 |     }
125 |     const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
126 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
127 |   }
128 | }
129 | 
130 | // boxes is a N x 5 tensor
131 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh, float side_nms_overlap_thresh) {
132 |   using scalar_t = float;
133 |   AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
134 |   auto scores = boxes.select(1, bbox_dim);
135 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
136 |   auto boxes_sorted = boxes.index_select(0, order_t);
137 | 
138 |   int boxes_num = boxes.size(0);
139 | 
140 |   const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
141 | 
142 |   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
143 | 
144 |   THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
145 | 
146 |   unsigned long long* mask_dev = NULL;
147 |   //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
148 |   //                      boxes_num * col_blocks * sizeof(unsigned long long)));
149 | 
150 |   mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
151 | 
152 |   dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
153 |               THCCeilDiv(boxes_num, threadsPerBlock));
154 |   dim3 threads(threadsPerBlock);
155 |   nms_kernel<<<blocks, threads>>>(boxes_num,
156 |                                   nms_overlap_thresh,
157 |                                   side_nms_overlap_thresh,
158 |                                   boxes_dev,
159 |                                   mask_dev);
160 | 
161 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
162 |   THCudaCheck(cudaMemcpy(&mask_host[0],
163 |                         mask_dev,
164 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
165 |                         cudaMemcpyDeviceToHost));
166 | 
167 |   std::vector<unsigned long long> remv(col_blocks);
168 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
169 | 
170 |   at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
171 |   int64_t* keep_out = keep.data<int64_t>();
172 | 
173 |   int num_to_keep = 0;
174 |   for (int i = 0; i < boxes_num; i++) {
175 |     int nblock = i / threadsPerBlock;
176 |     int inblock = i % threadsPerBlock;
177 | 
178 |     if (!(remv[nblock] & (1ULL << inblock))) {
179 |       keep_out[num_to_keep++] = i;
180 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
181 |       for (int j = nblock; j < col_blocks; j++) {
182 |         remv[j] |= p[j];
183 |       }
184 |     }
185 |   }
186 | 
187 |   THCudaFree(state, mask_dev);
188 |   // TODO improve this part
189 |   return std::get<0>(order_t.index({
190 |                        keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
191 |                          order_t.device(), keep.scalar_type())
192 |                      }).sort(0, false));
193 | }


--------------------------------------------------------------------------------
/post_processing/nms/src/soft_nms_cpu.pyx:
--------------------------------------------------------------------------------
  1 | # ----------------------------------------------------------
  2 | # Soft-NMS: Improving Object Detection With One Line of Code
  3 | # Copyright (c) University of Maryland, College Park
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Navaneeth Bodla and Bharat Singh
  6 | # Modified by Kai Chen
  7 | # ----------------------------------------------------------
  8 | 
  9 | # cython: language_level=3, boundscheck=False
 10 | 
 11 | import numpy as np
 12 | cimport numpy as np
 13 | 
 14 | 
 15 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
 16 |     return a if a >= b else b
 17 | 
 18 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
 19 |     return a if a <= b else b
 20 | 
 21 | 
 22 | def soft_nms_cpu(
 23 |     np.ndarray[float, ndim=2] boxes_in,
 24 |     float iou_thr,
 25 |     unsigned int method=1,
 26 |     float sigma=0.5,
 27 |     float min_score=0.001,
 28 | ):
 29 |     boxes = boxes_in.copy()
 30 |     cdef unsigned int N = boxes.shape[0]
 31 |     cdef float iw, ih, box_area
 32 |     cdef float ua
 33 |     cdef int pos = 0
 34 |     cdef float maxscore = 0
 35 |     cdef int maxpos = 0
 36 |     cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov
 37 |     inds = np.arange(N)
 38 | 
 39 |     for i in range(N):
 40 |         maxscore = boxes[i, 4]
 41 |         maxpos = i
 42 | 
 43 |         tx1 = boxes[i, 0]
 44 |         ty1 = boxes[i, 1]
 45 |         tx2 = boxes[i, 2]
 46 |         ty2 = boxes[i, 3]
 47 |         ts = boxes[i, 4]
 48 |         ti = inds[i]
 49 | 
 50 |         pos = i + 1
 51 |         # get max box
 52 |         while pos < N:
 53 |             if maxscore < boxes[pos, 4]:
 54 |                 maxscore = boxes[pos, 4]
 55 |                 maxpos = pos
 56 |             pos = pos + 1
 57 | 
 58 |         # add max box as a detection
 59 |         boxes[i, 0] = boxes[maxpos, 0]
 60 |         boxes[i, 1] = boxes[maxpos, 1]
 61 |         boxes[i, 2] = boxes[maxpos, 2]
 62 |         boxes[i, 3] = boxes[maxpos, 3]
 63 |         boxes[i, 4] = boxes[maxpos, 4]
 64 |         inds[i] = inds[maxpos]
 65 | 
 66 |         # swap ith box with position of max box
 67 |         boxes[maxpos, 0] = tx1
 68 |         boxes[maxpos, 1] = ty1
 69 |         boxes[maxpos, 2] = tx2
 70 |         boxes[maxpos, 3] = ty2
 71 |         boxes[maxpos, 4] = ts
 72 |         inds[maxpos] = ti
 73 | 
 74 |         tx1 = boxes[i, 0]
 75 |         ty1 = boxes[i, 1]
 76 |         tx2 = boxes[i, 2]
 77 |         ty2 = boxes[i, 3]
 78 |         ts = boxes[i, 4]
 79 | 
 80 |         pos = i + 1
 81 |         # NMS iterations, note that N changes if detection boxes fall below
 82 |         # threshold
 83 |         while pos < N:
 84 |             x1 = boxes[pos, 0]
 85 |             y1 = boxes[pos, 1]
 86 |             x2 = boxes[pos, 2]
 87 |             y2 = boxes[pos, 3]
 88 |             s = boxes[pos, 4]
 89 | 
 90 |             area = (x2 - x1 + 1) * (y2 - y1 + 1)
 91 |             iw = (min(tx2, x2) - max(tx1, x1) + 1)
 92 |             if iw > 0:
 93 |                 ih = (min(ty2, y2) - max(ty1, y1) + 1)
 94 |                 if ih > 0:
 95 |                     ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
 96 |                     ov = iw * ih / ua  # iou between max box and detection box
 97 | 
 98 |                     if method == 1:  # linear
 99 |                         if ov > iou_thr:
100 |                             weight = 1 - ov
101 |                         else:
102 |                             weight = 1
103 |                     elif method == 2:  # gaussian
104 |                         weight = np.exp(-(ov * ov) / sigma)
105 |                     else:  # original NMS
106 |                         if ov > iou_thr:
107 |                             weight = 0
108 |                         else:
109 |                             weight = 1
110 | 
111 |                     boxes[pos, 4] = weight * boxes[pos, 4]
112 | 
113 |                     # if box score falls below threshold, discard the box by
114 |                     # swapping with last box update N
115 |                     if boxes[pos, 4] < min_score:
116 |                         boxes[pos, 0] = boxes[N-1, 0]
117 |                         boxes[pos, 1] = boxes[N-1, 1]
118 |                         boxes[pos, 2] = boxes[N-1, 2]
119 |                         boxes[pos, 3] = boxes[N-1, 3]
120 |                         boxes[pos, 4] = boxes[N-1, 4]
121 |                         inds[pos] = inds[N - 1]
122 |                         N = N - 1
123 |                         pos = pos - 1
124 | 
125 |             pos = pos + 1
126 | 
127 |     return boxes[:N], inds[:N]
128 | 


--------------------------------------------------------------------------------
/post_processing/tube_iou_matching.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import cv2
  3 | import os
  4 | import datetime
  5 | import numpy as np
  6 | import pandas as pd
  7 | from tqdm import tqdm
  8 | from network.utils import bbox_iou
  9 | from datetime import datetime
 10 | import multiprocessing
 11 | from scipy.optimize import linear_sum_assignment
 12 | 
 13 | 
 14 | class Track:
 15 |     '''
 16 |     Track is the class of track. it contains all the node and manages the node. it contains the following information:
 17 |     1) all the nodes
 18 |     2) track id. it is unique it identify each track
 19 |     3) track pool id. it is a number to give a new id to a new track
 20 |     4) age. age indicates how old is the track
 21 |     5) max_age. indicates the dead age of this track
 22 |     '''
 23 |     _id_pool = 1
 24 |     def __init__(self):
 25 |         self.nodes = list()
 26 |         self.frames = {}
 27 |         self.mid_frames = {}
 28 |         self.id = Track._id_pool
 29 |         Track._id_pool += 1
 30 |         self.color = tuple((np.random.rand(3) * 255).astype(int).tolist())
 31 |         self.prev_direction = None
 32 | 
 33 |     def update_frames(self, all_tube_frames, tube_boxes, mid_frame, mid_box, score, tube_direction):
 34 | 
 35 |         for frame, frame_box in zip(all_tube_frames, tube_boxes):
 36 |             if frame not in self.frames:
 37 |                 self.frames[frame] = [frame_box, 1, score]
 38 |             else:
 39 |                 self.frames[frame][0] += frame_box.astype(np.float)
 40 |                 self.frames[frame][1] += 1
 41 |                 self.frames[frame][2] += score
 42 | 
 43 |         if mid_frame not in self.mid_frames:
 44 |             self.mid_frames[mid_frame] = [mid_box.astype(np.float), 1, score]
 45 |         else:
 46 |             self.mid_frames[mid_frame][0] += mid_box.astype(np.float)
 47 |             self.mid_frames[mid_frame][1] += 1
 48 |             self.mid_frames[mid_frame][2] += score
 49 | 
 50 |         def get_center(box):
 51 |             return np.array(((box[0] + box[2]) / 2, (box[1] + box[3]) / 2))
 52 |         
 53 |         front_frame = np.max(all_tube_frames)
 54 |         back_frame = np.min(all_tube_frames)
 55 | 
 56 |         end_box = self.frames[front_frame][0] / self.frames[front_frame][1]
 57 |         start_box = self.frames[back_frame][0] / self.frames[back_frame][1]
 58 |         self.prev_direction = np.zeros(3)
 59 |         self.prev_direction[:2] = get_center(end_box) - get_center(start_box)
 60 |         self.prev_direction[2] = front_frame - back_frame
 61 | 
 62 | 
 63 | def track_tube_iou(track_boxes, tube_boxes):
 64 |     track_boxes = np.atleast_3d(track_boxes).astype(np.float)  # (n_track, n_tbbox, 4)
 65 |     tube_boxes = np.atleast_2d(tube_boxes).astype(np.float)    # (n_tbbox, 4)
 66 | 
 67 |     def track_tube_overlaps(bboxes1, bboxes2):
 68 |         lt = np.maximum(np.minimum(bboxes1[:, :, :2], bboxes1[:, :, 2:]), np.minimum(bboxes2[:, :2], bboxes2[:, 2:]))  # [rows, 2]
 69 |         rb = np.minimum(np.maximum(bboxes1[:, :, 2:], bboxes1[:, :, :2]), np.maximum(bboxes2[:, 2:], bboxes2[:, :2]))  # [rows, 2]
 70 |         wh = np.clip(rb - lt, 0, None)
 71 |         overlap = wh[:, :, 0] * wh[:, :, 1]
 72 |         return overlap
 73 |     
 74 |     overlap = track_tube_overlaps(track_boxes, tube_boxes)
 75 | 
 76 |     area1 = (track_boxes[:, :, 2] - track_boxes[:, :, 0]) * (track_boxes[:, :, 3] - track_boxes[:, :, 1])
 77 |     area1 = np.abs(area1)
 78 |     area2 = (tube_boxes[:, 2] - tube_boxes[:, 0]) * (tube_boxes[:, 3] - tube_boxes[:, 1])
 79 |     area2 = np.abs(area2)
 80 | 
 81 |     ious = overlap / (area1 + area2 - overlap)
 82 | 
 83 |     return ious
 84 | 
 85 | 
 86 | def get_shape_diff(track_boxes, tube_boxes):
 87 |     track_boxes = np.atleast_3d(track_boxes).astype(np.float)  # (n_track, n_tbbox, 4)
 88 |     tube_boxes = np.atleast_2d(tube_boxes).astype(np.float)  # (n_tbbox, 4)
 89 | 
 90 |     track_height = track_boxes[:, :, 2] - track_boxes[:, :, 0]
 91 |     track_width = track_boxes[:, :, 3] - track_boxes[:, :, 1]
 92 |     tube_height = tube_boxes[:, 2] - tube_boxes[:, 0]
 93 |     tube_width = tube_boxes[:, 3] - tube_boxes[:, 1]
 94 | 
 95 |     diff = np.abs(track_height - tube_height) / (track_height + tube_height) + \
 96 |         np.abs(track_width - tube_width) / (track_width + tube_width)
 97 |     
 98 |     return np.exp(1.5 * -diff)
 99 | 
100 | 
101 | def update_tracks_fast(tracks, tube, arg):
102 |     mid_frame = tube[0].astype(np.int)
103 |     mid_box = tube[1:5]
104 |     end_frame = tube[5].astype(np.int)
105 |     end_box = tube[6:10]
106 |     start_frame = tube[10].astype(np.int)
107 |     start_box = tube[11:15]
108 |     score = tube[15]
109 | 
110 |     def get_center(box):
111 |         return np.array(((box[0] + box[2]) / 2, (box[1] + box[3]) / 2))
112 | 
113 |     back_frames = np.arange(start_frame, mid_frame)
114 |     front_frames = np.arange(mid_frame + 1, end_frame + 1)
115 |     all_tube_frames = np.arange(start_frame, end_frame + 1)
116 |     
117 |     back_start_coef = (mid_frame - back_frames) / (mid_frame - start_frame)
118 |     back_mid_coef = (back_frames - start_frame) / (mid_frame - start_frame)
119 |     front_mid_coef = (end_frame - front_frames) / (end_frame - mid_frame)
120 |     front_end_coef = (front_frames - mid_frame) / (end_frame - mid_frame)
121 | 
122 |     back_frame_boxes = np.outer(back_start_coef, start_box) + np.outer(back_mid_coef, mid_box)
123 |     front_frame_boxes = np.outer(front_end_coef, end_box) + np.outer(front_mid_coef, mid_box)
124 | 
125 |     tube_boxes = np.concatenate((back_frame_boxes, mid_box[None], front_frame_boxes))
126 |     tube_frame_num = len(all_tube_frames)
127 | 
128 |     depth_divider = 8
129 | 
130 |     tube_direction = np.zeros(3)
131 |     tube_direction[:2] = get_center(end_box) - get_center(start_box)
132 |     tube_direction[2] = np.max(all_tube_frames) - np.min(all_tube_frames)
133 |     tube_direction[2] /= depth_divider
134 | 
135 |     if len(tracks) == 0:
136 |         new_track = Track()
137 |         new_track.update_frames(all_tube_frames, tube_boxes, mid_frame, mid_box, score, tube_direction)
138 |         tracks.append(new_track)
139 |         return
140 | 
141 |     all_has_frame = np.zeros((len(tracks), tube_frame_num), dtype=np.bool)
142 |     all_track_boxes = np.zeros((len(tracks), *tube_boxes.shape))
143 |     track_direction = np.zeros((len(tracks), 3))
144 | 
145 |     for track_idx, track in enumerate(tracks):
146 |         # overlap_area = [1e8, -1]
147 |         if track.prev_direction is not None:
148 |             track_direction[track_idx, :] = track.prev_direction
149 | 
150 |         for i, frame in enumerate(all_tube_frames):
151 |             if frame not in track.frames:
152 |                 continue
153 |             all_has_frame[track_idx, i] = True
154 |             all_track_boxes[track_idx, i, :] = \
155 |                 track.frames[frame][0] / track.frames[frame][1]
156 |             # overlap_area[0] = min(overlap_area[0], frame)
157 |             # overlap_area[1] = max(overlap_area[1], frame)
158 | 
159 |         # if overlap_area[1] < 0:
160 |         #     continue
161 |         # while overlap_area[0] - 1 in track.frames and overlap_area[1] - overlap_area[0] + 1 < tube_frame_num:
162 |         #     overlap_area[0] -= 1
163 |         # while overlap_area[1] + 1 in track.frames and overlap_area[1] - overlap_area[0] + 1 < tube_frame_num:
164 |         #     overlap_area[1] += 1
165 |         # track_direction[track_idx, :2] = get_center(track.frames[overlap_area[1]][0] / track.frames[overlap_area[1]][1]) - \
166 |         #         get_center(track.frames[overlap_area[0]][0] / track.frames[overlap_area[0]][1])
167 |         # track_direction[track_idx, 2] = overlap_area[1] - overlap_area[0]
168 | 
169 |     track_direction[:, 2] /= depth_divider
170 | 
171 |     has_overlap = (np.sum(all_has_frame, axis=1) > 0)
172 |     all_iou = np.zeros(all_has_frame.shape, dtype=np.float)
173 |     shape_diff = np.zeros(all_has_frame.shape, dtype=np.float)
174 |     all_iou[has_overlap] = track_tube_iou(all_track_boxes[has_overlap], tube_boxes)
175 |     shape_diff[has_overlap] = get_shape_diff(all_track_boxes[has_overlap], tube_boxes)
176 | 
177 |     mean_all_iou = np.zeros(has_overlap.shape, dtype=np.float)
178 |     mean_all_iou[has_overlap] = np.sum(all_iou[has_overlap], axis=1) / np.sum(all_has_frame[has_overlap], axis=1)
179 | 
180 |     angle_cos = np.ones_like(mean_all_iou)
181 |     norm_mul = np.linalg.norm(track_direction, axis=1) * np.linalg.norm(tube_direction)
182 | 
183 |     cos_mask = np.logical_and(has_overlap, norm_mul > 0)
184 |     angle_cos[cos_mask] = np.dot(track_direction[cos_mask], tube_direction) / norm_mul[cos_mask]
185 | 
186 |     mean_all_iou = mean_all_iou * (1 + arg.cos_weight * angle_cos)
187 |     max_idx = np.argmax(mean_all_iou)
188 | 
189 |     if mean_all_iou[max_idx] > arg.linking_min_iou:
190 |         tracks[max_idx].update_frames(all_tube_frames, tube_boxes, mid_frame, mid_box, score, tube_direction)
191 |     else:
192 |         new_track = Track()
193 |         new_track.update_frames(all_tube_frames, tube_boxes, mid_frame, mid_box, score, tube_direction)
194 |         tracks.append(new_track)
195 | 
196 | 
197 | def filt_bbox(save_path):
198 |     def bboxfilt(res, l=8):
199 |         max_frame = np.max(res[0])
200 |         range_mask = (res[6] >= l) | (res[0] <= 8) | (res[0] + 8 >= max_frame)
201 |         return res[range_mask]
202 | 
203 |     def trackfilt(track, l=16):
204 |         max_fid = int(np.max(track[0]))
205 |         min_fid = int(np.min(track[0]))
206 |         return max_fid - min_fid < 5
207 |         # max_frame = np.max(track.iloc[:, 0])
208 |         # range_mask = (track[0] > 8) & (track[0] + 8 < max_frame)
209 |         # if np.mean(track[range_mask][6]) < l:
210 |         #     return True
211 |         # else:
212 |         #     return False
213 | 
214 |     def ip_linear(det1, det2, fid):
215 |         fid1 = det1[0]
216 |         fid2 = det2[0]
217 |         w1 = 1.0 * (fid2 - fid) / (fid2 - fid1)
218 |         w2 = 1.0 * (fid - fid1) / (fid2 - fid1)
219 | 
220 |         ip = np.copy(det1)
221 |         ip[0] = fid
222 |         ip[2:6] = w1 * det1[2:6] + w2 * det2[2:6]
223 |         return np.array([ip])
224 | 
225 |     def track_complete(track, gap_threshold=8):
226 |         max_fid = int(np.max(track[:, 0]))
227 |         min_fid = int(np.min(track[:, 0]))
228 | 
229 |         ips = []
230 |         ip_cnt = 0
231 |         max_missing_len = 0
232 |         for i, fid in enumerate(list(track[:-1, 0])):
233 |             if track[i+1, 0] - 1 != track[i, 0]:
234 |                 if track[i+1, 0] - track[i, 0] - 1 > gap_threshold:
235 |                     continue
236 |                 cur_fid = track[i, 0] + 1
237 |                 missing_len = 0
238 |                 while cur_fid < track[i+1, 0]:
239 |                     ips.append(ip_linear(track[i+1], track[i], cur_fid))
240 |                     cur_fid = cur_fid + 1
241 |                     missing_len = missing_len + 1
242 |                 ip_cnt = ip_cnt + missing_len
243 |                 max_missing_len = max(max_missing_len, missing_len)
244 |         
245 |         assert len(ips) == ip_cnt, (track, ips)
246 |         ips.append(track)
247 |         new_track = np.concatenate(ips, axis=0)
248 |         new_track = new_track[new_track[:, 0].argsort()]
249 |         if ip_cnt == 0:
250 |             return track, 0
251 |         else:
252 |             return new_track, ip_cnt
253 | 
254 |     param_pairs = [
255 |         (['-05-'], [0, 4, 8]),
256 |         (['-10-'], [0, 6, 8]),
257 |         (['-11-'], [0, 6, 8]),
258 |         (['-13-'], [0, 9, 8]),
259 |         (['-02-'], [0, 6, 8]),
260 |         (['-09-'], [0, 4, 8]),
261 |         (['-04-'], [0, 12, 8]),
262 |         (['-06-'], [0, 4, 8]),
263 |         (['-07-'], [0, 6, 8]),
264 |         (['-12-'], [0, 6, 8]),
265 |         (['-14-'], [0, 9, 8]),
266 |         (['-01-'], [0, 6, 30]),
267 |         (['-08-'], [0, 4, 30]),
268 |         (['-03-'], [0, 12, 30])
269 |     ]
270 |     params = {}
271 |     for file_nums, param in param_pairs:
272 |         params.update({x: param for x in file_nums})
273 |     file_num = None
274 |     for k in params.keys():
275 |         if k in save_path and file_num is None:
276 |             file_num = k
277 |         elif k in save_path:
278 |             assert False
279 |     # assert file_num is not None
280 |     res = pd.read_csv(save_path, header=None)
281 |     
282 |     if file_num is not None:
283 |         min_num = params[file_num][0]
284 |         min_bbox = params[file_num][1]
285 |         res = bboxfilt(res, min_bbox)
286 |         filtered_tracks = [x[0] for x in res.groupby(1) if trackfilt(x[1], min_num)]
287 |         inds = [res.iloc[x, 1] not in filtered_tracks for x in range(len(res))]
288 |         res = res[inds]
289 |         inds = np.unique(res[1])
290 |         dict_map = {x: i + 1 for i, x in enumerate(inds)}
291 |         res[1] = res[1].map(lambda x: dict_map[x])
292 |         # res.to_csv(save_path, header=None, index=False)
293 | 
294 |     # track complete part
295 |     tracks = res.groupby(1)
296 |     new_tracks = []
297 |     for tid in tracks.groups.keys():
298 |         res, _ = track_complete(tracks.get_group(tid).values, params[file_num][2])
299 |         if res is not None:
300 |             new_tracks.append(res)
301 | 
302 |     new_tracks = np.concatenate(new_tracks)
303 |     new_tracks = new_tracks[new_tracks[:, 0].argsort()]
304 |     np.savetxt(save_path, new_tracks, fmt='%i,%i,%f,%f,%f,%f,%i,%f,%i,%i', delimiter=',')
305 | 
306 | 
307 | def final_processing(tracks, save_path, mid_only):
308 |     res = []
309 |     assert len(tracks) != 0, 'No Tracks: ' + str(save_path)
310 |     for track in tracks:
311 |         if mid_only:
312 |             frames = track.mid_frames
313 |         else:
314 |             frames = track.frames
315 |         cur_res = np.zeros((len(track.mid_frames), 10))
316 |         for i, (frame, bbox) in enumerate(track.mid_frames.items()):
317 |             cur_res[i, 0] = frame + 1
318 |             cur_res[i, 2:6] = bbox[0] / bbox[1]
319 |             cur_res[i, 6] = track.frames[frame][1]  # num of average bbox, use all frames
320 |             cur_res[i, 7] = track.frames[frame][2] / track.frames[frame][1]  # average score, use all frames
321 |         cur_res[:, 1] = track.id
322 |         res.append(cur_res)
323 |     res = np.concatenate(res)
324 |     res = res[res[:, 0].argsort()]
325 |     res[:, -2:] = -1
326 |     res[:, 4:6] -= res[:, 2:4]
327 |     if save_path is not None:
328 |         try:
329 |             if save_path[0] == '/':
330 |                 os.makedirs(os.path.join('/', *(save_path.split('/')[:-1])))
331 |             else:
332 |                 os.makedirs(os.path.join(*(save_path.split('/')[:-1])))
333 |         except:
334 |             pass
335 |         np.savetxt(save_path, res, fmt='%i,%i,%f,%f,%f,%f,%i,%f,%i,%i', delimiter=',')
336 |         filt_bbox(save_path)
337 |     # ? return res or track
338 | 
339 | 
340 | def archive_tracks(tracks, arch_tracks, cur_frame, forward_frames):
341 |     track_ = []
342 |     for track in tracks:
343 |         max_frame = max(track.frames.keys())
344 |         if max_frame + 2 * forward_frames < cur_frame:
345 |             arch_tracks.append(track)
346 |         else:
347 |             track_.append(track)
348 | 
349 |     return track_
350 | 
351 | 
352 | def adjust_poi_tubes(tubes, poi_tubes):
353 |     def adjust_single_frame(tubes, poi_tubes):
354 |         tubes_end_mask = tubes[:, 5] > tubes[:, 0]
355 |         # Trans from end to mid
356 |         trans_x_end = (tubes[tubes_end_mask][:, 6] + tubes[tubes_end_mask][:, 8]) / 2 - \
357 |             (tubes[tubes_end_mask][:, 1] + tubes[tubes_end_mask][:, 3]) / 2
358 |         trans_y_end = (tubes[tubes_end_mask][:, 7] + tubes[tubes_end_mask][:, 9]) / 2 - \
359 |             (tubes[tubes_end_mask][:, 2] + tubes[tubes_end_mask][:, 4]) / 2
360 |         # Trans Per Frame
361 |         trans_x_end = trans_x_end / (tubes[tubes_end_mask][:, 5] - tubes[tubes_end_mask][:, 0])
362 |         trans_y_end = trans_y_end / (tubes[tubes_end_mask][:, 5] - tubes[tubes_end_mask][:, 0])
363 |         # Trans Per Height
364 |         mean_trans_x_end = np.mean(trans_x_end / (tubes[tubes_end_mask][:, 7] - tubes[tubes_end_mask][:, 9]))
365 |         mean_trans_y_end = np.mean(trans_y_end / (tubes[tubes_end_mask][:, 7] - tubes[tubes_end_mask][:, 9]))
366 |         poi_tubes[:, [6, 8]] += (mean_trans_x_end * (poi_tubes[:, 5] - poi_tubes[:, 0])
367 |                                  * (poi_tubes[:, 7] - poi_tubes[:, 9]))[:, None]
368 |         poi_tubes[:, [7, 9]] += (mean_trans_y_end * (poi_tubes[:, 5] - poi_tubes[:, 0])
369 |                                  * (poi_tubes[:, 7] - poi_tubes[:, 9]))[:, None]
370 | 
371 |         tubes_start_mask = tubes[:, 10] < tubes[:, 0]
372 |         trans_x_start = (tubes[tubes_start_mask][:, 11] + tubes[tubes_start_mask][:, 13]) / 2 - \
373 |             (tubes[tubes_start_mask][:, 1] + tubes[tubes_start_mask][:, 3]) / 2
374 |         trans_y_start = (tubes[tubes_start_mask][:, 12] + tubes[tubes_start_mask][:, 14]) / 2 - \
375 |             (tubes[tubes_start_mask][:, 2] + tubes[tubes_start_mask][:, 4]) / 2
376 |         # Trans Per Frame
377 |         trans_x_start = trans_x_start / (tubes[tubes_start_mask][:, 10] - tubes[tubes_start_mask][:, 0])
378 |         trans_y_start = trans_y_start / (tubes[tubes_start_mask][:, 10] - tubes[tubes_start_mask][:, 0])
379 |         # Trans Per Height
380 |         mean_trans_x_start = np.mean(trans_x_start / (tubes[tubes_start_mask][:, 12] - tubes[tubes_start_mask][:, 14]))
381 |         mean_trans_y_start = np.mean(trans_y_start / (tubes[tubes_start_mask][:, 12] - tubes[tubes_start_mask][:, 14]))
382 |         poi_tubes[:, [11, 13]] += (mean_trans_x_start * (poi_tubes[:, 10] - poi_tubes[:, 0])
383 |                                    * (poi_tubes[:, 12] - poi_tubes[:, 14]))[:, None]
384 |         poi_tubes[:, [12, 14]] += (mean_trans_y_start * (poi_tubes[:, 10] - poi_tubes[:, 0])
385 |                                    * (poi_tubes[:, 12] - poi_tubes[:, 14]))[:, None]
386 | 
387 |         return poi_tubes
388 | 
389 |     frame_idxs = np.unique(tubes[:, 0])
390 |     for frame_idx in frame_idxs:
391 |         poi_tubes[poi_tubes[:, 0] == frame_idx] = adjust_single_frame(
392 |             tubes[tubes[:, 0] == frame_idx], poi_tubes[poi_tubes[:, 0] == frame_idx])
393 |     
394 |     return poi_tubes
395 | 
396 | 
397 | def matching(tubes, arg, save_path=None, verbose=False, mid_only=True, poi_tubes=None):
398 |     """
399 |     tubes: All tubes in a video to match. (n, 15 + 1) [mid_frame, mid_box, front_frame, front_box, back_frame, back_box, value]
400 |     save_path: File path to save formatted result.
401 |     """
402 |     tracks = []
403 |     if not isinstance(tubes, np.ndarray):
404 |         tubes = tubes.cpu().data.numpy()
405 |     
406 |     if poi_tubes is not None:
407 |         poi_tubes = adjust_poi_tubes(tubes, poi_tubes)
408 |         tubes = np.concatenate((tubes, poi_tubes))
409 |     
410 |     tubes = tubes[(-tubes[:, 15]).argsort()]
411 |     tubes = tubes[tubes[:, 0].argsort(kind='stable')]
412 |     arch_tracks = []
413 |     prev_frame = -1
414 |     tubes_one_frame = 0
415 | 
416 |     for tube in tubes:
417 |         update_tracks_fast(tracks, tube, arg)
418 | 
419 |         current_frame = tube[0]
420 |         if prev_frame != current_frame and prev_frame != -1:  # Switch Frame
421 |             if verbose:
422 |                 print('{}\tFrame: {}\tTubes: {}\tCur tracks:{}\tArch tracks:{}'.format(
423 |                     datetime.now().time(), prev_frame, tubes_one_frame, len(tracks), len(arch_tracks)))
424 |             tubes_one_frame = 0
425 |             # Archive tracks 2*forward_frames frames away, they won't be useful anymore
426 |             if int(current_frame) % 10 == 0:
427 |                 tracks = archive_tracks(tracks, arch_tracks, current_frame, arg.forward_frames * arg.frame_stride)
428 | 
429 |         prev_frame = current_frame
430 |         tubes_one_frame += 1
431 | 
432 |     arch_tracks.extend(tracks)
433 |     tracks = arch_tracks
434 |     final_processing(tracks, save_path, mid_only)
435 |     return tracks
436 | 


--------------------------------------------------------------------------------
/post_processing/tube_iou_matching_old.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import cv2
  3 | import os
  4 | import datetime
  5 | import numpy as np
  6 | import pandas as pd
  7 | from tqdm import tqdm
  8 | from network.utils import bbox_iou
  9 | from datetime import datetime
 10 | import multiprocessing
 11 | 
 12 | 
 13 | class Node:
 14 | 
 15 |     def __init__(self, box):
 16 |         self.box = box
 17 | 
 18 | 
 19 | class Track:
 20 |     '''
 21 |     Track is the class of track. it contains all the node and manages the node. it contains the following information:
 22 |     1) all the nodes
 23 |     2) track id. it is unique it identify each track
 24 |     3) track pool id. it is a number to give a new id to a new track
 25 |     4) age. age indicates how old is the track
 26 |     5) max_age. indicates the dead age of this track
 27 |     '''
 28 |     _id_pool = 1
 29 |     ''' for mot
 30 |     '''
 31 |     _max_num_node = 36
 32 |     '''for kitti
 33 |     _max_num_node = 5
 34 |     '''
 35 | 
 36 |     def __init__(self):
 37 |         self.nodes = list()
 38 |         self.frames = {}
 39 |         self.mid_frames = {}
 40 |         self.id = Track._id_pool
 41 |         Track._id_pool += 1
 42 |         self.color = tuple((np.random.rand(3) * 255).astype(int).tolist())
 43 | 
 44 |     def __del__(self):
 45 |         for n in self.nodes:
 46 |             del n
 47 | 
 48 |     def update_frames(self, node):
 49 |         tube = node.box
 50 | 
 51 |         mid_frame = tube[0].astype(np.int)
 52 |         mid_box = tube[1:5]
 53 |         end_frame = tube[5].astype(np.int)
 54 |         end_box = tube[6:10]
 55 |         start_frame = tube[10].astype(np.int)
 56 |         start_box = tube[11:15]
 57 |         score = tube[15]
 58 | 
 59 |         for frame in range(start_frame, mid_frame):
 60 |             frame_box = start_box * (mid_frame - frame) / (mid_frame - start_frame) + mid_box * (
 61 |                         frame - start_frame) / (mid_frame - start_frame)
 62 |             if frame not in self.frames:
 63 |                 self.frames[frame] = [frame_box, 1, score]
 64 |             else:
 65 |                 self.frames[frame][0] += frame_box.astype(np.float)
 66 |                 self.frames[frame][1] += 1
 67 |                 self.frames[frame][2] += score
 68 | 
 69 |         for frame in range(mid_frame + 1, end_frame + 1):
 70 |             frame_box = mid_box * (end_frame - frame) / (end_frame - mid_frame) + end_box * (frame - mid_frame) / (
 71 |                         end_frame - mid_frame)
 72 |             if frame not in self.frames:
 73 |                 self.frames[frame] = [frame_box, 1, score]
 74 |             else:
 75 |                 self.frames[frame][0] += frame_box.astype(np.float)
 76 |                 self.frames[frame][1] += 1
 77 |                 self.frames[frame][2] += score
 78 | 
 79 |         # Add middle frame
 80 |         if mid_frame not in self.frames:
 81 |             self.frames[mid_frame] = [mid_box.astype(np.float), 1, score]
 82 |         else:
 83 |             self.frames[mid_frame][0] += mid_box.astype(np.float)
 84 |             self.frames[mid_frame][1] += 1
 85 |             self.frames[mid_frame][2] += score
 86 | 
 87 |         if mid_frame not in self.mid_frames:
 88 |             self.mid_frames[mid_frame] = [mid_box.astype(np.float), 1, score]
 89 |         else:
 90 |             self.mid_frames[mid_frame][0] += mid_box.astype(np.float)
 91 |             self.mid_frames[mid_frame][1] += 1
 92 |             self.mid_frames[mid_frame][2] += score
 93 | 
 94 |     def add_node(self, node):
 95 |         # self.nodes.append(node)
 96 |         self.update_frames(node)
 97 |         # self._volatile_memory()
 98 | 
 99 |     def _volatile_memory(self):
100 |         if len(self.nodes) > self._max_num_node:
101 |             for i in range(int(self._max_num_node / 2)):
102 |                 del self.nodes[i]
103 | 
104 | 
105 | class Tracks:
106 |     '''
107 |     Track set. It contains all the tracks and manage the tracks. it has the following information
108 |     1) tracks. the set of tracks
109 |     2) keep the previous image and features
110 |     '''
111 | 
112 |     def __init__(self):
113 |         self.tracks = list()  # the set of tracks
114 |         self.max_drawing_track = 10
115 | 
116 |     def __getitem__(self, item):
117 |         return self.tracks[item]
118 | 
119 |     def append(self, track):
120 |         self.tracks.append(track)
121 | 
122 |     def get_track_by_id(self, id):
123 |         for t in self.tracks:
124 |             if t.id == id:
125 |                 return t
126 |         return None
127 | 
128 |     def one_frame_pass(self):
129 |         keep_track_set = list()
130 |         for i, t in enumerate(self.tracks):
131 |             t.add_age()
132 |             if t.age < t._max_age:
133 |                 keep_track_set.append(i)
134 | 
135 |         self.tracks = [self.tracks[i] for i in keep_track_set]
136 | 
137 |     def show(self, image):
138 |         h, w, _ = image.shape
139 | 
140 |         # draw rectangle
141 |         for t in self.tracks:
142 |             if len(t.nodes) > 0 and t.age < 2:
143 |                 b = t.nodes[-1].box
144 |                 image = cv2.putText(image, str(t.id), (int(b[0] * w), int((b[1]) * h)), cv2.FONT_HERSHEY_SIMPLEX, 1,
145 |                                     t.color, 3)
146 |                 image = cv2.rectangle(image, (int(b[0] * w), int((b[1]) * h)),
147 |                                       (int((b[0] + b[2]) * w), int((b[1] + b[3]) * h)), t.color, 2)
148 | 
149 |         # draw line
150 |         for t in self.tracks:
151 |             if t.age > 1:
152 |                 continue
153 |             if len(t.nodes) > self.max_drawing_track:
154 |                 start = len(t.nodes) - self.max_drawing_track
155 |             else:
156 |                 start = 0
157 |             for n1, n2 in zip(t.nodes[start:], t.nodes[start + 1:]):
158 |                 c1 = (int((n1.box[0] + n1.box[2] / 2.0) * w), int((n1.box[1] + n1.box[3]) * h))
159 |                 c2 = (int((n2.box[0] + n2.box[2] / 2.0) * w), int((n2.box[1] + n2.box[3]) * h))
160 |                 image = cv2.line(image, c1, c2, t.color, 2)
161 | 
162 |         return image
163 | 
164 | 
165 | def update_tracks(tracks, tube, arg):
166 |     mid_frame = tube[0].astype(np.int)
167 |     mid_box = tube[1:5]
168 |     end_frame = tube[5].astype(np.int)
169 |     end_box = tube[6:10]
170 |     start_frame = tube[10].astype(np.int)
171 |     start_box = tube[11:15]
172 |     score = tube[15]
173 | 
174 |     def get_center(box):
175 | 
176 |         return np.array([(box[0] + box[2]) / 2, (box[1] + box[3]) / 2])
177 | 
178 |     tube_direction = get_center(end_box) - get_center(start_box)
179 | 
180 |     assert start_frame <= mid_frame and mid_frame <= end_frame
181 | 
182 |     # Pre-compute all inter frame_boxs in this tube
183 |     back_frames = list(range(start_frame, mid_frame))
184 |     front_frames = list(range(mid_frame + 1, end_frame + 1))
185 |     all_tube_frames = back_frames + front_frames + [mid_frame]
186 |     # ! CAUTION: all_tube_frames is not sorted, mid_frame is the last one
187 | 
188 |     back_start_coef = (mid_frame - back_frames) / (mid_frame - start_frame)
189 |     back_mid_coef = (back_frames - start_frame) / (mid_frame - start_frame)
190 |     front_mid_coef = (end_frame - front_frames) / (end_frame - mid_frame)
191 |     front_end_coef = (front_frames - mid_frame) / (end_frame - mid_frame)
192 |     frame_boxs = np.concatenate((np.outer(back_start_coef, start_box), np.outer(front_end_coef, end_box))) + \
193 |                  np.outer(np.concatenate((back_mid_coef, front_mid_coef)), mid_box)
194 |     frame_boxs = np.concatenate((frame_boxs, mid_box[None]))
195 | 
196 |     tube_frame_num = len(frame_boxs)
197 | 
198 |     # Above code computes bboxes in tube of corresponding frames
199 |     # Equal to:
200 |     # back_frame_boxs = np.outer((mid_frame - back_frames) / (mid_frame - start_frame), start_box) + \
201 |     #         np.outer((back_frames - start_frame) / (mid_frame - start_frame), mid_box)
202 |     # front_frame_boxs = np.outer((end_frame - front_frames) / (end_frame - mid_frame), mid_box) + \
203 |     #         np.outer((front_frames - mid_frame) / (end_frame - mid_frame), end_box)
204 |     # frame_boxs = np.concatenate((back_frame_boxs, front_frame_boxs))
205 | 
206 |     # Preallocate array of bboxes in track
207 |     track_boxs = np.zeros_like(frame_boxs)
208 | 
209 |     max_idx, max_iou = -1, -1
210 | 
211 |     for idx, track in enumerate(tracks):
212 |         iou = [0, 0]
213 | 
214 |         has_frame = [(frame in track.frames) for frame in all_tube_frames]
215 |         if np.sum(has_frame) == 0:  # tube and track does not overlap
216 |             continue
217 | 
218 |         # get the same length of area in the track that near to the tube
219 |         overlap_frames = np.array(all_tube_frames)[np.where(has_frame)[0]]
220 |         overlap_area = [min(overlap_frames), max(overlap_frames)]
221 |         while overlap_area[1] - overlap_area[0] + 1 < tube_frame_num:
222 |             if overlap_area[0] - 1 in track.frames:
223 |                 overlap_area[0] = overlap_area[0] - 1
224 |             elif overlap_area[1] + 1 in track.frames:
225 |                 overlap_area[1] = overlap_area[1] + 1
226 |             else:
227 |                 break
228 |         # calculate the cos value
229 |         track_direction = get_center(track.frames[overlap_area[1]][0] / track.frames[overlap_area[1]][1]) - \
230 |                           get_center(track.frames[overlap_area[0]][0] / track.frames[overlap_area[0]][1])
231 | 
232 |         if np.linalg.norm(tube_direction) < arg.noise_dis:
233 |             tube_direction = np.array([0, 0])
234 |         if np.linalg.norm(track_direction) < arg.noise_dis:
235 |             track_direction = np.array([0, 0])
236 |         if np.linalg.norm(track_direction) * np.linalg.norm(tube_direction) > 0:
237 |             angle_cos = np.dot(track_direction, tube_direction) / (
238 |                         np.linalg.norm(track_direction) * np.linalg.norm(tube_direction))
239 |         else:
240 |             angle_cos = 1
241 | 
242 |         # calculate the IoU
243 |         for i, frame in enumerate(all_tube_frames):
244 |             if has_frame[i]:
245 |                 track_boxs[i] = track.frames[frame][0] / track.frames[frame][1]
246 | 
247 |         iou[0] = np.sum(bbox_iou(frame_boxs, track_boxs)[has_frame])
248 |         iou[1] = np.sum(has_frame)
249 | 
250 |         if iou[0] / iou[1] > arg.linking_min_iou + 0.2:
251 |             angle_cos = 1
252 | 
253 |         # whether linking
254 |         if iou[1] > 0 and iou[0] / iou[1] > max_iou and angle_cos > arg.cos_value:
255 |             max_idx = idx
256 |             max_iou = iou[0] / iou[1]
257 | 
258 |     if max_iou > arg.linking_min_iou:
259 |         tracks[max_idx].update_frames(Node(tube))
260 |     else:
261 |         new_tracks(tracks, [tube])
262 | 
263 | 
264 | def new_tracks(tracks, tubes):
265 |     for tube in tubes:
266 |         track = Track()
267 |         track.add_node(Node(tube))
268 |         tracks.append(track)
269 | 
270 | 
271 | def final_processing(tracks, save_path, mid_only):
272 |     res = []
273 |     assert len(tracks) != 0, 'No Tracks: ' + str(save_path)
274 |     for track in tracks:
275 |         if mid_only:
276 |             frames = track.mid_frames
277 |         else:
278 |             frames = track.frames
279 |         cur_res = np.zeros((len(track.mid_frames), 10))
280 |         for i, (frame, bbox) in enumerate(track.mid_frames.items()):
281 |             cur_res[i, 0] = frame + 1
282 |             cur_res[i, 2:6] = bbox[0] / bbox[1]
283 |             cur_res[i, 6] = track.frames[frame][1]  # num of average bbox, use all frames
284 |             cur_res[i, 7] = track.frames[frame][2] / track.frames[frame][1]  # average score, use all frames
285 |         cur_res[:, 1] = track.id
286 |         res.append(cur_res)
287 |     res = np.concatenate(res)
288 |     res = res[res[:, 0].argsort()]
289 |     res[:, -2:] = -1
290 |     res[:, 4:6] -= res[:, 2:4]
291 |     if save_path is not None:
292 |         try:
293 |             if save_path[0] == '/':
294 |                 os.makedirs(os.path.join('/', *(save_path.split('/')[:-1])))
295 |             else:
296 |                 os.makedirs(os.path.join(*(save_path.split('/')[:-1])))
297 |         except:
298 |             pass
299 |         np.savetxt(save_path, res, fmt='%i,%i,%f,%f,%f,%f,%i,%f,%i,%i', delimiter=',')
300 |     # ? return res or track
301 | 
302 | 
303 | def archive_tracks(tracks, arch_tracks, cur_frame, forward_frames):
304 |     track_ = []
305 |     for track in tracks:
306 |         max_frame = max(track.frames.keys())
307 |         if (max_frame + 2 * forward_frames < cur_frame):
308 |             arch_tracks.append(track)
309 |         else:
310 |             track_.append(track)
311 | 
312 |     return track_
313 | 
314 | 
315 | def matching(tubes, arg, save_path=None, verbose=False, mid_only=True):
316 |     """
317 |     tubes: All tubes in a video to match. (n, 15 + 1) [mid_frame, mid_box, front_frame, front_box, back_frame, back_box, value]
318 |     save_path: File path to save formatted result.
319 |     """
320 |     tracks = []
321 |     if not isinstance(tubes, np.ndarray):
322 |         tubes = tubes.cpu().data.numpy()
323 |     # tubes = pd.DataFrame(tubes)
324 |     # tubes = tubes.astype({0: int, 5: int, 10: int})
325 | 
326 |     # tubes_group = tubes.groupby(0)  # group by back_frame, i.e. start_frame
327 | 
328 |     # arch_tracks = []
329 |     # for frame in sorted(tubes_group.indices.keys()):
330 |     #     tubes_one_frame = tubes_group.get_group(frame).values
331 | 
332 |     #     for tube in tubes_one_frame:
333 |     #         update_tracks(tracks, tube, arg)
334 | 
335 |     #     if verbose:
336 |     #         print('{}\tFrame: {}\tTubes: {}\tCur tracks:{}\tArch tracks:{}'.format(\
337 |     #                 datetime.now().time(), frame, len(tubes_one_frame), len(tracks), len(arch_tracks)))
338 | 
339 |     #     # Archive tracks 2*forward_frames frames away, they won't be useful anymore
340 |     #     # if frame % 10 == 0:
341 |     #     tracks = archive_tracks(tracks, arch_tracks, frame, arg.forward_frames * arg.frame_stride)
342 | 
343 |     tubes = tubes[tubes[:, 0].argsort()]
344 |     arch_tracks = []
345 |     prev_frame = -1
346 |     tubes_one_frame = 0
347 | 
348 |     for tube in tubes:
349 |         update_tracks(tracks, tube, arg)
350 | 
351 |         current_frame = tube[0]
352 |         if prev_frame != current_frame and prev_frame != -1:  # Switch Frame
353 |             if verbose:
354 |                 print('{}\tFrame: {}\tTubes: {}\tCur tracks:{}\tArch tracks:{}'.format( \
355 |                     datetime.now().time(), prev_frame, tubes_one_frame, len(tracks), len(arch_tracks)))
356 |             tubes_one_frame = 0
357 |             # Archive tracks 2*forward_frames frames away, they won't be useful anymore
358 |             if int(current_frame) % 10 == 0:
359 |                 tracks = archive_tracks(tracks, arch_tracks, current_frame, arg.forward_frames * arg.frame_stride)
360 | 
361 |         prev_frame = current_frame
362 |         tubes_one_frame += 1
363 | 
364 |     arch_tracks.extend(tracks)
365 |     tracks = arch_tracks
366 |     final_processing(tracks, save_path, mid_only)
367 |     return tracks


--------------------------------------------------------------------------------
/post_processing/tube_iou_matching_super_old.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import cv2
  3 | import os
  4 | import datetime
  5 | import numpy as np
  6 | import pandas as pd
  7 | from tqdm import tqdm
  8 | from network.utils import bbox_iou
  9 | from datetime import datetime
 10 | 
 11 | 
 12 | class Node:
 13 | 
 14 |     def __init__(self, box):
 15 |         self.box = box
 16 | 
 17 | 
 18 | class Track:
 19 |     '''
 20 |     Track is the class of track. it contains all the node and manages the node. it contains the following information:
 21 |     1) all the nodes
 22 |     2) track id. it is unique it identify each track
 23 |     3) track pool id. it is a number to give a new id to a new track
 24 |     4) age. age indicates how old is the track
 25 |     5) max_age. indicates the dead age of this track
 26 |     '''
 27 |     _id_pool = 1
 28 |     ''' for mot
 29 |     '''
 30 |     _max_num_node = 36
 31 |     '''for kitti
 32 |     _max_num_node = 5
 33 |     '''
 34 |     def __init__(self):
 35 |         self.nodes = list()
 36 |         self.frames = {}
 37 |         self.id = Track._id_pool
 38 |         Track._id_pool += 1
 39 |         self.color = tuple((np.random.rand(3) * 255).astype(int).tolist())
 40 | 
 41 |     def __del__(self):
 42 |         for n in self.nodes:
 43 |             del n
 44 | 
 45 |     def update_frames(self, node):
 46 |         tube = node.box
 47 | 
 48 |         mid_frame = tube[0].astype(np.int)
 49 |         mid_box = tube[1:5]
 50 |         end_frame = tube[5].astype(np.int)
 51 |         end_box = tube[6:10]
 52 |         start_frame = tube[10].astype(np.int)
 53 |         start_box = tube[11:15]
 54 |         score = tube[15]
 55 | 
 56 |         for frame in range(start_frame, mid_frame):
 57 |             frame_box = start_box * (mid_frame - frame) / (mid_frame - start_frame) + mid_box * (frame - start_frame) / (mid_frame - start_frame)
 58 |             if frame not in self.frames:
 59 |                 self.frames[frame] = [frame_box, 1, score]
 60 |             else:
 61 |                 self.frames[frame][0] += frame_box.astype(np.float)
 62 |                 self.frames[frame][1] += 1
 63 |                 self.frames[frame][2] += score
 64 | 
 65 |         for frame in range(mid_frame + 1, end_frame + 1):
 66 |             frame_box = mid_box * (end_frame - frame) / (end_frame - mid_frame) + end_box * (frame - mid_frame) / (end_frame - mid_frame)
 67 |             if frame not in self.frames:
 68 |                 self.frames[frame] = [frame_box, 1, score]
 69 |             else:
 70 |                 self.frames[frame][0] += frame_box.astype(np.float)
 71 |                 self.frames[frame][1] += 1
 72 |                 self.frames[frame][2] += score
 73 | 
 74 |         # Add middle frame
 75 |         if mid_frame not in self.frames:
 76 |             self.frames[mid_frame] = [mid_box.astype(np.float), 1, score]
 77 |         else:
 78 |             self.frames[mid_frame][0] += mid_box.astype(np.float)
 79 |             self.frames[mid_frame][1] += 1
 80 |             self.frames[mid_frame][2] += score
 81 | 
 82 |     def add_node(self, node):
 83 |         # self.nodes.append(node)
 84 |         self.update_frames(node)
 85 |         # self._volatile_memory()
 86 | 
 87 |     def _volatile_memory(self):
 88 |         if len(self.nodes) > self._max_num_node:
 89 |             for i in range(int(self._max_num_node/2)):
 90 |                 del self.nodes[i]
 91 | 
 92 | 
 93 | class Tracks:
 94 |     '''
 95 |     Track set. It contains all the tracks and manage the tracks. it has the following information
 96 |     1) tracks. the set of tracks
 97 |     2) keep the previous image and features
 98 |     '''
 99 |     def __init__(self):
100 |         self.tracks = list() # the set of tracks
101 |         self.max_drawing_track = 10
102 | 
103 |     def __getitem__(self, item):
104 |         return self.tracks[item]
105 | 
106 |     def append(self, track):
107 |         self.tracks.append(track)
108 | 
109 |     def get_track_by_id(self, id):
110 |         for t in self.tracks:
111 |             if t.id == id:
112 |                 return t
113 |         return None
114 | 
115 |     def one_frame_pass(self):
116 |         keep_track_set = list()
117 |         for i, t in enumerate(self.tracks):
118 |             t.add_age()
119 |             if t.age < t._max_age:
120 |                 keep_track_set.append(i)
121 | 
122 |         self.tracks = [self.tracks[i] for i in keep_track_set]
123 | 
124 |     def show(self, image):
125 |         h, w, _ = image.shape
126 | 
127 |         # draw rectangle
128 |         for t in self.tracks:
129 |             if len(t.nodes) > 0 and t.age<2:
130 |                 b = t.nodes[-1].box
131 |                 image = cv2.putText(image, str(t.id), (int(b[0]*w),int((b[1])*h)), cv2.FONT_HERSHEY_SIMPLEX, 1, t.color, 3)
132 |                 image = cv2.rectangle(image, (int(b[0]*w),int((b[1])*h)), (int((b[0]+b[2])*w), int((b[1]+b[3])*h)), t.color, 2)
133 | 
134 |         # draw line
135 |         for t in self.tracks:
136 |             if t.age > 1:
137 |                 continue
138 |             if len(t.nodes) > self.max_drawing_track:
139 |                 start = len(t.nodes) - self.max_drawing_track
140 |             else:
141 |                 start = 0
142 |             for n1, n2 in zip(t.nodes[start:], t.nodes[start+1:]):
143 |                 c1 = (int((n1.box[0] + n1.box[2]/2.0)*w), int((n1.box[1] + n1.box[3])*h))
144 |                 c2 = (int((n2.box[0] + n2.box[2] / 2.0) * w), int((n2.box[1] + n2.box[3]) * h))
145 |                 image = cv2.line(image, c1, c2, t.color, 2)
146 | 
147 |         return image
148 | 
149 | 
150 | def update_tracks(tracks, tube, arg):
151 |     mid_frame = tube[0].astype(np.int)
152 |     mid_box = tube[1:5]
153 |     end_frame = tube[5].astype(np.int)
154 |     end_box = tube[6:10]
155 |     start_frame = tube[10].astype(np.int)
156 |     start_box = tube[11:15]
157 |     score = tube[15]
158 | 
159 |     def get_center(box):
160 | 
161 |         return np.array([(box[0] + box[2])/2, (box[1] + box[3])/2])
162 | 
163 |     tube_direction = get_center(end_box) - get_center(start_box)
164 | 
165 |     assert start_frame <= mid_frame and mid_frame <= end_frame
166 | 
167 |     # Pre-compute all inter frame_boxs in this tube
168 |     back_frames = list(range(start_frame, mid_frame))
169 |     front_frames = list(range(mid_frame + 1, end_frame + 1))
170 |     all_tube_frames = back_frames + front_frames + [mid_frame]
171 |     # ! CAUTION: all_tube_frames is not sorted, mid_frame is the last one
172 | 
173 |     back_start_coef = (mid_frame - back_frames) / (mid_frame - start_frame)
174 |     back_mid_coef = (back_frames - start_frame) / (mid_frame - start_frame)
175 |     front_mid_coef = (end_frame - front_frames) / (end_frame - mid_frame)
176 |     front_end_coef = (front_frames - mid_frame) / (end_frame - mid_frame)
177 |     frame_boxs = np.concatenate((np.outer(back_start_coef, start_box), np.outer(front_end_coef, end_box))) + \
178 |             np.outer(np.concatenate((back_mid_coef, front_mid_coef)), mid_box)
179 |     frame_boxs = np.concatenate((frame_boxs, mid_box[None]))
180 | 
181 |     tube_frame_num = len(frame_boxs)
182 | 
183 |     # Above code computes bboxes in tube of corresponding frames
184 |     # Equal to:
185 |     # back_frame_boxs = np.outer((mid_frame - back_frames) / (mid_frame - start_frame), start_box) + \
186 |     #         np.outer((back_frames - start_frame) / (mid_frame - start_frame), mid_box)
187 |     # front_frame_boxs = np.outer((end_frame - front_frames) / (end_frame - mid_frame), mid_box) + \
188 |     #         np.outer((front_frames - mid_frame) / (end_frame - mid_frame), end_box)
189 |     # frame_boxs = np.concatenate((back_frame_boxs, front_frame_boxs))
190 | 
191 |     # Preallocate array of bboxes in track
192 |     track_boxs = np.zeros_like(frame_boxs)
193 | 
194 |     max_idx, max_iou = -1, -1
195 |     for idx, track in enumerate(tracks):
196 |         iou = [0, 0]
197 | 
198 |         has_frame = [(frame in track.frames) for frame in all_tube_frames]
199 |         if sum(has_frame) == 0:  # tube and track does not overlap
200 |             continue
201 | 
202 |         # get the same length of area in the track that near to the tube
203 |         overlap_frames = np.array(all_tube_frames)[np.where(has_frame)[0]]
204 |         overlap_area = [min(overlap_frames), max(overlap_frames)]
205 |         while overlap_area[1] - overlap_area[0] + 1 < tube_frame_num:
206 |             if overlap_area[0] - 1 in track.frames:
207 |                 overlap_area[0] = overlap_area[0] - 1
208 |             elif overlap_area[1] + 1 in track.frames:
209 |                 overlap_area[1] = overlap_area[1] + 1
210 |             else:
211 |                 break
212 |         # calculate the cos value
213 |         track_direction = get_center(track.frames[overlap_area[1]][0] / track.frames[overlap_area[1]][1]) - \
214 |                           get_center(track.frames[overlap_area[0]][0] / track.frames[overlap_area[0]][1])
215 | 
216 |         if np.linalg.norm(tube_direction) < arg.noise_dis:
217 |             tube_direction = np.array([0, 0])
218 |         if np.linalg.norm(track_direction) < arg.noise_dis:
219 |             track_direction = np.array([0, 0])
220 |         if np.linalg.norm(track_direction) * np.linalg.norm(tube_direction) > 0:
221 |             angle_cos = np.dot(track_direction, tube_direction) / (np.linalg.norm(track_direction) * np.linalg.norm(tube_direction))
222 |         else:
223 |             angle_cos = 1
224 | 
225 |         # calculate the IoU
226 |         for i, frame in enumerate(all_tube_frames):
227 |             if has_frame[i]:
228 |                 track_boxs[i] = track.frames[frame][0] / track.frames[frame][1]
229 | 
230 |         iou[0] = sum(bbox_iou(frame_boxs, track_boxs)[has_frame])
231 |         iou[1] = sum(has_frame)
232 | 
233 |         if iou[0] / iou[1] > arg.linking_min_iou + 0.2:
234 |             angle_cos = 1
235 | 
236 |         # whether linking
237 |         if iou[1] > 0 and iou[0] / iou[1] > max_iou and angle_cos > arg.cos_value:
238 |             max_idx = idx
239 |             max_iou = iou[0] / iou[1]
240 | 
241 |     if max_iou > arg.linking_min_iou:
242 |         tracks[max_idx].update_frames(Node(tube))
243 |     else:
244 |         new_tracks(tracks, [tube])
245 | 
246 | 
247 | def new_tracks(tracks, tubes):
248 |     for tube in tubes:
249 |         track = Track()
250 |         track.add_node(Node(tube))
251 |         tracks.append(track)
252 | 
253 | 
254 | def final_processing(tracks, save_path):
255 |     res = []
256 |     assert len(tracks) != 0, 'No Tracks: ' + str(save_path)
257 |     for track in tracks:
258 |         cur_res = np.zeros((len(track.frames), 10))
259 |         for i, (frame, bbox) in enumerate(track.frames.items()):
260 |             cur_res[i, 0] = frame + 1
261 |             cur_res[i, 2:6] = bbox[0] / bbox[1]
262 |             cur_res[i, 6] = bbox[1]  # num of average bbox
263 |             cur_res[i, 7] = bbox[2] / bbox[1]  # average score
264 |         cur_res[:, 1] = track.id
265 |         res.append(cur_res)
266 |     res = np.concatenate(res)
267 |     res = res[res[:, 0].argsort()]
268 |     res[:, -2:] = -1
269 |     res[:, 4:6] -= res[:, 2:4]
270 |     if save_path is not None:
271 |         try:
272 |             if save_path[0] == '/':
273 |                 os.makedirs(os.path.join('/', *(save_path.split('/')[:-1])))
274 |             else:
275 |                 os.makedirs(os.path.join(*(save_path.split('/')[:-1])))
276 |         except:
277 |             pass
278 |         np.savetxt(save_path, res, fmt='%i,%i,%f,%f,%f,%f,%i,%f,%i,%i', delimiter=',')
279 |     # ? return res or track
280 | 
281 | 
282 | def archive_tracks(tracks, arch_tracks, cur_frame, forward_frames):
283 |     track_ = []
284 |     for track in tracks:
285 |         max_frame = max(track.frames.keys())
286 |         if (max_frame + 2 * forward_frames < cur_frame):
287 |             arch_tracks.append(track)
288 |         else:
289 |             track_.append(track)
290 | 
291 |     return track_
292 | 
293 | 
294 | def matching(tubes, arg, save_path=None, verbose=False):
295 |     """
296 |     tubes: All tubes in a video to match. (n, 15 + 1) [mid_frame, mid_box, front_frame, front_box, back_frame, back_box, value]
297 |     save_path: File path to save formatted result.
298 |     """
299 |     tracks = []
300 |     if not isinstance(tubes, np.ndarray):
301 |         tubes = tubes.cpu().data.numpy()
302 |     tubes = pd.DataFrame(tubes)
303 |     tubes = tubes.astype({0: int, 5: int, 10: int})
304 |     tubes_group = tubes.groupby(0)  # group by back_frame, i.e. start_frame
305 | 
306 |     arch_tracks = []
307 |     for frame in sorted(tubes_group.indices.keys()):
308 |         tubes_one_frame = tubes_group.get_group(frame).values
309 | 
310 |         for tube in tubes_one_frame:
311 |             update_tracks(tracks, tube, arg)
312 | 
313 |         if verbose:
314 |             print('{}\tFrame: {}\tTubes: {}\tCur tracks:{}\tArch tracks:{}'.format(\
315 |                     datetime.now().time(), frame, len(tubes_one_frame), len(tracks), len(arch_tracks)))
316 | 
317 |         # Archive tracks 2*forward_frames frames away, they won't be useful anymore
318 |         tracks = archive_tracks(tracks, arch_tracks, frame, arg.forward_frames * arg.frame_stride)
319 | 
320 |     arch_tracks.extend(tracks)
321 |     tracks = arch_tracks
322 |     final_processing(tracks, save_path)
323 |     return tracks
324 | 


--------------------------------------------------------------------------------
/post_processing/tube_nms.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | from .nms import nms_cuda, nms_cpu
  4 | 
  5 | 
  6 | def multiclass_nms(multi_tubes,  # n, 15
  7 |                    multi_scores,  # n, 1 + n_cls
  8 |                    score_thr,
  9 |                    iou_thre,
 10 |                    max_num=-1,
 11 |                    score_factors=None,  # n
 12 |                    frame_num=16):
 13 |     """NMS for multi-class tubes.
 14 | 
 15 |     Args:
 16 |         multi_tubes (Tensor): shape (n, #class*4) or (n, 4)
 17 |         multi_scores (Tensor): shape (n, 1+#class)
 18 |         score_thr (float): bbox threshold, bboxes with scores lower than it
 19 |             will not be considered.
 20 |         iou_thre (float): NMS IoU threshold
 21 |         max_num (int): if there are more than max_num bboxes after NMS,
 22 |             only top max_num will be kept.
 23 |         score_factors (Tensor): The factors multiplied to scores before
 24 |             applying NMS
 25 |         frame_num (int): number of frames in input
 26 | 
 27 |     Returns:
 28 |         tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels
 29 |             are 0-based.
 30 |     """
 31 |     num_classes = multi_scores.shape[1]
 32 |     tubes, labels = [], []
 33 |     nms_op = nms
 34 |     for i in range(1, num_classes):
 35 |         cls_inds = multi_scores[:, i] > score_thr
 36 |         # print('before: ' + str(len(cls_inds)))
 37 |         if not cls_inds.any():
 38 |             continue
 39 | 
 40 |         # get bboxes and scores of this class
 41 |         _tubes = multi_tubes[cls_inds, :]
 42 |         _scores = multi_scores[cls_inds, i]
 43 |         if score_factors is not None:
 44 |             _scores *= score_factors[cls_inds]
 45 |             pass
 46 | 
 47 |         # do nms in each frame
 48 |         for n_f in range(frame_num):
 49 |             frame_inds = torch.round(_tubes[:, 0]) == n_f
 50 |             if torch.sum(frame_inds) == 0:
 51 |                 continue
 52 |             _tubes_single_frame = _tubes[frame_inds]
 53 |             # mid_frame = _bboxes_single_frame[:, 1:5]
 54 |             # cls_dets = torch.cat([mid_frame, _scores[frame_inds, None]], dim=1)  # n, 4 + 1
 55 |             cls_dets = torch.cat([_tubes_single_frame, _scores[frame_inds, None]], dim=1)  # n, 15 + 1
 56 |             _, inds = nms_op(cls_dets, iou_thre)
 57 |             # cls_dets = _bboxes_single_frame[inds]
 58 |             cls_dets = cls_dets[inds]
 59 |             cls_labels = multi_tubes.new_full(
 60 |                 (cls_dets.shape[0], ), i - 1, dtype=torch.long)
 61 |             tubes.append(cls_dets)
 62 |             labels.append(cls_labels)
 63 |     if tubes:
 64 |         tubes = torch.cat(tubes)
 65 |         labels = torch.cat(labels)
 66 |         # print('middle: ' + str(len(bboxes)))
 67 | 
 68 |         # =====================================
 69 |         # bboxes = bboxes[bboxes[:, -1] > score_thr]
 70 |         # =====================================
 71 | 
 72 |         if tubes.shape[0] > max_num:
 73 |             _, inds = tubes[:, -1].sort(descending=True)
 74 |             inds = inds[:max_num]
 75 |             tubes = tubes[inds]
 76 |             labels = labels[inds]
 77 |     else:
 78 |         tubes = multi_tubes.new_zeros((0, multi_tubes.shape[1] + 1))
 79 |         labels = multi_tubes.new_zeros((0,), dtype=torch.long)
 80 |     # print('after: ' + str(len(bboxes)))
 81 |     return tubes, labels
 82 | 
 83 | 
 84 | def nms(dets, iou_thr, device_id=None):
 85 |     """Dispatch to either CPU or GPU NMS implementations.
 86 | 
 87 |     The input can be either a torch tensor or numpy array. GPU NMS will be used
 88 |     if the input is a gpu tensor or device_id is specified, otherwise CPU NMS
 89 |     will be used. The returned type will always be the same as inputs.
 90 | 
 91 |     Arguments:
 92 |         dets (torch.Tensor or np.ndarray): bboxes with scores.
 93 |         iou_thr (float): IoU threshold for NMS.
 94 |         device_id (int, optional): when `dets` is a numpy array, if `device_id`
 95 |             is None, then cpu nms is used, otherwise gpu_nms will be used.
 96 | 
 97 |     Returns:
 98 |         tuple: kept bboxes and indice, which is always the same data type as
 99 |             the input.
100 |     """
101 |     # convert dets (tensor or numpy array) to tensor
102 |     if isinstance(dets, torch.Tensor):
103 |         is_numpy = False
104 |         dets_th = dets
105 |     elif isinstance(dets, np.ndarray):
106 |         is_numpy = True
107 |         device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id)
108 |         dets_th = torch.from_numpy(dets).to(device)
109 |     else:
110 |         raise TypeError(
111 |             'dets must be either a Tensor or numpy array, but got {}'.format(
112 |                 type(dets)))
113 | 
114 |     # execute cpu or cuda nms
115 |     if dets_th.shape[0] == 0:
116 |         inds = dets_th.new_zeros(0, dtype=torch.long)
117 |     else:
118 |         if dets_th.is_cuda:
119 |             inds = nms_cuda.nms(dets_th, iou_thr, iou_thr)
120 |         else:
121 |             inds = nms_cpu.nms(dets_th, iou_thr, iou_thr)
122 | 
123 |     if is_numpy:
124 |         inds = inds.cpu().numpy()
125 |     return dets[inds, :], inds
126 | 


--------------------------------------------------------------------------------
/pre_processing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BoPang1996/TubeTK/bcca334c5348f9ae33e04595e1af93cf8351e50e/pre_processing/__init__.py


--------------------------------------------------------------------------------
/pre_processing/get_tubes_MOT17.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import os
  3 | import pandas as pd
  4 | from network.utils import bbox_iou
  5 | import pickle
  6 | from tqdm import tqdm
  7 | import shutil
  8 | import multiprocessing
  9 | from configs.default import __C, cfg_from_file
 10 | from dataset.Parsers.structures import *
 11 | import argparse
 12 | 
 13 | 
 14 | class GTSingleParser:
 15 |     def __init__(self, folder,
 16 |                  min_visibility,
 17 |                  forward_frames,
 18 |                  frame_stride,
 19 |                  tube_thre):
 20 |         # 1. get the gt path and image folder
 21 |         gt_file_path = os.path.join(folder, 'gt/gt.txt')
 22 |         self.folder = folder
 23 |         self.forward_frames = forward_frames
 24 |         self.tube_thre = tube_thre
 25 |         self.min_visibility = min_visibility
 26 |         self.frame_stride = frame_stride
 27 | 
 28 |         # 2. read the gt data
 29 |         gt_file = pd.read_csv(gt_file_path, header=None)
 30 |         gt_file = gt_file[gt_file[6] == 1]  # human class
 31 |         gt_file = gt_file[gt_file[8] > min_visibility]
 32 |         gt_group = gt_file.groupby(0)
 33 |         gt_group_keys = gt_group.indices.keys()
 34 |         self.max_frame_index = max(gt_group_keys)
 35 |         # 3. update tracks
 36 |         self.tracks = Tracks()
 37 |         self.recorder = {}
 38 |         for key in gt_group_keys:
 39 |             det = gt_group.get_group(key).values
 40 |             ids = np.array(det[:, 1]).astype(int)
 41 |             det = np.array(det[:, 2:6])
 42 |             det[:, 2:4] += det[:, :2]
 43 | 
 44 |             self.recorder[key - 1] = list()
 45 |             # 3.1 update tracks
 46 |             for id, d in zip(ids, det):
 47 |                 node = Node(d, key - 1)
 48 |                 track_index, node_index = self.tracks.add_node(node, id)
 49 |                 self.recorder[key - 1].append((track_index, node_index))
 50 | 
 51 |     def bbox2tube(self, track, mid_id, direction, pos_in_video, thre):
 52 |         def get_true_z(mid_node, end_node):
 53 |             return end_node.frame_id - mid_node.frame_id
 54 | 
 55 |         def get_inter_box(start_box, end_box, inter_id, end_id):
 56 |             return start_box * (end_id - inter_id) / end_id + end_box * inter_id / end_id
 57 | 
 58 |         mid_node = track.get_node_by_index(mid_id)
 59 |         mid_box = mid_node.box
 60 |         inter_boxes = []
 61 | 
 62 |         z = 1 if direction == 'front' else -1
 63 |         if mid_id + z >= len(track.nodes) or mid_id + z < 0:
 64 |             return np.array([0, 0, 0, 0, 0])
 65 | 
 66 |         true_z = get_true_z(mid_node, track.get_node_by_index(mid_id + z))
 67 | 
 68 |         max_len = (self.forward_frames * 2 - 1) * self.frame_stride + 1
 69 | 
 70 |         while -1 * pos_in_video <= true_z < max_len - pos_in_video:
 71 |             iou_total = 0
 72 |             end_node = track.get_node_by_index(mid_id + z)
 73 |             end_box = end_node.box
 74 |             for i, gt_box in enumerate(inter_boxes):
 75 |                 iou = sum(bbox_iou(gt_box[None], get_inter_box(mid_box, end_box, i + 1, len(inter_boxes) + 1)[None]))
 76 |                 iou_total += iou
 77 |             iou_total += 1
 78 |             iou_total /= (len(inter_boxes) + 1)
 79 | 
 80 |             if iou_total < thre:
 81 |                 break
 82 | 
 83 |             inter_boxes.append(end_box)
 84 |             if z % self.frame_stride == 0:
 85 |                 res_z = true_z
 86 | 
 87 |             z += 1 if direction == 'front' else -1
 88 |             if mid_id + z >= len(track.nodes) or mid_id + z < 0:
 89 |                 break
 90 |             true_z = get_true_z(mid_node, track.get_node_by_index(mid_id + z))
 91 | 
 92 |         if not inter_boxes or len(inter_boxes) < self.frame_stride:
 93 |             return np.array([0, 0, 0, 0, 0])
 94 |         else:
 95 |             ret_ind = (len(inter_boxes) // self.frame_stride) * self.frame_stride - 1
 96 |             return np.concatenate((np.array([abs(res_z)]), inter_boxes[ret_ind] - mid_box))
 97 | 
 98 |     def get_item(self, frame_index):
 99 |         start_frame = frame_index
100 |         max_len = (self.forward_frames * 2 - 1) * self.frame_stride + 1
101 |         if self.max_frame_index - start_frame < max_len:
102 |             return 0
103 |         # if not frame_index in self.recorder:
104 |         #     return 0
105 | 
106 |         tubes = []
107 |         for i in range(self.forward_frames * 2):
108 |             frame_index = start_frame + i * self.frame_stride
109 |             if frame_index not in self.recorder:
110 |                 continue
111 | 
112 |             det_ids = self.recorder[frame_index]
113 | 
114 |             # 1. get tubes
115 |             for track_index, node_index in det_ids:
116 |                 t = self.tracks.get_track_by_index(track_index)
117 |                 n = t.get_node_by_index(node_index)
118 |                 mid_box = np.concatenate((n.box, np.array([frame_index - start_frame])))
119 |                 # backward
120 |                 back_box = self.bbox2tube(track=t, mid_id=node_index, direction='back',
121 |                                           pos_in_video=i * self.frame_stride, thre=self.tube_thre)
122 |                 # forward
123 |                 front_box = self.bbox2tube(track=t, mid_id=node_index, direction='front',
124 |                                            pos_in_video=i * self.frame_stride, thre=self.tube_thre)
125 |                 tube = np.concatenate((mid_box, front_box, back_box))
126 |                 tubes.append(tube)
127 | 
128 |         if len(tubes) == 0:
129 |             return 0
130 |         tubes = np.array(tubes)
131 |         try:
132 |             os.makedirs(os.path.join(self.folder, 'tubes_' + str(self.forward_frames) + '_' + str(self.frame_stride) + '_' + str(self.min_visibility)))
133 |         except:
134 |             pass
135 |         pickle.dump(tubes, open(os.path.join(self.folder, 'tubes_' + str(self.forward_frames) + '_' + str(self.frame_stride) + '_' + str(self.min_visibility), str(start_frame)), 'wb'))
136 |         return 0
137 | 
138 |     def clear(self):
139 |         try:
140 |             shutil.rmtree(os.path.join(self.folder, 'tubes_' + str(self.forward_frames) + '_' + str(self.frame_stride) + '_' + str(self.min_visibility)))
141 |         except:
142 |             pass
143 | 
144 |     def __len__(self):
145 |         return self.max_frame_index
146 | 
147 | 
148 | class GTParser:
149 |     def __init__(self, mot_root,
150 |                  arg,
151 |                  type='train',
152 |                  ):
153 |         # analsis all the folder in mot_root
154 |         # 1. get all the folders
155 |         mot_root = os.path.join(mot_root, type)
156 |         all_folders = sorted(
157 |             [os.path.join(mot_root, i) for i in os.listdir(mot_root)
158 |              if os.path.isdir(os.path.join(mot_root, i))
159 |              and i.find('FRCNN') != -1]
160 |         )
161 |         # 2. create single parser
162 |         self.parsers = [GTSingleParser(folder, forward_frames=arg.forward_frames,
163 |                                        min_visibility=arg.min_visibility,
164 |                                        frame_stride=arg.frame_stride,
165 |                                        tube_thre=arg.tube_thre) for folder in all_folders]
166 | 
167 |         # 3. get some basic information
168 |         self.lens = [len(p) for p in self.parsers]
169 |         self.len = sum(self.lens)
170 | 
171 |     def __len__(self):
172 |         # get the length of all the matching frame
173 |         return self.len
174 | 
175 |     def clear(self):
176 |         print('Clearing')
177 |         for parser in tqdm(self.parsers, ncols=20):
178 |             parser.clear()
179 | 
180 |     def run(self):
181 |         print('Running')
182 |         pool = multiprocessing.Pool(processes=40)
183 |         pool_list = []
184 |         for item in tqdm(range(self.len), ncols=20):
185 |             total_len = 0
186 |             index = 0
187 |             current_item = item
188 |             for l in self.lens:
189 |                 total_len += l
190 |                 if item < total_len:
191 |                     break
192 |                 else:
193 |                     index += 1
194 |                     current_item -= l
195 | 
196 |             if index >= len(self.parsers):
197 |                 return
198 |             pool_list.append(pool.apply_async(self.parsers[index].get_item, (current_item,)))
199 |             # self.parsers[index].get_item(current_item)
200 |         for p in tqdm(pool_list, ncols=20):
201 |             p.get()
202 |         pool.close()
203 |         pool.join()
204 | 
205 | 
206 | if __name__ == '__main__':
207 |     arg_parser = argparse.ArgumentParser()
208 |     arg_parser.add_argument('--mot_root', default='./data', type=str, help="mot data root")
209 |     arg, unparsed = arg_parser.parse_known_args()
210 |     config = __C
211 |     cfg_from_file('../configs/get_MOT17_tube.yaml')
212 |     parser = GTParser(mot_root=arg.mot_root, arg=config)
213 |     parser.clear()
214 |     parser.run()
215 | 


--------------------------------------------------------------------------------
/pre_processing/get_tubes_jta.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import os
  3 | import pandas as pd
  4 | from network.utils import bbox_iou
  5 | import pickle
  6 | from tqdm import tqdm
  7 | import argparse
  8 | import multiprocessing
  9 | from configs.default import __C, cfg_from_file
 10 | from dataset.Parsers.structures import *
 11 | 
 12 | 
 13 | class GTSingleParser:
 14 |     def __init__(self, folder,
 15 |                  min_visibility,
 16 |                  forward_frames,
 17 |                  frame_stride,
 18 |                  tube_thre,
 19 |                  loose,
 20 |                  height_clamp):
 21 |         # 1. get the gt path and image folder
 22 |         split_path = folder.split('/')
 23 |         if folder[0] == '/':
 24 |             jta_root = '/' + os.path.join(*split_path[:-3])
 25 |         else:
 26 |             jta_root = os.path.join(*split_path[:-3])
 27 |         type = split_path[-2]
 28 |         video_name = split_path[-1]
 29 |         gt_file_path = os.path.join(jta_root, 'gt_' + str(loose) + '_' + str(min_visibility) + '_' + str(height_clamp), type, video_name, 'gt.txt')
 30 |         # gt_file_path = os.path.join(folder, 'gt/gt.txt')
 31 | 
 32 |         self.folder = folder
 33 |         self.forward_frames = forward_frames
 34 |         self.tube_thre = tube_thre
 35 |         self.min_visibility = min_visibility
 36 |         self.frame_stride = frame_stride
 37 | 
 38 |         self.tube_res_path = os.path.join(jta_root,
 39 |                                           'tubes_' + str(self.forward_frames) + '_' + str(
 40 |                                               self.frame_stride) + '_' + str(self.min_visibility),
 41 |                                           type,
 42 |                                           video_name)
 43 | 
 44 |         try:
 45 |             os.makedirs(self.tube_res_path)
 46 |         except:
 47 |             pass
 48 | 
 49 |         # 2. read the gt data
 50 |         gt_file = pd.read_csv(gt_file_path, header=None)
 51 |         gt_file = gt_file[gt_file[6] == 1]  # human class
 52 |         gt_file = gt_file[gt_file[8] > min_visibility]
 53 |         gt_group = gt_file.groupby(0)
 54 |         gt_group_keys = gt_group.indices.keys()
 55 |         self.max_frame_index = max(gt_group_keys)
 56 |         # 3. update tracks
 57 |         self.tracks = Tracks()
 58 |         self.recorder = {}
 59 |         for key in gt_group_keys:
 60 |             det = gt_group.get_group(key).values
 61 |             ids = np.array(det[:, 1]).astype(int)
 62 |             det = np.array(det[:, 2:6])
 63 |             det[:, 2:4] += det[:, :2]
 64 | 
 65 |             self.recorder[key - 1] = list()
 66 |             # 3.1 update tracks
 67 |             for id, d in zip(ids, det):
 68 |                 node = Node(d, key - 1)
 69 |                 track_index, node_index = self.tracks.add_node(node, id)
 70 |                 self.recorder[key - 1].append((track_index, node_index))
 71 | 
 72 |     def bbox2tube(self, track, mid_id, direction, pos_in_video, thre):
 73 |         def get_true_z(mid_node, end_node):
 74 |             return end_node.frame_id - mid_node.frame_id
 75 | 
 76 |         def get_inter_box(start_box, end_box, inter_id, end_id):
 77 |             return start_box * (end_id - inter_id) / end_id + end_box * inter_id / end_id
 78 | 
 79 |         mid_node = track.get_node_by_index(mid_id)
 80 |         mid_box = mid_node.box
 81 |         inter_boxes = []
 82 | 
 83 |         z = 1 if direction == 'front' else -1
 84 |         if mid_id + z >= len(track.nodes) or mid_id + z < 0:
 85 |             return np.array([0, 0, 0, 0, 0])
 86 | 
 87 |         true_z = get_true_z(mid_node, track.get_node_by_index(mid_id + z))
 88 | 
 89 |         max_len = (self.forward_frames * 2 - 1) * self.frame_stride + 1
 90 | 
 91 |         while -1 * pos_in_video <= true_z < max_len - pos_in_video:
 92 |             iou_total = 0
 93 |             end_node = track.get_node_by_index(mid_id + z)
 94 |             end_box = end_node.box
 95 |             for i, gt_box in enumerate(inter_boxes):
 96 |                 iou = sum(bbox_iou(gt_box[None], get_inter_box(mid_box, end_box, i + 1, len(inter_boxes) + 1)[None]))
 97 |                 iou_total += iou
 98 |             iou_total += 1
 99 |             iou_total /= (len(inter_boxes) + 1)
100 | 
101 |             if iou_total < thre:
102 |                 break
103 | 
104 |             inter_boxes.append(end_box)
105 |             if z % self.frame_stride == 0:
106 |                 res_z = true_z
107 | 
108 |             z += 1 if direction == 'front' else -1
109 |             if mid_id + z >= len(track.nodes) or mid_id + z < 0:
110 |                 break
111 |             true_z = get_true_z(mid_node, track.get_node_by_index(mid_id + z))
112 | 
113 |         if not inter_boxes or len(inter_boxes) < self.frame_stride:
114 |             return np.array([0, 0, 0, 0, 0])
115 |         else:
116 |             ret_ind = (len(inter_boxes) // self.frame_stride) * self.frame_stride - 1
117 |             return np.concatenate((np.array([abs(res_z)]), inter_boxes[ret_ind] - mid_box))
118 | 
119 |     def get_item(self, frame_index):
120 |         start_frame = frame_index
121 |         max_len = (self.forward_frames * 2 - 1) * self.frame_stride + 1
122 |         if self.max_frame_index - start_frame < max_len:
123 |             return 0
124 | 
125 |         tubes = []
126 |         for i in range(self.forward_frames * 2):
127 |             frame_index = start_frame + i * self.frame_stride
128 |             if frame_index not in self.recorder:
129 |                 continue
130 | 
131 |             det_ids = self.recorder[frame_index]
132 | 
133 |             # 1. get tubes
134 |             for track_index, node_index in det_ids:
135 |                 t = self.tracks.get_track_by_index(track_index)
136 |                 n = t.get_node_by_index(node_index)
137 |                 mid_box = np.concatenate((n.box, np.array([frame_index - start_frame])))
138 |                 # backward
139 |                 back_box = self.bbox2tube(track=t, mid_id=node_index, direction='back',
140 |                                           pos_in_video=i * self.frame_stride, thre=self.tube_thre)
141 |                 # forward
142 |                 front_box = self.bbox2tube(track=t, mid_id=node_index, direction='front',
143 |                                            pos_in_video=i * self.frame_stride, thre=self.tube_thre)
144 | 
145 |                 # remove the fast turning
146 |                 mid_box_w = mid_box[2] - mid_box[0]
147 |                 if abs(front_box[0] - back_box[0]) > 2.5 * mid_box_w:
148 |                     print('remove turning')
149 |                     continue
150 | 
151 |                 tube = np.concatenate((mid_box, front_box, back_box))
152 |                 tubes.append(tube)
153 | 
154 |         if len(tubes) == 0:
155 |             return 0
156 |         tubes = np.array(tubes)
157 | 
158 |         pickle.dump(tubes, open(os.path.join(self.tube_res_path, str(start_frame)), 'wb'))
159 |         return 0
160 | 
161 |     def __len__(self):
162 |         return self.max_frame_index
163 | 
164 | 
165 | class GTParser:
166 |     def __init__(self, jta_root,
167 |                  arg,
168 |                  loose,
169 |                  height_clamp,
170 |                  type='train',
171 |                  ):
172 |         # analsis all the folder in jta_root
173 |         # 1. get all the folders
174 |         self.jta_root = jta_root
175 |         jta_root = os.path.join(jta_root, type)
176 |         all_folders = sorted(
177 |             [os.path.join(jta_root, i) for i in os.listdir(jta_root)
178 |              if os.path.isdir(os.path.join(jta_root, i))]
179 |         )
180 |         # 2. create single parser
181 |         print('Init SingleParser')
182 |         self.parsers = [GTSingleParser(folder, forward_frames=arg.forward_frames,
183 |                                        min_visibility=arg.min_visibility,
184 |                                        frame_stride=arg.frame_stride,
185 |                                        tube_thre=arg.tube_thre,
186 |                                        loose=loose,
187 |                                        height_clamp=height_clamp) for folder in tqdm(all_folders, ncols=20)]
188 | 
189 |         # 3. get some basic information
190 |         self.lens = [len(p) for p in self.parsers]
191 |         self.len = sum(self.lens)
192 | 
193 |     def __len__(self):
194 |         # get the length of all the matching frame
195 |         return self.len
196 | 
197 |     def clear(self):
198 |         print('Clearing')
199 | 
200 |     def run(self):
201 |         print('Running')
202 |         pool = multiprocessing.Pool(processes=40)
203 |         pool_list = []
204 |         for item in tqdm(range(self.len), ncols=20):
205 |             total_len = 0
206 |             index = 0
207 |             current_item = item
208 |             for l in self.lens:
209 |                 total_len += l
210 |                 if item < total_len:
211 |                     break
212 |                 else:
213 |                     index += 1
214 |                     current_item -= l
215 | 
216 |             if index >= len(self.parsers):
217 |                 return
218 |             pool_list.append(pool.apply_async(self.parsers[index].get_item, (current_item,)))
219 |             # self.parsers[index].get_item(current_item)
220 |         for p in tqdm(pool_list, ncols=20):
221 |             p.get()
222 |         pool.close()
223 |         pool.join()
224 | 
225 | 
226 | def get_gt(json_path, frames_path, loose, min_visiblity, height_clamp):
227 |     assert os.path.exists(json_path), 'File does not exist: {}'.format(json_path)
228 |     assert os.path.exists(frames_path), 'Folder does not exist: {}'.format(frames_path)
229 |     split_path = frames_path.split('/')
230 |     if frames_path[0] == '/':
231 |         jta_root = '/' + os.path.join(*split_path[:-3])
232 |     else:
233 |         jta_root = os.path.join(*split_path[:-3])
234 |     type = split_path[-2]
235 |     video_name = split_path[-1]
236 |     gt_path = os.path.join(jta_root, 'gt_' + str(loose) + '_' + str(min_visiblity) + '_' + str(height_clamp), type, video_name)
237 |     try:
238 |         os.makedirs(gt_path)
239 |     except:
240 |         pass
241 |     gt_file = os.path.join(gt_path, 'gt.txt')
242 |     df = pd.read_json(json_path)
243 |     df = df.iloc[:, [0, 1, 3, 4, 8]]  # Frame, ID, x, y, occluded
244 |     df_group = df.groupby([0, 1])  # Group by frame and id
245 | 
246 |     def get_bbox(g):
247 |         assert len(g.columns) == 5
248 |         if g.iloc[:, 4].sum() >= (1 - min_visiblity) * len(g):  # Completely occluded
249 |             return pd.Series([-1, 0, 0, 0, 0, 0, 0], dtype=np.int)
250 |         x1 = np.maximum(0, g.iloc[:, 2].min())
251 |         y1 = np.maximum(0, g.iloc[:, 3].min())
252 |         x2 = np.minimum(1920, g.iloc[:, 2].max())
253 |         y2 = np.minimum(1080, g.iloc[:, 3].max())
254 |         w = x2 - x1
255 |         h = y2 - y1
256 |         # Loose a little bit
257 |         x1 -= np.round(w * loose)
258 |         y1 -= np.round(h * loose)
259 |         x1 = np.maximum(0.0, x1)
260 |         y1 = np.maximum(0.0, y1)
261 |         w = np.round(w * (1 + loose*2))
262 |         h = np.round(h * (1 + loose*2))
263 |         w = np.minimum(1920 - x1, w)
264 |         h = np.minimum(1080 - y1, h)
265 | 
266 |         return pd.Series([x1, y1, w, h, 1, 1, 1], dtype=np.int)
267 | 
268 |     res_df = df_group.apply(get_bbox)
269 |     res_df = res_df[res_df.iloc[:, 0] != -1]
270 | 
271 |     # get mode and remove the small box
272 |     ns, edges = np.histogram(res_df.iloc[:, 3], bins=50)
273 |     max_n = np.argmax(ns)
274 |     mode = np.mean(edges[[max_n, max_n + 1]])
275 |     res_df = res_df[res_df.iloc[:, 3] > height_clamp * mode]
276 |     res_df = res_df[res_df.iloc[:, 3] > 7]
277 | 
278 |     res_df.to_csv(gt_file, header=False)
279 | 
280 | 
281 | def get_gts(jta_root, frames_dir, loose, min_vis, height_clamp):
282 |     pool = multiprocessing.Pool(processes=20)
283 |     pool_list = []
284 |     anno_path = os.path.join(jta_root, 'annotations')
285 |     for type in os.listdir(anno_path):
286 |         for json_file in os.listdir(os.path.join(anno_path, type)):
287 |             json_path = os.path.join(anno_path, type, json_file)
288 |             frames_path = os.path.join(jta_root, frames_dir, type, os.path.splitext(json_file)[0])
289 |             pool_list.append(pool.apply_async(get_gt, (json_path, frames_path, loose, min_vis, height_clamp, )))
290 | 
291 |     for p in tqdm(pool_list, ncols=20):
292 |         p.get()
293 |     pool.close()
294 |     pool.join()
295 | 
296 | 
297 | if __name__ == '__main__':
298 |     parser = argparse.ArgumentParser()
299 |     parser.add_argument('--jta_root', type=str, help="data path of jta")
300 |     parser.add_argument('--loose', type=float, default=0.1, help="ratio to loose the bbox generated from keypoint")
301 |     parser.add_argument('--height_clamp', type=float, default=0.6, help="get rid of the bboxes whose height is smaller "
302 |                                                                         "than 0.6 of the mean height")
303 |     arg_input, unparsed = parser.parse_known_args()
304 | 
305 |     arg = __C
306 |     cfg_from_file('../configs/get_jta_tube.yaml')
307 | 
308 |     print('Generating GT files')
309 |     get_gts(jta_root=arg_input.jta_root, frames_dir='imgs', loose=arg_input.loose, min_vis=arg.min_visibility,
310 |             height_clamp=arg_input.height_clamp)
311 |     print('Generating Tubes')
312 |     parser = GTParser(jta_root=os.path.join(arg_input.jta_root, 'imgs'), arg=arg, type='train', loose=arg_input.loose,
313 |                       height_clamp=arg_input.height_clamp)
314 |     parser.clear()
315 |     parser.run()
316 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==1.1.0
2 | torchvision
3 | tqdm
4 | tensorboardX
5 | PyYAML==5.1.2
6 | opencv-python
7 | Pillow
8 | easydict


--------------------------------------------------------------------------------
/seqmaps/AVG-TownCentre.txt:
--------------------------------------------------------------------------------
1 | AVG-TownCentre


--------------------------------------------------------------------------------
/seqmaps/JTA_train_turning:
--------------------------------------------------------------------------------
1 | seq_9
2 | seq_15
3 | seq_5


--------------------------------------------------------------------------------
/seqmaps/MOT15_test.txt:
--------------------------------------------------------------------------------
 1 | ADL-Rundle-6
 2 | ADL-Rundle-8
 3 | ETH-Bahnhof
 4 | ETH-Pedcross2
 5 | ETH-Sunnyday
 6 | KITTI-13
 7 | KITTI-17
 8 | PETS09-S2L1
 9 | TUD-Campus
10 | TUD-Stadtmitte
11 | Venice-2


--------------------------------------------------------------------------------
/seqmaps/MOT17-01-FRCNN.txt:
--------------------------------------------------------------------------------
1 | MOT17-01-FRCNN


--------------------------------------------------------------------------------
/seqmaps/MOT17-02-FRCNN.txt:
--------------------------------------------------------------------------------
1 | MOT17-02-FRCNN


--------------------------------------------------------------------------------
/seqmaps/MOT17-04-FRCNN.txt:
--------------------------------------------------------------------------------
1 | MOT17-04-FRCNN


--------------------------------------------------------------------------------
/seqmaps/MOT17-13-FRCNN.txt:
--------------------------------------------------------------------------------
1 | MOT17-13-FRCNN


--------------------------------------------------------------------------------
/seqmaps/MOT17-14-FRCNN.txt:
--------------------------------------------------------------------------------
1 | MOT17-14-FRCNN


--------------------------------------------------------------------------------
/seqmaps/MOT17_test.txt:
--------------------------------------------------------------------------------
1 | MOT17-01-FRCNN
2 | MOT17-03-FRCNN
3 | MOT17-06-FRCNN
4 | MOT17-07-FRCNN
5 | MOT17-08-FRCNN
6 | MOT17-12-FRCNN
7 | MOT17-14-FRCNN


--------------------------------------------------------------------------------
/seqmaps/MOT17_train.txt:
--------------------------------------------------------------------------------
1 | MOT17-02-FRCNN
2 | MOT17-04-FRCNN
3 | MOT17-05-FRCNN
4 | MOT17-09-FRCNN
5 | MOT17-10-FRCNN
6 | MOT17-11-FRCNN
7 | MOT17-13-FRCNN


--------------------------------------------------------------------------------
/seqmaps/PETS09-S2L2.txt:
--------------------------------------------------------------------------------
1 | PETS09-S2L2


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BoPang1996/TubeTK/bcca334c5348f9ae33e04595e1af93cf8351e50e/utils/__init__.py


--------------------------------------------------------------------------------
/utils/mem_track.py:
--------------------------------------------------------------------------------
 1 | import gc
 2 | import datetime
 3 | import pynvml
 4 | 
 5 | import torch
 6 | import numpy as np
 7 | 
 8 | 
 9 | class MemTracker(object):
10 |     """
11 |     Class used to track pytorch memory usage
12 |     Arguments:
13 |         frame: a frame to detect current py-file runtime
14 |         detail(bool, default True): whether the function shows the detail gpu memory usage
15 |         path(str): where to save log file
16 |         verbose(bool, default False): whether show the trivial exception
17 |         device(int): GPU number, default is 0
18 |     """
19 |     def __init__(self, frame, detail=True, path='', verbose=False, device=0):
20 |         self.frame = frame
21 |         self.print_detail = detail
22 |         self.last_tensor_sizes = set()
23 |         self.gpu_profile_fn = path + f'{datetime.datetime.now():%d-%b-%y-%H:%M:%S}-gpu_mem_track.txt'
24 |         self.verbose = verbose
25 |         self.begin = True
26 |         self.device = device
27 | 
28 |         self.func_name = frame.f_code.co_name
29 |         self.filename = frame.f_globals["__file__"]
30 |         if (self.filename.endswith(".pyc") or
31 |                 self.filename.endswith(".pyo")):
32 |             self.filename = self.filename[:-1]
33 |         self.module_name = self.frame.f_globals["__name__"]
34 |         self.curr_line = self.frame.f_lineno
35 | 
36 |     def get_tensors(self):
37 |         for obj in gc.get_objects():
38 |             try:
39 |                 if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
40 |                     tensor = obj
41 |                 else:
42 |                     continue
43 |                 if tensor.is_cuda:
44 |                     yield tensor
45 |             except Exception as e:
46 |                 if self.verbose:
47 |                     print('A trivial exception occured: {}'.format(e))
48 | 
49 |     def track(self):
50 |         """
51 |         Track the GPU memory usage
52 |         """
53 |         pynvml.nvmlInit()
54 |         handle = pynvml.nvmlDeviceGetHandleByIndex(self.device)
55 |         meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
56 |         self.curr_line = self.frame.f_lineno
57 |         where_str = self.module_name + ' ' + self.func_name + ':' + ' line ' + str(self.curr_line)
58 | 
59 |         with open(self.gpu_profile_fn, 'a+') as f:
60 | 
61 |             if self.begin:
62 |                 f.write(f"GPU Memory Track | {datetime.datetime.now():%d-%b-%y-%H:%M:%S} |"
63 |                         f" Total Used Memory:{meminfo.used/1000**2:<7.1f}Mb\n\n")
64 |                 self.begin = False
65 | 
66 |             if self.print_detail is True:
67 |                 ts_list = [tensor.size() for tensor in self.get_tensors()]
68 |                 new_tensor_sizes = {(type(x), tuple(x.size()), ts_list.count(x.size()), np.prod(np.array(x.size()))*4/1000**2)
69 |                                     for x in self.get_tensors()}
70 |                 for t, s, n, m in new_tensor_sizes - self.last_tensor_sizes:
71 |                     f.write(f'+ | {str(n)} * Size:{str(s):<20} | Memory: {str(m*n)[:6]} M | {str(t):<20}\n')
72 |                 for t, s, n, m in self.last_tensor_sizes - new_tensor_sizes:
73 |                     f.write(f'- | {str(n)} * Size:{str(s):<20} | Memory: {str(m*n)[:6]} M | {str(t):<20} \n')
74 |                 self.last_tensor_sizes = new_tensor_sizes
75 | 
76 |             f.write(f"\nAt {where_str:<50}"
77 |                     f"Total Used Memory:{meminfo.used/1000**2:<7.1f}Mb\n\n")
78 | 
79 |         pynvml.nvmlShutdown()


--------------------------------------------------------------------------------
/utils/util.py:
--------------------------------------------------------------------------------
 1 | class AverageMeter(object):
 2 |     """Computes and stores the average and current value"""
 3 | 
 4 |     def __init__(self):
 5 |         self.val = 0
 6 |         self.avg = 0
 7 |         self.sum = 0
 8 |         self.count = 0
 9 |         self.reset()
10 | 
11 |     def reset(self):
12 |         self.val = 0
13 |         self.avg = 0
14 |         self.sum = 0
15 |         self.count = 0
16 | 
17 |     def update(self, val, n=1):
18 |         self.val = val
19 |         self.sum += val * n
20 |         self.count += n
21 |         self.avg = self.sum / self.count
22 | 
23 | 


--------------------------------------------------------------------------------