├── tracking
│   ├── config
│   │   ├── parameters_per_scene.py
│   │   └── scene_2_camera_id_file.json
│   ├── requirements.txt
│   ├── src
│   │   ├── run.py
│   │   ├── tracking.py
│   │   ├── utils.py
│   │   ├── pose.py
│   │   ├── scpt.py
│   │   └── mcpt.py
│   └── infer.py
├── ranking.jpg
├── overall-pipeline.png
├── scripts
│   ├── tracking.sh
│   ├── extract_frame.sh
│   ├── detection.sh
│   ├── embedding.sh
│   └── pose.sh
├── poser
│   ├── load_tracking_result.py
│   └── top_down_video_demo_with_track_file.py
├── LICENSE
├── tools
│   ├── extract_frame.py
│   └── generate_submission.py
├── embedder
│   └── aic24_extract.py
├── README.md
└── detector
    └── aic24_get_detection.py
/tracking/config/parameters_per_scene.py:
--------------------------------------------------------------------------------
1 | parameters_per_scene = {
2 | }
--------------------------------------------------------------------------------
/ranking.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riips/AIC24_Track1_YACHIYO_RIIPS/HEAD/ranking.jpg
--------------------------------------------------------------------------------
/overall-pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riips/AIC24_Track1_YACHIYO_RIIPS/HEAD/overall-pipeline.png
--------------------------------------------------------------------------------
/scripts/tracking.sh:
--------------------------------------------------------------------------------
1 | SCENE=$*
2 |
3 | #cd tracking
4 |
5 | for SCENE in $*
6 | do
7 | echo Processing scene-$SCENE
8 | python tracking/infer.py -s $SCENE
9 | done
--------------------------------------------------------------------------------
/scripts/extract_frame.sh:
--------------------------------------------------------------------------------
1 | conda activate botsort_env
2 |
3 | for SCENE in $*
4 | do
5 | F_SCENE=$(printf "%03d" "$SCENE")
6 | echo Processing scene-$F_SCENE
7 | python3 tools/extract_frame.py -s scene_$F_SCENE ./
8 | done
--------------------------------------------------------------------------------
/scripts/detection.sh:
--------------------------------------------------------------------------------
1 | cp ./detector/aic24_get_detection.py ./BoT-SORT/tools/
2 | cd ./BoT-SORT
3 | conda activate botsort_env
4 |
5 | for SCENE in $*
6 | do
7 | F_SCENE=$(printf "%03d" "$SCENE")
8 | echo Processing scene-$F_SCENE
9 | python3 tools/aic24_get_detection.py -s scene_$F_SCENE ../
10 | done
--------------------------------------------------------------------------------
/scripts/embedding.sh:
--------------------------------------------------------------------------------
1 | cp ./embedder/aic24_extract.py ./deep-person-reid/torchreid/
2 | cd ./deep-person-reid
3 | conda activate torchreid
4 |
5 | for SCENE in $*
6 | do
7 | F_SCENE=$(printf "%03d" "$SCENE")
8 | echo Processing scene-$F_SCENE
9 | python3 torchreid/aic24_extract.py -s scene_$F_SCENE ../
10 | done
--------------------------------------------------------------------------------
/tracking/requirements.txt:
--------------------------------------------------------------------------------
1 | contourpy==1.2.1
2 | cycler==0.12.1
3 | fonttools==4.51.0
4 | joblib==1.4.0
5 | kiwisolver==1.4.5
6 | matplotlib==3.8.4
7 | numpy==1.26.4
8 | opencv-python-headless==4.9.0.80
9 | packaging==24.0
10 | pillow==10.3.0
11 | pyparsing==3.1.2
12 | python-dateutil==2.9.0.post0
13 | scikit-learn==1.4.2
14 | scipy==1.13.0
15 | six==1.16.0
16 | threadpoolctl==3.4.0
17 | tqdm==4.66.2
18 |
--------------------------------------------------------------------------------
/scripts/pose.sh:
--------------------------------------------------------------------------------
1 | cp ./poser/load_tracking_result.py ./mmpose/demo/
2 | cp ./poser/top_down_video_demo_with_track_file.py ./mmpose/demo/
3 | cd ./mmpose
4 | conda activate openmmlab
5 |
6 | for SCENE in $*
7 | do
8 | F_SCENE=$(printf "%03d" "$SCENE")
9 | echo Processing scene-$F_SCENE
10 | find "../Detection/scene_$F_SCENE" -maxdepth 1 -type f -name "*.txt" | while read -r file;
11 | do
12 | CAMERA=$(basename "$file")
13 | number=$(echo "$CAMERA" | sed 's/camera_\([0-9]\+\).txt/\1/')
14 | python3 demo/top_down_video_demo_with_track_file.py ../Detection/scene_${F_SCENE}/${CAMERA} ./configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth --video-path ../Original/scene_${F_SCENE}/camera_${number}/video.mp4 --out-file ../Pose/scene_${F_SCENE}/camera_${number}/camera_${number}_out_keypoint.json
15 | done
16 | done
17 |
--------------------------------------------------------------------------------
/poser/load_tracking_result.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 |
4 |
5 | def load_tracking(file_name):
6 | tracking_file = open(file_name)
7 | result = {}
8 | for line in tracking_file:
9 | line = line.rstrip().split(',')
10 | frame_id = int(line[1])
11 | track_id = int(line[2])
12 | bbox = [float(line[3]), float(line[4]), float(line[5]), float(line[6]), 1.0]
13 | if frame_id not in result.keys():
14 | result[frame_id] = []
15 | result[frame_id].append({'bbox': np.array(bbox)})
16 | return result
17 |
18 | def load_tracking_id(file_name):
19 | tracking_file = open(file_name)
20 | result = {}
21 | for line in tracking_file:
22 | line = line.rstrip().split(',')
23 | frame_id = int(line[1])
24 | track_id = int(line[2])
25 | if frame_id not in result.keys():
26 | result[frame_id] = []
27 | result[frame_id].append({'track_id': track_id})
28 | return result
29 |
30 | if __name__ == '__main__':
31 | print("run load_tracking")
32 | #load_tracking('')
33 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 RIIPS
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/tools/extract_frame.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import numpy as np
4 | import PIL.Image as Image
5 | import cv2
6 | from multiprocessing import Pool
7 | from sys import stdout
8 | import argparse
9 | import os.path as osp
10 |
11 | def make_parser():
12 | parser = argparse.ArgumentParser("reid")
13 | parser.add_argument("root_path", type=str, default=None)
14 | parser.add_argument("-s", "--scene", type=str, default=None)
15 | return parser
16 |
17 | args = make_parser().parse_args()
18 | data_root = osp.join(args.root_path, "Original")
19 | scene = args.scene
20 |
21 | fprint, endl = stdout.write, "\n"
22 |
23 | IMAGE_FORMAT = ".jpg"
24 |
25 |
26 | def video2image(parameter_set):
27 | scenario, camera, camera_dir = parameter_set
28 | fprint(f"[Processing] {scenario} {camera}{endl}")
29 | imgs_dir = f"{camera_dir}/Frame"
30 | if not os.path.exists(imgs_dir):
31 | os.makedirs(imgs_dir)
32 | print("camera_dir:" + camera_dir)
33 | cap = cv2.VideoCapture(f"{camera_dir}/video.mp4")
34 | current_frame = 1
35 | ret, frame = cap.read()
36 | while ret:
37 | frame_file_name = f"{str(current_frame).zfill(6)}{IMAGE_FORMAT}"
38 | cv2.imwrite(f"{imgs_dir}/{frame_file_name}", frame)
39 | ret, frame = cap.read()
40 | current_frame += 1
41 | fprint(f"[Done] {scenario} {camera}{endl}")
42 |
43 |
44 | def main():
45 | parameter_sets = []
46 | scenario_dir = osp.join(data_root, scene)
47 | cameras = os.listdir(scenario_dir)
48 | for each_camera in cameras:
49 | cam = each_camera
50 | if "map" in each_camera:
51 | continue
52 | camera_dir = f"{scenario_dir}/{each_camera}"
53 | parameter_sets.append(
54 | [scene, each_camera, camera_dir]
55 | )
56 |
57 | pool = Pool(processes=len(parameter_sets))
58 | pool.map(video2image, parameter_sets)
59 | pool.close()
60 |
61 |
62 | if __name__ == "__main__":
63 | main()
64 |
65 |
--------------------------------------------------------------------------------
/tools/generate_submission.py:
--------------------------------------------------------------------------------
1 | import json
2 | import argparse
3 | import os
4 | import numpy as np
5 |
6 | def read_json_file(file_path):
7 | with open(file_path, 'r') as file:
8 | data = json.load(file)
9 | return data
10 |
11 | def convert_coordinates_2world(x, y):
12 | vector_xyz = np.array([x, y, 1]) # homogeneous coordinate, z=1
13 | vector_xyz_3d = np.dot(np.linalg.inv(homography_matrix), vector_xyz.T)
14 | vector_xyz_3d = vector_xyz_3d / vector_xyz_3d[2]
15 | return vector_xyz_3d[0], vector_xyz_3d[1]
16 |
17 | def load_calibration(calib_path):
18 | data = read_json_file(calib_path)
19 | global camera_projection_matrix
20 | global homography_matrix
21 | camera_projection_matrix = np.array(data["camera projection matrix"])
22 | homography_matrix = np.array(data["homography matrix"])
23 |
24 | def generate_submission(json_path, data_root="", save_path=""):
25 | json_path = os.path.join(data_root, json_path)
26 | submission_path = os.path.join(data_root, save_path )
27 | if not os.path.exists(submission_path):
28 | os.makedirs(submission_path)
29 | submission_path = os.path.join(submission_path, 'track1.txt')
30 | json_data = read_json_file(json_path)
31 | ret_data = []
32 | for cam in json_data:
33 | print(f"processing camera : {cam.zfill(3)}")
34 | for seq in json_data[cam]:
35 | item = json_data[cam][seq]
36 | if "GlobalOfflineID" in item:
37 | ret_line = [cam, \
38 | item["GlobalOfflineID"], \
39 | (item["Frame"] - 1), \
40 | item["Coordinate"]["x1"], \
41 | item["Coordinate"]["y1"], \
42 | (item["Coordinate"]["x2"] - item["Coordinate"]["x1"]), \
43 | (item["Coordinate"]["y2"] - item["Coordinate"]["y1"]), \
44 | "{:.6f}".format(item["WorldCoordinate"]["x"]), \
45 | "{:.6f}".format(item["WorldCoordinate"]["y"])]
46 | ret_data.append(ret_line)
47 | ret_data = sorted(ret_data, key=lambda x: (int(x[0]), int(x[2]), int(x[1])))
48 | np.savetxt(submission_path, ret_data, delimiter=' ', fmt="%s")
49 |
50 |
51 | if __name__ == "__main__":
52 | print("create track1.txt")
53 | scenes = os.listdir("./Tracking/")
54 | for sc in scenes:
55 | print(f"processing scene : {sc}")
56 | generate_submission(json_path=os.path.join(f"Tracking", sc,"fixed_whole_tracking_results.json"), save_path=os.path.join(f"Submission", sc))
57 |
58 | print("merge track1.txt")
59 | with open(os.path.join("Submission", "track1.txt"), "w") as merged_file:
60 | for file_path in scenes:
61 | with open(os.path.join("Submission", f"{file_path}/track1.txt"), "r") as file:
62 | merged_file.write(file.read())
--------------------------------------------------------------------------------
/embedder/aic24_extract.py:
--------------------------------------------------------------------------------
1 | '''
2 | extract ReID features from testing data.
3 | '''
4 | import os
5 | import argparse
6 | import os.path as osp
7 | import numpy as np
8 | import torch
9 | import time
10 | import torchvision.transforms as T
11 | from PIL import Image
12 | import sys
13 | from utils import FeatureExtractor
14 | import torchreid
15 | import json
16 |
17 | def make_parser():
18 | parser = argparse.ArgumentParser("reid")
19 | parser.add_argument("root_path", type=str, default=None)
20 | parser.add_argument("-s", "--scene", type=str, default=None)
21 | return parser
22 |
23 | if __name__ == "__main__":
24 |
25 | args = make_parser().parse_args()
26 | data_root = args.root_path
27 | scene = args.scene
28 |
29 | sys.path.append(data_root+'/deep-person-reid')
30 |
31 | img_dir = os.path.join(data_root,'Original')
32 | det_dir = os.path.join(data_root,'Detection')
33 | out_dir = os.path.join(data_root,'EmbedFeature')
34 |
35 | models = {
36 | 'osnet_x1_0':data_root+'/deep-person-reid/checkpoints/osnet_ms_m_c.pth.tar'
37 | }
38 |
39 |
40 | model_names = ['osnet_x1_0']
41 |
42 |
43 | val_transforms = T.Compose([
44 | T.Resize([256, 128]),
45 | T.ToTensor(),
46 | T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
47 | ])
48 |
49 | for model_idx,name in enumerate(models):
50 |
51 | model_p = models[name]
52 | model_name = model_names[model_idx]
53 |
54 | print('Using model {}'.format(name))
55 |
56 | extractor = FeatureExtractor(
57 | model_name=model_name,
58 | model_path=model_p,
59 | device='cuda'
60 | )
61 |
62 | for file in os.listdir(os.path.join(det_dir,scene)):
63 | base, ext = os.path.splitext(file)
64 | if ext == '.txt':
65 | print('processing file {}{}'.format(base,ext))
66 | det_path = os.path.join(det_dir,scene,'{}.txt'.format(base))
67 | json_path = os.path.join(det_dir,scene,'{}.json'.format(base))
68 | dets = np.genfromtxt(det_path,dtype=str,delimiter=',')
69 | with open(json_path) as f:
70 | jf = json.load(f)
71 | cur_frame = 0
72 | u_num = 0
73 | emb = np.array([None]*len(dets))
74 | start = time.time()
75 | print('processing scene {} cam {} with {} detections'.format(scene,base,len(dets)))
76 | for idx,(cam,frame,_,x1,y1,x2,y2,conf) in enumerate(dets):
77 | u_num += 1
78 | x1,y1,x2,y2 = map(float,[x1,y1,x2,y2])
79 | if idx%1000 == 0:
80 | if idx !=0:
81 | end = time.time()
82 | print('processing time :',end-start)
83 | start = time.time()
84 | print('process {}/{}'.format(idx,len(dets)))
85 | if cur_frame != int(frame):
86 | cur_frame = int(frame)
87 | if not os.path.isdir(osp.join(out_dir,scene,cam)):
88 | os.makedirs(osp.join(out_dir,scene,cam))
89 | save_fn = os.path.join(out_dir,scene,cam,'feature_{}_{}_{}_{}_{}_{}_{}.npy'.format(cur_frame,u_num,str(int(x1)),str(int(x2)),str(int(y1)),str(int(y2)),str(conf).replace(".","")))
90 | jf[str(idx).zfill(8)]['NpyPath'] = os.path.join(scene,cam,'feature_{}_{}_{}_{}_{}_{}_{}.npy'.format(cur_frame,u_num,str(int(x1)),str(int(x2)),str(int(y1)),str(int(y2)),str(conf).replace(".","")))
91 | img_path = os.path.join(img_dir,scene,cam,'Frame',frame.zfill(6)+'.jpg')
92 | img = Image.open(img_path)
93 |
94 | img_crop = img.crop((x1,y1,x2,y2))
95 | img_crop = val_transforms(img_crop.convert('RGB')).unsqueeze(0)
96 | feature = extractor(img_crop).cpu().detach().numpy()[0]
97 |
98 | np.save(save_fn,feature)
99 | end = time.time()
100 | print('processing time :',end-start)
101 | start = time.time()
102 | print('process {}/{}'.format(idx+1,len(dets)))
103 | with open(json_path, 'w') as f:
104 | json.dump(jf, f, ensure_ascii=False)
105 |
--------------------------------------------------------------------------------
/tracking/src/run.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import argparse
4 | from datetime import datetime
5 |
6 | from tracking import Tracker
7 | from utils import DetectedObjects
8 |
9 | def run_scpt(feature_data_root, out_dir="outdir", tracking_params={}):
10 | # Load and generate "detected object list"
11 | tracking_results = {}
12 | if not os.path.isdir(feature_data_root):
13 | raise Exception(f"No such directory: {feature_data_root}")
14 | if os.path.basename(feature_data_root).startswith("camera_"):
15 | camera_ids = [os.path.basename(feature_data_root)]
16 | feature_data_root = os.path.dirname(feature_data_root)
17 | is_multi = False
18 | else:
19 | camera_ids = [cam_id for cam_id in os.listdir(feature_data_root) if cam_id[:7] == "camera_"]
20 | is_multi = True
21 |
22 | # loading detections
23 | for camera_id in camera_ids:
24 | data_dir = os.path.join(feature_data_root, camera_id)
25 | camera_id = int(camera_id[7:])
26 | detected_objects = load_detections(data_dir)
27 | tracking_results[camera_id] = detected_objects.to_trackingdict()
28 | del detected_objects
29 |
30 | # Run SCT on all detections of all cameras
31 | for camera_id in tracking_results:
32 | tracking_dict = tracking_results[camera_id]
33 | start_time = datetime.now()
34 | tracker = Tracker(tracking_params)
35 | tracking_results[camera_id] = tracker.scpt(tracking_dict) # tracking returns tracking_dict
36 | end_time = datetime.now()
37 | print(f"Camera{camera_id} elapsed time: {end_time - start_time}")
38 |
39 | # Dump the result
40 | out_json = os.path.join(out_dir, f'camera{camera_id:03d}_tracking_results.json')
41 | os.makedirs(os.path.dirname(out_json), exist_ok=True)
42 | with open(out_json, mode='w') as f:
43 | json.dump(tracking_results[camera_id], f)
44 |
45 | def run_mcpt(scene_id, json_dir,out_dir="outdir", tracking_params={}):
46 | start_time = datetime.now()
47 | tracker = Tracker(tracking_params)
48 | whole_tracking_result = tracker.mcpt(scene_id, json_dir,out_dir)
49 |
50 | # Dump the result
51 | out_file = os.path.join(out_dir, 'whole_tracking_results.json')
52 | with open(out_file, mode='w') as f:
53 | json.dump(whole_tracking_result, f)
54 | end_time = datetime.now()
55 | print(f"Elapsed_time: {end_time - start_time}")
56 |
57 |
58 | def correct_scpt_result(scene_id, json_dir, out_dir=None, tracking_params={}):
59 | if not os.path.isdir(json_dir):
60 | raise Exception(f"The directory '{json_dir}' does not exist.")
61 | if out_dir == None:
62 | out_dir = json_dir
63 |
64 | json_files = [f for f in os.listdir(json_dir) if os.path.splitext(f)[1].lower() == ".json" and f.startswith("camera")]
65 | json_files = sorted(json_files)
66 | for json_file in json_files:
67 | camera_id = int(json_file.split("_")[0][6:])
68 | with open(os.path.join(json_dir, json_file)) as f:
69 | tracking_dict = json.load(f)
70 | tracker = Tracker(tracking_params)
71 | tracking_dict = tracker.correcting_scpt_result(tracking_dict)
72 | out_file = os.path.join(out_dir, "fixed_"+os.path.basename(json_file))
73 | with open(out_file, mode='w') as f:
74 | json.dump(tracking_dict, f)
75 |
76 | def correct_mcpt_result(scene_id,json_dir,out_dir,tracking_params={}):
77 | with open(os.path.join(json_dir, 'whole_tracking_results.json')) as f:
78 | tracking_results = json.load(f)
79 | with open(os.path.join(json_dir, f"representative_nodes_scene{str(scene_id)}.json")) as f:
80 | representative_nodes = json.load(f)
81 | tracker = Tracker(tracking_params)
82 | tracking_results = tracker.correcting_mcpt_result(scene_id,tracking_results,representative_nodes)
83 | out_file = os.path.join(out_dir, "fixed_whole_tracking_results.json")
84 | with open(out_file, mode='w') as f:
85 | json.dump(tracking_results, f)
86 |
87 |
88 | def load_detections(data_root, debug=False):
89 | print(f"Loading detections from {data_root}.")
90 | detected_objects = DetectedObjects()
91 | detected_objects.load_from_directory(feature_root=data_root)
92 | print(f"Found {len(detected_objects.objects)} frames, and {detected_objects.num_objects} objects.")
93 | if debug:
94 | frames = sorted(detected_objects.objects)
95 | min_num_obj = 9999999
96 | max_num_obj = 0
97 | for frame in frames:
98 | obj = detected_objects[frame]
99 | num = len(obj)
100 | min_num_obj = min(min_num_obj, num)
101 | max_num_obj = max(max_num_obj, num)
102 | print(f"### MIN num detections: {min_num_obj}, MAX num detections: {max_num_obj} ###\n")
103 |
104 | return detected_objects
105 |
106 | def get_args():
107 | parser = argparse.ArgumentParser(description='Offline Tracker sample app.')
108 | parser.add_argument('-d', '--data', default='EmbedFeature/scene_001', type=str)
109 | parser.add_argument('-o', '--outdir', default='output', type=str)
110 |
111 | return parser.parse_args()
112 |
113 | if __name__ == "__main__":
114 | args = get_args()
115 |
116 | run_scpt(feature_data_root=args.data, out_dir=args.outdir, tracking_params={})
117 |
--------------------------------------------------------------------------------
/tracking/infer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import json
4 | from datetime import datetime
5 | from multiprocessing import Pool
6 | import subprocess
7 | import glob
8 | import tarfile
9 | import argparse
10 |
11 | sys.path.append("tracking")
12 | sys.path.append("tracking/src")
13 | import run
14 |
15 | """
16 | This file contains functions to execute offline tracking.
17 | """
18 |
19 | # Single camera people tracking
20 | def scpt(tracking_params={}):
21 | # distributed SCPT processing by simply using multiprocessing pool.
22 | global scene_id
23 | global camera_ids
24 | global exp_root
25 | global tracking_parameters
26 | tracking_parameters = tracking_params
27 |
28 | num_processes = 5 # Could be more than 5, but it depends on machine instance
29 | p = Pool(num_processes)
30 | result = p.map(single_tracking, camera_ids)
31 |
32 | run.correct_scpt_result(scene_id=scene_id, json_dir=exp_root, out_dir=exp_root,
33 | tracking_params=tracking_params)
34 |
35 | def single_tracking(cam_id):
36 | global scene_id
37 | global embed_root
38 | global exp_root
39 | global tracking_parameters
40 |
41 | print(f"Started a background process to camera_{cam_id}\n")
42 | run.run_scpt(feature_data_root=f'{embed_root}/scene_{scene_id:03d}/camera_{cam_id:04d}', out_dir=exp_root,
43 | tracking_params=tracking_parameters)
44 | return
45 |
46 | def get_camera_ids(scene_id, json_f="tracking/config/scene_2_camera_id_file.json"):
47 | with open(json_f) as f:
48 | scene2camera = json.load(f)
49 | camera_ids = []
50 | for scene_camera in scene2camera:
51 | if scene_camera["scene_name"] == f"scene_{scene_id:03d}":
52 | camera_ids = scene_camera["camera_ids"]
53 | break
54 | return camera_ids
55 |
56 |
57 | # Multi camera tracking, aka ReID
58 | def mcpt(tracking_params={}):
59 | global scene_id
60 | global exp_root
61 | global tracking_parameters
62 | tracking_parameters = tracking_params
63 |
64 | run.run_mcpt(scene_id=scene_id, json_dir=exp_root, out_dir=exp_root, tracking_params=tracking_parameters)
65 | run.correct_mcpt_result(scene_id=scene_id, json_dir=exp_root, out_dir=exp_root, tracking_params=tracking_parameters)
66 |
67 | def run_tracking(scene, embed, output, debug=False, tracking_params={}):
68 | """
69 | Main routine
70 | """
71 | global scene_id
72 | global embed_root
73 | global exp_root
74 | global output_root
75 | global camera_ids
76 | global exec_scpt
77 | global exec_mcpt
78 |
79 | if debug:
80 | print(f"### tracking parameters: {tracking_params}", flush=True)
81 |
82 | scene_id = scene
83 | embed_root = embed
84 | camera_ids = get_camera_ids(scene_id)
85 | print(f"Target scene ID: {scene_id}, camera IDs: {camera_ids}")
86 |
87 | # Configure output directory
88 | exp_root = os.path.join(output, f"scene_{scene_id:03d}")
89 | output_root = exp_root
90 |
91 | # Execute SCPT (Single Camera People Tracking)
92 | if exec_scpt:
93 | scpt_started = datetime.now()
94 | print(f"Start SCPT: {scpt_started}", flush=True)
95 | scpt(tracking_params=tracking_params)
96 | print(f"SCPT finished. Elapsed: {datetime.now()-scpt_started}", flush=True)
97 |
98 | # Execute MCPT (Multi Camera People Tracking) aka ReID
99 | if exec_mcpt:
100 | mcpt_started = datetime.now()
101 | print(f"Start MCPT: {mcpt_started}")
102 | mcpt(tracking_params=tracking_params)
103 | print(f"MCPT finished. Elapsed: {datetime.now()-mcpt_started}", flush=True)
104 |
105 |
106 | def get_parameters_to_scene(scene_id, param_file):
107 | if not os.path.isfile(param_file):
108 | print(f"parameters_per_scene file does not exist: {param_file}")
109 | return {}
110 |
111 | sys.path.append("tracking/config")
112 | import parameters_per_scene as pps
113 |
114 | scene = int(scene_id)
115 | if scene in pps.parameters_per_scene:
116 | return pps.parameters_per_scene[scene]
117 | else:
118 | return {}
119 |
120 | def get_args():
121 | parser = argparse.ArgumentParser(description='Offline Tracker Inferencing app.')
122 | parser.add_argument('-s', '--scene', type=int, required=True)
123 | parser.add_argument('-o', '--output', default="Tracking", type=str)
124 | parser.add_argument('-all', '--exec_all', action='store_true')
125 | parser.add_argument('-scpt', '--exec_scpt', action='store_true')
126 | parser.add_argument('-mcpt', '--exec_mcpt', action='store_true')
127 |
128 | return parser.parse_args()
129 |
130 | if __name__ == "__main__":
131 | global exec_scpt
132 | global exec_mcpt
133 |
134 | args = get_args()
135 |
136 | if args.exec_all or (not (args.exec_scpt | args.exec_mcpt)):
137 | exec_scpt = exec_mcpt = True
138 | else:
139 | exec_scpt = exec_mcpt = False
140 | if args.exec_scpt:
141 | exec_scpt = True
142 | if args.exec_mcpt:
143 | exec_mcpt = True
144 |
145 | # Default tracking parameter
146 | default_tracking_parameters = {
147 | "epsilon_scpt": 0.10, "time_period":3,"epsilon_mcpt": 0.37, "short_track_th":120,
148 | "keypoint_condition_th":1, "replace_similarity_by_wcoordinate":True, "distance_type":"min",
149 | "distance_th":10, "sim_th":0.85, "delete_gid_th":5000
150 | }
151 |
152 | scene = args.scene
153 | param_file = "tracking/config/parameters_per_scene.py"
154 | parameters = get_parameters_to_scene(scene, param_file)
155 | if len(parameters) > 0:
156 | tracking_parameters = parameters["tracking_parameters"]
157 | else:
158 | # Empty parameters to the scene, so use the default parameters.
159 | tracking_parameters = default_tracking_parameters
160 | embed_path = f"EmbedFeature"
161 |
162 | # Run offline tracking
163 | run_tracking(scene=scene, embed=embed_path, output=args.output, tracking_params=tracking_parameters)
164 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CVPRW2024: Overlap Suppression Clustering for Offline Multi-Camera People Tracking
2 |
3 | The submission with the highest HOTA score in the 8th NVIDIA AI City Challenge (2024) Track 1: Multi-Camera People Tracking. It was placed 2nd in the competition because it uses an offline tracking algorithm.
4 | [[Paper]](https://openaccess.thecvf.com/content/CVPR2024W/AICity/papers/Yoshida_Overlap_Suppression_Clustering__for_Offline_Multi-Camera_People_Tracking_CVPRW_2024_paper.pdf)
5 |
6 | ## Dataset Availability
7 |
8 | The official dataset can be downloaded from the AI City Challenge website (https://www.aicitychallenge.org/2024-data-and-evaluation/). You need to fill out the dataset request form to obtain the password to download them.
9 |
10 | Referring to the DATASET LICENSE AGREEMENT from the dataset author(s), we are not allowed to share the dataset.
11 | ```
12 | 2.c. ... you may not copy, sell, rent, sublicense, transfer or distribute the DATASET, or share with others.
13 | ```
14 |
15 |
16 | ## Ranking
17 |
18 | ![Ranking](ranking.jpg)
19 |
20 | ## Overall Pipeline
21 |
22 | ![Overall Pipeline](overall-pipeline.png)
23 |
24 | ## Environment Requirements
25 |
26 | The implementation of our work is built upon [BoT-SORT](https://github.com/NirAharon/BoT-SORT), [OpenMMLab](https://github.com/open-mmlab), and [torchreid](https://github.com/KaiyangZhou/deep-person-reid).
27 |
28 | Three different environments are required for the reproduction process. Please install these three environments according to the following repos:
29 |
30 | 1. [Install BoT-SORT for people detection](https://github.com/NirAharon/BoT-SORT#installation)
31 | 2. [Install torchreid for feature extraction](https://github.com/KaiyangZhou/deep-person-reid#installation)
32 | 3. [Install mmpose for pose estimation](https://mmpose.readthedocs.io/en/latest/installation.html) (*Please note that you need to have a version in the 0.x series for this to work.)
33 |
34 | Below are the installation commands for mmpose v0.29.0.
35 | Please note that these commands may change due to updates or modifications in mmpose.
36 | ```
37 | #step 1
38 | conda create --name openmmlab python=3.8 -y
39 | conda activate openmmlab
40 |
41 | #step 2
42 | conda install pytorch torchvision -c pytorch
43 |
44 | #step 3
45 | pip install -U openmim
46 | mim install mmengine
47 | mim install "mmcv==1.7.0"
48 |
49 | mim install "mmdet==2.28.2"
50 |
51 | #Build mmpose from source
52 | git clone https://github.com/open-mmlab/mmpose.git -b v0.29.0 --depth 1
53 | cd mmpose
54 | pip install -r requirements.txt
55 | pip install -v -e .
56 | ```
57 | If you receive an mmcv AssertionError, please reinstall mmcv.
58 | ```
59 | mim uninstall mmcv
60 | mim install "mmcv==1.7.0"
61 | ```
62 | Once you have installed all of the above on the same machine, the root folder should be organized as follows:
63 | ```
64 | root
65 | │ README.md
66 | │ ranking.jpg
67 | │ overall-pipeline.png
68 | │
69 | ├─assets
70 | ├─detector
71 | ├─embedder
72 | ├─poser
73 | ├─scripts
74 | ├─tools
75 | ├─tracking
76 | │
77 | ├─BoT-SORT
78 | ├─deep-person-reid
79 | └─mmpose
80 | ```
81 |
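In addition to the repository folders above, the scripts read and write several working directories in the same root while the pipeline runs. A summary, based on the scripts and tracking code in this repository:
```
Original/      # input videos and extracted frames (steps 0-1)
Detection/     # per-camera detection .txt/.json files (step 2)
EmbedFeature/  # per-detection ReID feature .npy files (step 3)
Pose/          # per-camera keypoint .json files (step 4)
Tracking/      # SCPT/MCPT tracking results (step 5)
Submission/    # track1.txt submission files (step 6)
```
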
82 | ## Training
83 | This project executes
84 | 1) Person detection
85 | 2) Feature extraction of each person
86 | 3) Pose estimation of each person.
87 |
88 | However, we utilize pre-trained models for all of these steps, so there is nothing to train.
89 |
90 | ## Running Tracking
91 |
92 | ### Preparation
93 | #### 0. Place your video files.
94 |
95 | Place your video files under the directory that corresponds to the scene/camera IDs, i.e. `Original/<scene_id>/<camera_id>/video.mp4`.
96 |
97 | For example, to place the video file for camera-361 of scene-41, run commands like the ones below. Please don't forget to place all of the video files for the scene you want to process.
98 | ```
99 | mkdir -p Original/scene_041/camera_0361
100 | cp <path/to/your/video.mp4> Original/scene_041/camera_0361/video.mp4
101 | ```
102 |
103 | #### 1. Frame Extraction
104 |
105 | Run the command below to extract frame images.
106 | ```
107 | sh scripts/extract_frame.sh 41
108 | ```
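
Based on `tools/extract_frame.py`, the frames of each camera are written next to the source video, for example:
```
Original/scene_041/camera_0361/Frame/000001.jpg
Original/scene_041/camera_0361/Frame/000002.jpg
...
```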
109 |
110 | #### 2. Person Detection
111 |
112 | Run the steps below for person detection.
113 | - Install BoT-SORT as instructed in the Environment Requirements section above [here](#install).
114 | - Prepare models. Download the pretrained YOLOX_x model from [ByteTrack [Google Drive]](https://drive.google.com/file/d/1P4mY0Yyd3PPTybgZkjMYhFri88nTmJX5/view), and store it in the BoT-SORT directory.
115 | - Run person detection by executing a command below.
116 | ```
117 | sh scripts/detection.sh 41
118 | ```
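
The detection step is expected to produce, for each camera, a `Detection/scene_<scene>/camera_<id>.txt` file plus a matching `.json` file. Judging from how `embedder/aic24_extract.py` parses the text files, each line is comma-separated in the form:
```
<camera_name>,<frame>,<unused>,<x1>,<y1>,<x2>,<y2>,<confidence>
```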
119 |
120 | #### 3. Feature extraction
121 |
122 | Run the steps below for feature extraction.
123 | - Install deep-person-reid as instructed in the Environment Requirements section above [here](#install).
124 | - Prepare models. Download the pretrained deep-person-reid model from [torchreid](https://kaiyangzhou.github.io/deep-person-reid/MODEL_ZOO). Running the script mentioned below will download this pretrained model automatically and store it accordingly.
125 | - Run feature extraction by executing a command below.
126 | ```
127 | sh scripts/embedding.sh 41
128 | ```
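
Based on `embedder/aic24_extract.py`, one `.npy` file is written per detection under `EmbedFeature/scene_<scene>/camera_<id>/`, named `feature_<frame>_<serial>_<x1>_<x2>_<y1>_<y2>_<conf>.npy` (note the x1/x2/y1/y2 order), and the path of each feature file is written back into the detection `.json` as `NpyPath`.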
129 |
130 | #### 4. Pose estimation
131 |
132 | Run the steps below for pose estimation.
133 | - Install mmpose as instructed in the Environment Requirements section above [here](#install).
134 | - Run pose estimation by executing a command below.
135 | ```
136 | sh scripts/pose.sh 41
137 | ```
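
Based on `scripts/pose.sh`, the keypoint results of each camera are saved to `Pose/scene_<scene>/camera_<id>/camera_<id>_out_keypoint.json`, keyed by frame index.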
138 |
139 | ### Single Camera People Tracking and Multi Camera People Tracking
140 |
141 | #### 5. Both Single Camera People Tracking and Multi Camera People Tracking
142 |
143 | Run the commands below to run both Single Camera People Tracking and Multi Camera People Tracking at once.
144 | ```
145 | python3 -m venv .venv
146 | source .venv/bin/activate
147 | pip install -r tracking/requirements.txt
148 | sh scripts/tracking.sh 41
149 | ```
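
`scripts/tracking.sh` simply runs `python tracking/infer.py -s <scene>` per scene; `infer.py` also accepts `-scpt`, `-mcpt`, or `-all` to run only part of the tracking stage. `tracking/config/parameters_per_scene.py` ships empty, so `infer.py` falls back to its built-in default parameters. A minimal sketch of a per-scene override (scene 41 and the values below are only an example; keys must match entries of `Tracker.parameters` in `tracking/src/tracking.py`):
```
# tracking/config/parameters_per_scene.py -- example override (illustrative values)
parameters_per_scene = {
    41: {
        "tracking_parameters": {
            "epsilon_scpt": 0.10,
            "epsilon_mcpt": 0.37,
            "time_period": 3,
            "short_track_th": 120,
            "distance_type": "min",
            "sim_th": 0.85,
        }
    }
}
```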
150 |
151 | #### 6. Combine tracking results of each scene for submission.
152 |
153 | Run the command below to combine the results of all scenes. This will generate track1.txt under the "Submission" directory.
154 | ```
155 | python3 tools/generate_submission.py
156 | ```
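
Based on `tools/generate_submission.py`, each line of `track1.txt` is space-separated and sorted by camera, frame, then person ID:
```
<camera_id> <global_person_id> <frame_index> <x> <y> <width> <height> <world_x> <world_y>
```
The frame index is zero-based and the world coordinates are written with six decimal places.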
157 |
--------------------------------------------------------------------------------
/poser/top_down_video_demo_with_track_file.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import os
3 | import warnings
4 | from argparse import ArgumentParser
5 |
6 | import cv2
7 | import mmcv
8 | import json
9 | import numpy as np
10 |
11 | from mmpose.apis import (collect_multi_frames, inference_top_down_pose_model,
12 | init_pose_model, process_mmdet_results,
13 | vis_pose_result)
14 | from mmpose.datasets import DatasetInfo
15 |
16 | try:
17 | from mmdet.apis import inference_detector, init_detector
18 | has_mmdet = True
19 | except (ImportError, ModuleNotFoundError):
20 | has_mmdet = False
21 |
22 | from load_tracking_result import load_tracking
23 |
24 |
25 | def main():
26 | """Visualize the demo video (support both single-frame and multi-frame).
27 |
28 | Using mmdet to detect the human.
29 | """
30 | parser = ArgumentParser()
31 | parser.add_argument('track_result', help='Track result file')
32 | parser.add_argument('pose_config', help='Config file for pose')
33 | parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
34 | parser.add_argument('--video-path', type=str, help='Video path')
35 | parser.add_argument(
36 | '--show',
37 | action='store_true',
38 | default=False,
39 | help='whether to show visualizations.')
40 | parser.add_argument(
41 | '--out-video-root',
42 | default='',
43 | help='Root of the output video file. '
44 | 'Default not saving the visualization video.')
45 | parser.add_argument(
46 | '--device', default='cuda:0', help='Device used for inference')
47 | parser.add_argument(
48 | '--det-cat-id',
49 | type=int,
50 | default=1,
51 | help='Category id for bounding box detection model')
52 | parser.add_argument(
53 | '--bbox-thr',
54 | type=float,
55 | default=0.3,
56 | help='Bounding box score threshold')
57 | parser.add_argument(
58 | '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
59 | parser.add_argument(
60 | '--radius',
61 | type=int,
62 | default=4,
63 | help='Keypoint radius for visualization')
64 | parser.add_argument(
65 | '--thickness',
66 | type=int,
67 | default=1,
68 | help='Link thickness for visualization')
69 | parser.add_argument(
70 | '--use-multi-frames',
71 | action='store_true',
72 | default=False,
73 | help='whether to use multi frames for inference in the pose'
74 | 'estimation stage. Default: False.')
75 | parser.add_argument(
76 | '--online',
77 | action='store_true',
78 | default=False,
79 | help='inference mode. If set to True, can not use future frame'
80 | 'information when using multi frames for inference in the pose'
81 | 'estimation stage. Default: False.')
82 | parser.add_argument(
83 | '--out-file',
84 | type=str
85 | )
86 |
87 |
88 | assert has_mmdet, 'Please install mmdet to run the demo.'
89 |
90 | args = parser.parse_args()
91 |
92 | # assert args.show or (args.out_video_root != '')
93 |
94 | print('Initializing model...')
95 | # # build the detection model from a config file and a checkpoint file
96 | # det_model = init_detector(
97 | # args.det_config, args.det_checkpoint, device=args.device.lower())
98 | track_results = load_tracking(args.track_result)
99 |
100 | # build the pose model from a config file and a checkpoint file
101 | pose_model = init_pose_model(
102 | args.pose_config, args.pose_checkpoint, device=args.device.lower())
103 |
104 | dataset = pose_model.cfg.data['test']['type']
105 | # get datasetinfo
106 | dataset_info = pose_model.cfg.data['test'].get('dataset_info', None)
107 | if dataset_info is None:
108 | warnings.warn(
109 | 'Please set `dataset_info` in the config.'
110 | 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.',
111 | DeprecationWarning)
112 | else:
113 | dataset_info = DatasetInfo(dataset_info)
114 |
115 | # read video
116 | video = mmcv.VideoReader(args.video_path)
117 | assert video.opened, f'Failed to load video file {args.video_path}'
118 |
119 | if args.out_video_root == '':
120 | save_out_video = False
121 | else:
122 | os.makedirs(args.out_video_root, exist_ok=True)
123 | save_out_video = True
124 |
125 | if save_out_video:
126 | fps = video.fps
127 | size = (video.width, video.height)
128 | fourcc = cv2.VideoWriter_fourcc(*'mp4v')
129 | videoWriter = cv2.VideoWriter(
130 | os.path.join(args.out_video_root,
131 | f'vis_{os.path.basename(args.video_path)}'), fourcc,
132 | fps, size)
133 |
134 | # frame index offsets for inference, used in multi-frame inference setting
135 | if args.use_multi_frames:
136 | assert 'frame_indices_test' in pose_model.cfg.data.test.data_cfg
137 | indices = pose_model.cfg.data.test.data_cfg['frame_indices_test']
138 |
139 | # whether to return heatmap, optional
140 | return_heatmap = False
141 |
142 | # return the output of some desired layers,
143 | # e.g. use ('backbone', ) to return backbone feature
144 | output_layer_names = None
145 |
146 | save_results = {}
147 |
148 | print('Running inference...')
149 | for frame_id, cur_frame in enumerate(mmcv.track_iter_progress(video)):
150 | # get the detection results of current frame
151 | # the resulting box is (x1, y1, x2, y2)
152 | # mmdet_results = inference_detector(det_model, cur_frame)
153 |
154 | # # keep the person class bounding boxes.
155 | # person_results = process_mmdet_results(mmdet_results, args.det_cat_id)
156 | if frame_id not in track_results.keys():
157 | continue
158 | person_results = track_results[frame_id]
159 |
160 | if args.use_multi_frames:
161 | frames = collect_multi_frames(video, frame_id, indices,
162 | args.online)
163 |
164 | # test a single image, with a list of bboxes.
165 | pose_results, returned_outputs = inference_top_down_pose_model(
166 | pose_model,
167 | frames if args.use_multi_frames else cur_frame,
168 | person_results,
169 | bbox_thr=args.bbox_thr,
170 | format='xyxy',
171 | dataset=dataset,
172 | dataset_info=dataset_info,
173 | return_heatmap=return_heatmap,
174 | outputs=output_layer_names)
175 |
176 | save_results[frame_id] = pose_results
177 | # show the results
178 | vis_frame = vis_pose_result(
179 | pose_model,
180 | cur_frame,
181 | pose_results,
182 | dataset=dataset,
183 | dataset_info=dataset_info,
184 | kpt_score_thr=args.kpt_thr,
185 | radius=args.radius,
186 | thickness=args.thickness,
187 | show=False)
188 |
189 | if args.show:
190 | cv2.imshow('Frame', vis_frame)
191 |
192 | if save_out_video:
193 | videoWriter.write(vis_frame)
194 |
195 | if args.show and cv2.waitKey(1) & 0xFF == ord('q'):
196 | break
197 |
198 | class NumpyEncoder(json.JSONEncoder):
199 | """ Special json encoder for numpy types """
200 | def default(self, obj):
201 | if isinstance(obj, np.integer):
202 | return int(obj)
203 | elif isinstance(obj, np.floating):
204 | return float(obj)
205 | elif isinstance(obj, np.ndarray):
206 | return obj.tolist()
207 | return json.JSONEncoder.default(self, obj)
208 | os.makedirs(os.path.dirname(args.out_file), exist_ok=True)
209 | json.dump(save_results, open(args.out_file, 'w'), cls=NumpyEncoder)
210 |
211 | if save_out_video:
212 | videoWriter.release()
213 | if args.show:
214 | cv2.destroyAllWindows()
215 |
216 |
217 | if __name__ == '__main__':
218 | main()
219 |
--------------------------------------------------------------------------------
/tracking/src/tracking.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import tqdm
4 | from sklearn.cluster import DBSCAN
5 | from scipy.spatial.distance import cdist
6 | from sklearn.metrics.pairwise import cosine_similarity
7 |
8 | from utils import DetectedObjects
9 | from scpt import *
10 | from mcpt import *
11 |
12 | class Tracker():
13 | """
14 | This class represents YOTM, aka Yoshida Offline Tracking Method.
15 | """
16 | def __init__(self, params={}):
17 | self.camera_ids = []
18 | self.tracking_dicts = {}
19 | self._init_parameters()
20 | self.update_parameters(**params)
21 | self.frame_period = self.parameters["time_period"] * self.parameters["fps"]
22 |
23 | def _init_parameters(self):
24 |
25 | #self.parameters[""]: =
26 | self.parameters = {}
27 | self.parameters["image_size"] = (1920,1080)
28 |
29 | # sct parameters
30 | self.parameters["time_period"]:int = 3
31 | self.parameters["fps"]:int = 30
32 | self.parameters["epsilon_scpt"]:float = 0.1
33 | self.parameters["min_samples"]:int = 4
34 | self.parameters["remove_noise_cluster"]:bool = True
35 | self.parameters["overlap_suppression"]:bool = True
36 | self.parameters["num_candidates"]:int = 10
37 | self.parameters["clustering_method"]:str = "agglomerative" #agglomerative or dbscan
38 | self.parameters["debug"]:bool = False
39 |
40 | #fix_sct parameters
41 | self.parameters["sequential_nms"]:bool = True
42 | self.parameters["temporally_snms_th"]:float = 0.6
43 | self.parameters["spatially_snms_th"]:float = 0.6
44 | self.parameters["merge_nonoverlap"]:bool = True
45 |
46 | self.parameters["separate_warp"]:bool = True
47 | self.parameters["warp_th"]:int = 40
48 | self.parameters["alpha"]:float = 0.5
49 |
50 | self.parameters["exclude_short_track"]:bool = False
51 | self.parameters["short_tracklet_th"]:int = 120
52 |
53 | self.parameters["exclude_motionless_track"]:bool = False
54 | self.parameters["stop_track_th"]:int = 25
55 |
56 | # mct parameters
57 | self.parameters["epsilon_mcpt"]:float = 0.4
58 | self.parameters["keypoint_th"]:float = 0.8
59 | self.parameters["keypoint_condition_th"]:float = 1
60 | self.parameters["distance_th"]:int = 5
61 |
62 | self.parameters["check_sc_overlap"]:bool = False
63 | self.parameters["distance_type"]:str = "max" #max or mean or min
64 | self.parameters["replace_similarity_by_wcoordinate"]:bool = False
65 | self.parameters["replace_value"]: float = -10
66 | self.parameters["representative_selection_method"]:str = "keypoint" #keypoint or centrality
67 | self.parameters["aspect_th"]:float =0.5
68 |
69 | # fix mct parameters
70 | self.parameters["reassign_global_id"]:bool = True
71 | self.parameters["short_track_th"]:int = 120
72 | self.parameters["delete_gid_th"]:int = 6000
73 | self.parameters["assign_all_tracklet"]:bool = False
74 | self.parameters["sim_th"]:float = 0.75
75 | self.parameters["delete_few_camera_cluster"]:bool = False
76 |
77 | self.parameters["measure_wcoordinate"]:bool = False
78 |
79 | self.parameters["remove_noise_image"]:bool = True
80 |
81 | self.parameters["delete_distant_person"]:bool = True
82 |
83 | self.parameters["interpolate_track"]:bool = True
84 | self.parameters["max_interpolate_interval"]:int = 15
85 |
86 |
87 | def update_parameter(self, parameter, value):
88 | if not parameter in self.parameters:
89 | print(f"Unknown parameter: {parameter}.")
90 | sys.exit()
91 | return
92 | self.parameters[parameter] = value
93 |
94 | def update_parameters(self, **params):
95 | for key in params:
96 | self.update_parameter(key, params[key])
97 |
98 | def scpt(self, tracking_dict):
99 | """
100 | This performs object tracking with single camera dataset.
101 | Most of the code below was copied from '20240214_OfflineTracking-Debug.ipynb' and tweaked slightly.
102 | """
103 |
104 | frame_period = self.parameters["time_period"] * self.parameters["fps"]
105 | epsilon = self.parameters["epsilon_scpt"]
106 |
107 | max_offlineid = -1
108 | last_frame = get_max_value_of_dict(tracking_dict, "Frame")
109 | time_section_serial_dict = {timesection:[] for timesection in range(last_frame//frame_period+1) }
110 |
111 | for serial in tracking_dict.keys():
112 | frame = tracking_dict[serial]["Frame"]
113 | time_section = frame // frame_period
114 | time_section_serial_dict[time_section].append(serial)
115 |
116 | for time_section in range(last_frame//frame_period+1):
117 | serials = time_section_serial_dict[time_section]
118 | if len(serials) == 0: continue
119 | clusters = tracking_by_clustering(tracking_dict,serials, **self.parameters)
120 |
121 | clusters = [cluster+max_offlineid+1 if cluster != -1 else -i for i,cluster in enumerate(clusters)]
122 | max_offlineid = max(clusters) if max(clusters) > 0 else max_offlineid
123 |
124 | if time_section == 0:
125 | for serial,cluster in zip(serials,clusters):
126 | tracking_dict[serial]["OfflineID"] = int(cluster)
127 | elif time_section > 0:
128 | past_serials = time_section_serial_dict[time_section-1]
129 | tracking_dict = associate_cluster_between_period(tracking_dict, clusters, serials, past_serials, **self.parameters)
130 |
131 | # We have tracking results in TrackingDict, yet will gather results for debugging. Could be deleted.
132 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict]
133 | new_offline_ids_dict = {key:i for i,key in enumerate(set(offline_ids)) if key != -1}
134 | new_offline_ids_dict[-1] = -1
135 |
136 | for serial in tracking_dict:
137 | offline_id = tracking_dict[serial]["OfflineID"]
138 | tracking_dict[serial]["OfflineID"] = new_offline_ids_dict[offline_id]
139 |
140 | return tracking_dict
141 |
142 | def correcting_scpt_result(self,tracking_dict,**kwargs):
143 |
144 | sequential_nms = self.parameters["sequential_nms"]
145 | separate_warp = self.parameters["separate_warp"]
146 | exclude_short_track = self.parameters["exclude_short_track"]
147 | exclude_motionless_track = self.parameters["exclude_motionless_track"]
148 | print("sequential_nms:",sequential_nms)
149 | print("separate_warp:",separate_warp)
150 | print("exclude_short_track:",exclude_short_track)
151 | print("exclude_motionless_track:",exclude_motionless_track)
152 |
153 | if sequential_nms:
154 | tracking_dict = sequential_non_maximum_suppression(tracking_dict, **self.parameters)
155 | if separate_warp:
156 | tracking_dict = separate_warp_tracklet(tracking_dict, **self.parameters)
157 | if exclude_short_track:
158 | tracking_dict = exclude_short_tracklet(tracking_dict, **self.parameters)
159 | if exclude_motionless_track:
160 | tracking_dict = exclude_motionless_tracklet(tracking_dict, **self.parameters)
161 | return tracking_dict
162 |
163 | def mcpt(self,scene_id, json_dir,out_dir):
164 | epsilon = self.parameters["epsilon_mcpt"]
165 |
166 | if not os.path.isdir(json_dir):
167 | raise Exception(f"The directory '{json_dir}' does not exist.")
168 | if out_dir == None:
169 | out_dir = json_dir
170 | tracking_results = {}
171 | json_files = [f for f in os.listdir(json_dir) if os.path.splitext(f)[1].lower() == ".json" and f.startswith("fixed_camera")]
172 | json_files = sorted(json_files)
173 | for json_file in json_files:
174 | camera_id = int(json_file.split("_")[1][6:])
175 | with open(os.path.join(json_dir, json_file)) as f:
176 | tracking_dict = json.load(f)
177 | print(f"{json_file} len(serials):{len(tracking_dict)}")
178 | tracking_results[camera_id] = tracking_dict
179 | tracking_results = multi_camera_people_tracking(tracking_results, scene_id=scene_id, json_dir=json_dir, out_dir=out_dir, **self.parameters)
180 |
181 | return tracking_results
182 |
183 | def correcting_mcpt_result(self,scene_id,tracking_results,representative_nodes,**kwargs):
184 | reassign_global_id = self.parameters["reassign_global_id"]
185 | measure_wcoordinate = self.parameters["measure_wcoordinate"]
186 | interpolate_track = self.parameters["interpolate_track"]
187 | remove_noise_image = self.parameters["remove_noise_image"]
188 | delete_distant_person = self.parameters["delete_distant_person"]
189 | print("reassign_global_id:",reassign_global_id)
190 | print("measure_wcoordinate:",measure_wcoordinate)
191 | print("interpolate_track:",interpolate_track)
192 | print("delete_distant_person:",delete_distant_person)
193 |
194 | if reassign_global_id:
195 | tracking_results = global_id_reassignment(tracking_results,representative_nodes,scene_id,**self.parameters)
196 | if measure_wcoordinate:
197 | tracking_results = measure_world_coordinate(scene_id,tracking_results,**self.parameters)
198 | if remove_noise_image:
199 | tracking_results = remove_noise_images(scene_id,tracking_results,**self.parameters)
200 | if delete_distant_person:
201 | tracking_results = delete_distant_persons(tracking_results,**self.parameters)
202 | if interpolate_track:
203 | tracking_results = interpolate_tracklet(tracking_results,representative_nodes,**self.parameters)
204 |
205 | return tracking_results
206 |
--------------------------------------------------------------------------------
/tracking/src/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import json
4 | import glob
5 |
6 |
7 | class DetectedObjects:
8 | """
9 | Represents whole detected objects to track.
10 | Object dict is built by frame_id as a key and its entity contains a list of all Detected objects of the frame.
11 | """
12 | def __init__(self):
13 | self.num_objects = 0
14 | self.objects = {}
15 | self._objects_registered = {}
16 | #self.scene_id = scene_id
17 | #self.camera_id = -1
18 | self.camera_projection_matrix = None
19 | self.homography_matrix = None
20 |
21 | def __str__(self):
22 | return f"DetectedObjects: num_objects:{self.num_objects}, num_frames:{self.num_frames()}"
23 |
24 | def load_from_directory(self, feature_root, calibration_path="Calibration"):
25 | if not os.path.isdir(feature_root):
26 | raise Exception(f'There is no directory to read from. {feature_root}')
27 | npys = sorted(glob.glob(os.path.join(feature_root, "**/*.npy"), recursive=True))
28 | scene_id = None
29 | camera_id = None
30 | path_list = feature_root.split("/")
31 | for dir in path_list:
32 | if dir.startswith("scene_"):
33 | scene_id = int(dir.replace("scene_",""))
34 | if dir.startswith("camera_"):
35 | camera_id = int(dir.replace("camera_",""))
36 | if scene_id is not None and camera_id is not None:
37 | calibration_path = f"Original/scene_{scene_id:03d}/camera_{camera_id:04d}/calibration.json"
38 | self.load_calibration(calibration_path)
39 | else:
40 | print(f'\033[33mwarning\033[0m : failed to get scene_id and camera_id from feature path.')
41 | print(f'\033[33mwarning\033[0m : world coordinate calculations are ignored.')
42 |
43 |
44 | # Below is to parse camera id from the path, we're probably not going to use it though.
45 | #camera_id = None
46 | #dirs = npys[0].split("/")
47 | #if len(dirs) < 2:
48 | # print(f"Cannot prop camera id from input path. {feature_path}")
49 | #else:
50 | # camera_id = dirs[-1]
51 | # if "Camera" in camera_id:
52 | # self.camera_id = int(camera_id[len("Camera"):])
53 |
54 | for f in npys:
55 | self.add_object_from_image_path(f)
56 |
57 | def add_object(self, frame_id, coordinate, world_coordinate, confidence, feature_path, image_path=None):
58 | if isinstance(frame_id, str):
59 | frame_id = int(frame_id)
60 |
61 | # Check if coordinate is reasonable
62 | if coordinate.x1 >= coordinate.x2 or coordinate.y1 >= coordinate.y2:
63 | print(f"Unnatural coordinate found in frame {frame_id}: {coordinate}")
64 | return
65 |
66 | detected_obj = DetectedObject(object_id=self.num_objects, frame_id=frame_id, coordinate=coordinate, worldcoordinate=world_coordinate,
67 | confidence=confidence, feature_path=feature_path)
68 | key = f"{coordinate.x1}_{coordinate.y1}_{coordinate.x2}_{coordinate.y2}"
69 | if frame_id in self.objects:
70 | if not key in self._objects_registered[frame_id]:
71 | objects_per_frame = self.objects[frame_id].append(detected_obj)
72 | self._objects_registered[frame_id].append(key)
73 | else:
74 | print(f"Duplicate coord found in frame {frame_id}: {coordinate}")
75 | return
76 | else:
77 | objects_per_frame = self.objects[frame_id] = [detected_obj]
78 | self._objects_registered[frame_id] = [key]
79 | self.num_objects += 1
80 |
81 | def add_object_from_image_path(self, feature_path, image_path=None, calibration_path="Calibration"):
82 | file_path = os.path.basename(feature_path)
83 | if file_path.startswith("feature_"):
84 | _, frame_id, serial_no, x1, x2, y1, y2, conf = os.path.splitext(file_path)[0].split("_")
85 | conf = conf if len(conf) == 1 else conf[0]+"."+conf[1:]
86 | else:
87 | serial_no, frame_id, x1, x2, y1, y2 = os.path.splitext(file_path)[0].split("_")
88 | x1, x2, y1, y2 = int(x1.replace("x","")), int(x2), int(y1.replace("y","")), int(y2)
89 | conf = 0.98765 # Dummy
90 | World_coordinate = None
91 | if self.homography_matrix is not None:
92 | w_x, w_y = self.convert_coordinates_2world((int(float(x1)) + int(float(x2))) / 2, int(float(y2)))
93 | World_coordinate = WorldCoordinate(w_x, w_y)
94 |
95 | self.add_object(frame_id=int(frame_id), coordinate=Coordinate(x1, y1, x2, y2), world_coordinate=World_coordinate,
96 | confidence=float(conf), feature_path=feature_path, image_path=image_path)
97 |
98 | def get_objects_of_frames(self, start_frame, end_frame):
99 | if start_frame > self.num_frames() or end_frame > self.num_frames():
100 | return None
101 | object_dict = {}
102 | for frame_id in range(start_frame, end_frame):
103 | if frame_id in self.objects:
104 | object_dict[frame_id] = self[frame_id]
105 | #else:
106 | # print(f"There is no such frame in the DetectedObjects, will be ignored. frame_id: {frame_id}")
107 | return object_dict
108 |
109 | def get_object_ids_of_frames(self, start_frame, end_frame):
110 | """
111 | Returns a list of detected object IDs that appeared within the specified frame window.
112 | """
113 | if start_frame > self.num_frames() or end_frame > self.num_frames():
114 | return None
115 | object_ids = []
116 | for frame_id in range(start_frame, end_frame):
117 | if frame_id in self.objects:
118 | for det in self[frame_id]:
119 | object_ids.append(det.object_id)
120 | return sorted(object_ids)
121 |
122 | def __getitem__(self, frame_id):
123 | if frame_id in self.objects:
124 | return self.objects[frame_id]
125 | else:
126 | return None
127 |
128 | def num_frames(self):
129 | """
130 | Returns number of frames that currently holding.
131 | """
132 | return len(self.objects)
133 |
134 | def last_frame_id(self):
135 | """
136 | Returns the last frame id.
137 | """
138 | return max(self.objects.keys())
139 |
140 | def to_trackingdict(self):
141 | """
142 | Compatibility function to convert detections in TrackingDict format.
143 | """
144 | track_dict = {}
145 | for frame_id in self.objects:
146 | for detected_object in self.objects[frame_id]:
147 | serial_no = detected_object.object_id
148 | coordinate = json.loads(detected_object.coordinate.__str__())
149 | if detected_object.worldcoordinate.__str__() != "None":
150 | world_coordinate = json.loads(detected_object.worldcoordinate.__str__())
151 | else:
152 | world_coordinate = None
153 | new_object = { "Frame": frame_id, "NpyPath": detected_object.feature_path,
154 | "Coordinate": coordinate, "WorldCoordinate": world_coordinate, "OfflineID": -1 } #"ClusterID": None,
155 | track_dict[serial_no] = new_object
156 | return track_dict
157 |
158 | def load_calibration(self, calib_path):
159 | if os.path.isfile(calib_path):
160 | with open(calib_path, 'r') as file:
161 | data = json.load(file)
162 | self.camera_projection_matrix = np.array(data["camera projection matrix"])
163 | self.homography_matrix = np.array(data["homography matrix"])
164 | else:
165 | print(f'\033[33mwarning\033[0m : Calibration file not found.')
166 | print(f'\033[33mwarning\033[0m : world coordinate calculations are ignored.')
167 |
168 | def convert_coordinates_2world(self, x, y):
169 | vector_xyz = np.array([x, y, 1]) # z=1
170 | vector_xyz_3d = np.dot(np.linalg.inv(self.homography_matrix), vector_xyz.T)
171 | return vector_xyz_3d[0] / vector_xyz_3d[2], vector_xyz_3d[1] / vector_xyz_3d[2]
172 |
173 | class DetectedObject:
174 | """
175 | Represents individual detected object to track.
176 | """
177 | def __init__(self, object_id, frame_id, coordinate, confidence, worldcoordinate, feature_path, image_path=None):
178 | self.object_id = f"{object_id:08d}" # AKA serial number
179 | self.frame_id = frame_id
180 | self.feature_path = feature_path
181 | self.confidence = confidence
182 | self.image_path = image_path
183 | if isinstance(coordinate, Coordinate):
184 | self.coordinate = coordinate
185 | elif isinstance(coordinate, (list, tuple)) and len(coordinate) == 4:
186 | self.coordinate = Coordinate(*coordinate)
187 | else:
188 | raise Exception(f"Unknown coordinate format: {coordinate}")
189 |
190 | if isinstance(worldcoordinate, WorldCoordinate):
191 | self.worldcoordinate = worldcoordinate
192 |         elif isinstance(worldcoordinate, (list, tuple)) and len(worldcoordinate) == 2:  # WorldCoordinate takes (x, y)
193 | self.worldcoordinate = WorldCoordinate(*worldcoordinate)
194 | else:
195 | self.worldcoordinate = None
196 |
197 | class Coordinate:
198 | def __init__(self, x1, y1, x2, y2):
199 | self.x1 = int(float(x1))
200 | self.y1 = int(float(y1))
201 | self.x2 = int(float(x2))
202 | self.y2 = int(float(y2))
203 |
204 | def __str__(self):
205 | return(f'{{"x1":{self.x1}, "y1":{self.y1}, "x2":{self.x2}, "y2":{self.y2}}}')
206 |
207 | class WorldCoordinate:
208 | def __init__(self, x, y):
209 | self.x = float(x)
210 | self.y = float(y)
211 |
212 | def __str__(self):
213 | return(f'{{"x":{self.x}, "y":{self.y}}}')
214 |
215 | class TrackingCluster:
216 | def __init__(self, camera_id, offline_id):
217 | self.camera_id = camera_id
218 |         self.offline_id = offline_id  # use the id passed in instead of a hard-coded 0
219 | self.global_offline_id = -1
220 | self.clusters = {}
221 | self.serials = []
222 |
223 | def add(self, serial):
224 | if serial in self.serials:
225 | raise Exception("DUP!")
226 | self.serials.append(serial)
227 |
228 |
229 | class TrackingClusters:
230 | def __init__(self, camera_id):
231 | self.camera_id = camera_id
232 | self.clusters = []
233 | self.offline_ids = []
234 |
235 |     def add(self, cluster: TrackingCluster):
236 |         cl_id = cluster.offline_id
237 |         if cl_id in self.offline_ids:
238 |             raise Exception(f"DUP! {cl_id}")
239 |         # Record the id so the duplicate check above and get() lookups below work
240 |         self.offline_ids.append(cl_id)
241 |         self.clusters.append(cluster)
242 | def get(self, cluster_id):
243 |         if cluster_id not in self.offline_ids:
244 |             raise Exception(f"No cluster_id registered: {cluster_id}")
245 |         else:
246 |             return self.clusters[self.offline_ids.index(cluster_id)]
247 |
248 | class feature_vector_shed:
249 | def __init__(self):
250 | self.features = {}
251 |
252 | def add_vector(self, camera_id, serial_no, npy_path):
253 | key = camera_id + "_" + serial_no
254 | if key in self.features:
255 |             print(f"Feature vector of camera ID '{camera_id}' and serial no '{serial_no}' already exists.")
256 | return
257 |
258 | if not os.path.isfile(npy_path):
259 | print(f"The feature vector file '{npy_path}' does not exist. ")
260 | return
261 | feature = np.load(npy_path)
262 | self.features[key] = feature
263 |
264 | def get(self, camera_id, serial_no):
265 | key = camera_id + "_" + serial_no
266 | return self.features[key]
267 |
--------------------------------------------------------------------------------
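A minimal usage sketch (not part of the repository) of the homography-based image-to-ground projection performed by DetectedObjects.convert_coordinates_2world() above: a pixel (x, y) is lifted to homogeneous coordinates, multiplied by the inverse homography, and dehomogenized. The 3x3 matrix below is a made-up placeholder; in the pipeline the real matrix is read from the per-camera calibration file by load_calibration().

    import numpy as np

    # Placeholder homography for illustration only
    H = np.array([[0.8, 0.1, -50.0],
                  [0.0, 1.2, -30.0],
                  [0.0, 0.001, 1.0]])

    def pixel_to_world(x, y, homography):
        v = np.linalg.inv(homography) @ np.array([x, y, 1.0])  # back-project the homogeneous pixel
        return v[0] / v[2], v[1] / v[2]                        # dehomogenize to ground-plane (x, y)

    print(pixel_to_world(960, 540, H))
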
/tracking/src/pose.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import json
4 | import cv2
5 |
6 | class PoseKeypoints:
7 | def __init__(self, keypoint_json):
8 | self.kp_indice_foot = [15, 16] # ankles
9 | self.kp_indice_torso = [5, 6, 11, 12, 13, 14] # shoulders, hips, knees
10 | self.kp_indice_torso_legs = [5, 6, 11, 12, 13, 14, 15, 16] # shoulders, hips, knees, ankles
11 |
12 | self._parse_keypoint_json(keypoint_json)
13 | self.serial_dict = {}
14 |
15 | def _parse_keypoint_json(self, file_path):
16 | if os.path.isfile(file_path):
17 | with open(file_path, 'r') as file:
18 | data = json.load(file)
19 | self.keypoints = data
20 | else:
21 | raise Exception(f"Keypoint json file '{file_path}' does not exist.")
22 |
23 | def filter(self, keypoints=None, score_thr=0.3, target_parts="torso_legs", max_frames=0):
24 | filtered = {}
25 | if keypoints == None:
26 | keypoints = self.keypoints
27 | for i, frame in enumerate(keypoints):
28 | if max_frames != 0 and i >= max_frames:
29 | break
30 | detections = keypoints[frame]
31 | target_indices = self.kp_indice_torso if target_parts == "torso" else self.kp_indice_torso_legs
32 | for det in detections:
33 | kps = det["keypoints"]
34 | confidences = [k for i2, k in enumerate(kps) if i2 in target_indices and k[2] >= score_thr]
35 | if len(confidences) < (len(target_indices)):
36 | continue
37 |
38 | pose_entity = [det["bbox"], ]
39 |                 if frame in filtered:  # frame keys are strings; compare without int()
40 | filtered[frame].append(det)
41 | else:
42 | filtered[frame] = [det]
43 | print(f"Num of filtered results: {len(filtered)}")
44 | return filtered
45 |
46 | def summary(self): # Just show top_n data
47 | if len(self.keypoints) <= 0:
48 | print(f"Empty keypoints")
49 | return
50 | print(f"Number of frames: {len(self.keypoints)}")
51 |
52 | def get_keypoints(self, serial:str):
53 | """
54 |         This must be called after assign_serial_from_tracking_dict(),
55 |         which builds the dictionary keyed by "serial" number.
56 |
57 | serial: zero-filled 8-digit string
58 | """
59 | if isinstance(serial, int):
60 | serial = f"{serial:08d}"
61 | elif isinstance(serial, str) and len(serial) != 8:
62 | serial = f"{int(serial):08d}"
63 | if len(self.serial_dict) <= 0:
64 | raise Exception(f"Serial based dictionary is not built yet.")
65 | if serial in self.serial_dict:
66 | return self.serial_dict[serial]
67 | else:
68 | return None
69 |
70 | def _build_serial_dict(self, keypoints=None):
71 | """
72 |         Builds a dictionary keyed by "serial" number. Called from
73 |         assign_serial_from_tracking_dict() after serials have been assigned to detections.
74 | """
75 |         if keypoints is None:
76 |             keypoints = self.keypoints
77 |         if len(keypoints) == 0:
78 |             return None
79 |         serial_dict = {}
80 | for i, frame in enumerate(keypoints):
81 | detections = keypoints[frame]
82 | for det in detections:
83 | if "serial" in det:
84 | serial = det["serial"]
85 | if serial in serial_dict:
86 | print(f"DUP in serial numbers!!")
87 | else:
88 | serial_dict[serial] = {"bbox": det["bbox"], "Keypoints": det["keypoints"]}
89 | self.serial_dict = serial_dict
90 |
91 | return self.serial_dict
92 |
93 | def assign_serial_from_tracking_dict(self, tracking_dict, keypoints=None):
94 | """
95 | tracking_dict: dictionary of tracking_dict or path to tracking_dict json file.
96 | """
97 | if keypoints == None:
98 | keypoints = self.keypoints
99 | if isinstance(tracking_dict, str):
100 | if os.path.isfile(tracking_dict):
101 | with open(tracking_dict) as f:
102 | tracking_dict = json.load(f)
103 | tracking_coord = {}
104 | for serial in tracking_dict:
105 | td_coord = tracking_dict[serial]["Coordinate"]
106 | td_frame = tracking_dict[serial]["Frame"]
107 | key = f"{td_frame}_{td_coord['x1']}_{td_coord['y1']}_{td_coord['x2']}_{td_coord['y2']}"
108 | if key in tracking_coord:
109 | continue #raise Exception(f"DUP! {key}")
110 | tracking_coord[key] = serial
111 | for frame in keypoints:
112 | detections = keypoints[frame]
113 |
114 | for det in detections:
115 | bbox = det["bbox"]
116 | key = f"{int(frame)}_{int(bbox[0])}_{int(bbox[1])}_{int(bbox[2])}_{int(bbox[3])}"
117 | if key in tracking_coord:
118 | det["serial"] = tracking_coord[key]
119 | else:
120 | #print(f"No tracking found for bbox: {key}, {bbox}")
121 | pass
122 |
123 | # Build dict with serial as key
124 | return self._build_serial_dict(keypoints=keypoints)
125 |
126 | def show_footpoints(self, keypoints=None, frame_img_root="Frames", output_mp4=None, score_thr=0.3, target_parts="torso_legs", max_frames=0): # Generate mp4
127 | # Creating mp4
128 | if output_mp4 == None:
129 | output_mp4 = f"foot_points.mp4"
130 | fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
131 | video_wtr = cv2.VideoWriter(output_mp4, fourcc=fourcc, fps=30.0, frameSize=(1280, 960))
132 | if not video_wtr.isOpened():
133 | print(f"Cannot open video writer.")
134 | return
135 |
136 | filtered = self.filter(keypoints=keypoints, score_thr=score_thr, target_parts=target_parts, max_frames=max_frames)
137 | for frame in filtered:
138 | # Read frame image file
139 | frame_img_path = os.path.join(frame_img_root, f"{int(frame):06d}.jpg")
140 | frame_img = cv2.imread(frame_img_path)
141 | detections = self.keypoints[frame]
142 | target_indices = self.kp_indice_torso if target_parts == "torso" else self.kp_indice_torso_legs
143 | for det in detections:
144 | keypoints = det["keypoints"]
145 | left_ankle, right_ankle = keypoints[self.kp_indice_foot[0]], keypoints[self.kp_indice_foot[1]]
146 | if float(left_ankle[2]) >= score_thr:
147 | color = (0, 255, 0)
148 | else:
149 | #print(f"Low confidence on KP[15]: {float(fp1[2])}")
150 | color = (0, 0, 255)
151 | cv2.circle(frame_img, (int(left_ankle[0]), int(left_ankle[1])), 5, color, 3)
152 |
153 | if float(right_ankle[2]) >= score_thr:
154 | color = (0, 255, 0)
155 | else:
156 | #print(f"Low confidence on KP[16]: {float(fp2[2])}")
157 | color = (0, 0, 255)
158 | cv2.circle(frame_img, (int(right_ankle[0]), int(right_ankle[1])), 5, color, 3)
159 | frame_img = cv2.resize(frame_img, (1280, 960))
160 | video_wtr.write(frame_img)
161 | video_wtr.release()
162 | print(f"Saved video file: {output_mp4}\n")
163 |
164 | def show_footpoints_custom(self, frame_img_root="Frames", output_mp4=None, score_thr=0.3, target_parts="torso_legs"): # Generate mp4
165 | # Creating mp4
166 | if output_mp4 == None:
167 | output_mp4 = f"foot_points.mp4"
168 | fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
169 | video_wtr = cv2.VideoWriter(output_mp4, fourcc=fourcc, fps=30.0, frameSize=(1280, 960))
170 | if not video_wtr.isOpened():
171 | print(f"Cannot open video writer.")
172 | return
173 |
174 | for i, frame in enumerate(self.keypoints):
175 | if i >= 300: # only 10-sec, just for debug
176 | break
177 | # Read frame image file
178 | frame_img_path = os.path.join(frame_img_root, f"{int(frame):06d}.jpg")
179 | frame_img = cv2.imread(frame_img_path)
180 | detections = self.keypoints[frame]
181 | target_indices = self.kp_indice_torso if target_parts == "torso" else self.kp_indice_torso_legs
182 | for det in detections:
183 | keypoints = det["keypoints"]
184 | confidences = [k for i2, k in enumerate(keypoints) if i2 in target_indices and k[2] >= score_thr]
185 | if len(confidences) < (len(target_indices)):
186 | # Show bbox in red if doesn't meet the criteria
187 | bbox = det["bbox"]
188 | cv2.rectangle(frame_img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), thickness=2)
189 |
190 | left_ankle, right_ankle = keypoints[self.kp_indice_foot[0]], keypoints[self.kp_indice_foot[1]]
191 | if float(left_ankle[2]) >= score_thr:
192 | color = (0, 255, 0)
193 | else:
194 | #print(f"Low confidence on KP[15]: {float(fp1[2])}")
195 | color = (0, 0, 255)
196 | cv2.circle(frame_img, (int(left_ankle[0]), int(left_ankle[1])), 5, color, 3)
197 |
198 | if float(right_ankle[2]) >= score_thr:
199 | color = (0, 255, 0)
200 | else:
201 | #print(f"Low confidence on KP[16]: {float(fp2[2])}")
202 | color = (0, 0, 255)
203 | cv2.circle(frame_img, (int(right_ankle[0]), int(right_ankle[1])), 5, color, 3)
204 | frame_img = cv2.resize(frame_img, (1280, 960))
205 | video_wtr.write(frame_img)
206 | video_wtr.release()
207 | print(f"Saved video file: {output_mp4}\n")
208 |
209 | def draw_keypoints(self, frame_img, frame_id, out_file="kp_img.jpg"):
210 | def draw_line(img, s1, s2, bbox):
211 | color = (255, 0, 0) # Blue
212 | cv2.line(img, (int(s1[0]), int(s1[1])),
213 | (int(s2[0]), int(s2[1])), color, thickness=2)
214 |
215 | def draw_dot(img, src, bbox):
216 | color = (0, 255, 0) # Green
217 | cv2.circle(img, (int(src[0]), int(src[1])), 5, color, 2)
218 |
219 | frame_id = str(frame_id)
220 | if not frame_id in self.keypoints:
221 |             print(f"There's no record associated with frame {frame_id} in the keypoint data.")
222 | return
223 |
224 | # Read frame image file
225 | if os.path.isfile(frame_img):
226 | img = cv2.imread(frame_img)
227 | else:
228 | print(f"There's no such image file {frame_img}.")
229 | return
230 |
231 | detections = self.keypoints[str(frame_id)]
232 | for det in detections:
233 | keypoints = det["keypoints"]
234 | bbox = det["bbox"]
235 |
236 | # draw lines
237 | # 0 to 1, 2
238 | draw_line(img, keypoints[0], keypoints[1], bbox)
239 | draw_line(img, keypoints[0], keypoints[2], bbox)
240 | # 1 to 2, 3
241 | draw_line(img, keypoints[1], keypoints[2], bbox)
242 | draw_line(img, keypoints[1], keypoints[3], bbox)
243 | # 2 to 4
244 | draw_line(img, keypoints[2], keypoints[4], bbox)
245 | # 3 to 5
246 | draw_line(img, keypoints[3], keypoints[5], bbox)
247 | # 4 to 6
248 | draw_line(img, keypoints[4], keypoints[6], bbox)
249 | # 5 to 6, 7, 11
250 | draw_line(img, keypoints[5], keypoints[6], bbox)
251 | draw_line(img, keypoints[5], keypoints[7], bbox)
252 | draw_line(img, keypoints[5], keypoints[11], bbox)
253 | # 6 to 8, 12
254 | draw_line(img, keypoints[6], keypoints[8], bbox)
255 | draw_line(img, keypoints[6], keypoints[12], bbox)
256 | # 7 to 9
257 | draw_line(img, keypoints[7], keypoints[9], bbox)
258 | # 8 to 10
259 | draw_line(img, keypoints[8], keypoints[10], bbox)
260 | # 11 to 12, 13
261 | draw_line(img, keypoints[11], keypoints[12], bbox)
262 | draw_line(img, keypoints[11], keypoints[13], bbox)
263 | # 12 to 14
264 | draw_line(img, keypoints[12], keypoints[14], bbox)
265 | # 13 to 15
266 | draw_line(img, keypoints[13], keypoints[15], bbox)
267 | # 14 to 16
268 | draw_line(img, keypoints[14], keypoints[16], bbox)
269 |
270 | # Draw dots
271 | for kp in keypoints:
272 | draw_dot(img, (int(kp[0]), int(kp[1])), bbox)
273 |
274 | cv2.imwrite(out_file, img)
275 | print(f"Saved keypoint file: {out_file}")
276 |
--------------------------------------------------------------------------------
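A small self-contained sketch (made-up data, not from the dataset) of the join that PoseKeypoints.assign_serial_from_tracking_dict() above relies on: tracking entries and pose detections are matched by a "frame_x1_y1_x2_y2" string key built from the integer bbox, so each pose detection inherits the serial number of the tracking box it corresponds to.

    tracking_dict = {  # hypothetical entry in TrackingDict format
        "00000001": {"Frame": 12, "Coordinate": {"x1": 100, "y1": 200, "x2": 180, "y2": 400}},
    }
    pose_detections = {"12": [{"bbox": [100.0, 200.0, 180.0, 400.0], "keypoints": []}]}

    tracking_coord = {}
    for serial, entry in tracking_dict.items():
        c = entry["Coordinate"]
        tracking_coord[f"{entry['Frame']}_{c['x1']}_{c['y1']}_{c['x2']}_{c['y2']}"] = serial

    for frame, dets in pose_detections.items():
        for det in dets:
            b = det["bbox"]
            key = f"{int(frame)}_{int(b[0])}_{int(b[1])}_{int(b[2])}_{int(b[3])}"
            det["serial"] = tracking_coord.get(key)  # None when no tracking box matches

    print(pose_detections["12"][0]["serial"])  # -> 00000001
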
/tracking/config/scene_2_camera_id_file.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "scene_name": "scene_001",
4 | "camera_ids": [
5 | 1,
6 | 2,
7 | 3,
8 | 4,
9 | 5,
10 | 6,
11 | 7,
12 | 8,
13 | 9,
14 | 10
15 | ]
16 | },
17 | {
18 | "scene_name": "scene_041",
19 | "camera_ids": [
20 | 361,
21 | 362,
22 | 363,
23 | 364,
24 | 365,
25 | 366,
26 | 367,
27 | 368,
28 | 369,
29 | 370
30 | ]
31 | },
32 | {
33 | "scene_name": "scene_042",
34 | "camera_ids": [
35 | 371,
36 | 372,
37 | 373,
38 | 374,
39 | 375,
40 | 376,
41 | 377,
42 | 378,
43 | 379
44 | ]
45 | },
46 | {
47 | "scene_name": "scene_043",
48 | "camera_ids": [
49 | 380,
50 | 381,
51 | 382,
52 | 383,
53 | 384,
54 | 385,
55 | 386,
56 | 387,
57 | 388,
58 | 389
59 | ]
60 | },
61 | {
62 | "scene_name": "scene_044",
63 | "camera_ids": [
64 | 390,
65 | 391,
66 | 392,
67 | 393,
68 | 394,
69 | 395,
70 | 396
71 | ]
72 | },
73 | {
74 | "scene_name": "scene_045",
75 | "camera_ids": [
76 | 397,
77 | 398,
78 | 399,
79 | 400,
80 | 401,
81 | 402,
82 | 403,
83 | 404
84 | ]
85 | },
86 | {
87 | "scene_name": "scene_046",
88 | "camera_ids": [
89 | 405,
90 | 406,
91 | 407,
92 | 408,
93 | 409,
94 | 410,
95 | 411
96 | ]
97 | },
98 | {
99 | "scene_name": "scene_047",
100 | "camera_ids": [
101 | 412,
102 | 413,
103 | 414,
104 | 415,
105 | 416,
106 | 417,
107 | 418,
108 | 419,
109 | 420,
110 | 421
111 | ]
112 | },
113 | {
114 | "scene_name": "scene_048",
115 | "camera_ids": [
116 | 422,
117 | 423,
118 | 424,
119 | 425,
120 | 426,
121 | 427,
122 | 428,
123 | 429
124 | ]
125 | },
126 | {
127 | "scene_name": "scene_049",
128 | "camera_ids": [
129 | 430,
130 | 431,
131 | 432,
132 | 433,
133 | 434,
134 | 435,
135 | 436,
136 | 437,
137 | 438,
138 | 439
139 | ]
140 | },
141 | {
142 | "scene_name": "scene_050",
143 | "camera_ids": [
144 | 440,
145 | 441,
146 | 442,
147 | 443,
148 | 444,
149 | 445,
150 | 446,
151 | 447
152 | ]
153 | },
154 | {
155 | "scene_name": "scene_051",
156 | "camera_ids": [
157 | 448,
158 | 449,
159 | 450,
160 | 451,
161 | 452,
162 | 453,
163 | 454
164 | ]
165 | },
166 | {
167 | "scene_name": "scene_052",
168 | "camera_ids": [
169 | 455,
170 | 456,
171 | 457,
172 | 458,
173 | 459,
174 | 460,
175 | 461,
176 | 462
177 | ]
178 | },
179 | {
180 | "scene_name": "scene_053",
181 | "camera_ids": [
182 | 463,
183 | 464,
184 | 465,
185 | 466,
186 | 467,
187 | 468,
188 | 469,
189 | 470,
190 | 471
191 | ]
192 | },
193 | {
194 | "scene_name": "scene_054",
195 | "camera_ids": [
196 | 472,
197 | 473,
198 | 474,
199 | 475,
200 | 476,
201 | 477,
202 | 478,
203 | 479
204 | ]
205 | },
206 | {
207 | "scene_name": "scene_055",
208 | "camera_ids": [
209 | 480,
210 | 481,
211 | 482,
212 | 483,
213 | 484,
214 | 485,
215 | 486
216 | ]
217 | },
218 | {
219 | "scene_name": "scene_056",
220 | "camera_ids": [
221 | 487,
222 | 488,
223 | 489,
224 | 490,
225 | 491,
226 | 492,
227 | 493,
228 | 494,
229 | 495
230 | ]
231 | },
232 | {
233 | "scene_name": "scene_057",
234 | "camera_ids": [
235 | 496,
236 | 497,
237 | 498,
238 | 499,
239 | 500,
240 | 501,
241 | 502,
242 | 503,
243 | 504,
244 | 505
245 | ]
246 | },
247 | {
248 | "scene_name": "scene_058",
249 | "camera_ids": [
250 | 506,
251 | 507,
252 | 508,
253 | 509,
254 | 510,
255 | 511,
256 | 512,
257 | 513,
258 | 514
259 | ]
260 | },
261 | {
262 | "scene_name": "scene_059",
263 | "camera_ids": [
264 | 515,
265 | 516,
266 | 517,
267 | 518,
268 | 519,
269 | 520,
270 | 521,
271 | 522,
272 | 523,
273 | 524
274 | ]
275 | },
276 | {
277 | "scene_name": "scene_060",
278 | "camera_ids": [
279 | 525,
280 | 526,
281 | 527,
282 | 528,
283 | 529,
284 | 530,
285 | 531,
286 | 532,
287 | 533,
288 | 534
289 | ]
290 | },
291 | {
292 | "scene_name": "scene_061",
293 | "camera_ids": [
294 | 535,
295 | 536,
296 | 537,
297 | 538,
298 | 539,
299 | 540,
300 | 541,
301 | 542,
302 | 543,
303 | 544
304 | ]
305 | },
306 | {
307 | "scene_name": "scene_062",
308 | "camera_ids": [
309 | 545,
310 | 546,
311 | 547,
312 | 548,
313 | 549,
314 | 550,
315 | 551,
316 | 552,
317 | 553,
318 | 554
319 | ]
320 | },
321 | {
322 | "scene_name": "scene_063",
323 | "camera_ids": [
324 | 555,
325 | 556,
326 | 557,
327 | 558,
328 | 559,
329 | 560,
330 | 561,
331 | 562,
332 | 563,
333 | 564
334 | ]
335 | },
336 | {
337 | "scene_name": "scene_064",
338 | "camera_ids": [
339 | 565,
340 | 566,
341 | 567,
342 | 568,
343 | 569,
344 | 570,
345 | 571,
346 | 572,
347 | 573,
348 | 574
349 | ]
350 | },
351 | {
352 | "scene_name": "scene_065",
353 | "camera_ids": [
354 | 575,
355 | 576,
356 | 577,
357 | 578,
358 | 579,
359 | 580,
360 | 581,
361 | 582,
362 | 583,
363 | 584
364 | ]
365 | },
366 | {
367 | "scene_name": "scene_066",
368 | "camera_ids": [
369 | 585,
370 | 586,
371 | 587,
372 | 588,
373 | 589,
374 | 590,
375 | 591,
376 | 592,
377 | 593,
378 | 594
379 | ]
380 | },
381 | {
382 | "scene_name": "scene_067",
383 | "camera_ids": [
384 | 595,
385 | 596,
386 | 597,
387 | 598,
388 | 599,
389 | 600,
390 | 601,
391 | 602,
392 | 603,
393 | 604
394 | ]
395 | },
396 | {
397 | "scene_name": "scene_068",
398 | "camera_ids": [
399 | 605,
400 | 606,
401 | 607,
402 | 608,
403 | 609,
404 | 610,
405 | 611,
406 | 612,
407 | 613,
408 | 614
409 | ]
410 | },
411 | {
412 | "scene_name": "scene_069",
413 | "camera_ids": [
414 | 615,
415 | 616,
416 | 617,
417 | 618,
418 | 619,
419 | 620,
420 | 621,
421 | 622,
422 | 623,
423 | 624
424 | ]
425 | },
426 | {
427 | "scene_name": "scene_070",
428 | "camera_ids": [
429 | 625,
430 | 626,
431 | 627,
432 | 628,
433 | 629,
434 | 630,
435 | 631,
436 | 632,
437 | 633,
438 | 634
439 | ]
440 | },
441 | {
442 | "scene_name": "scene_071",
443 | "camera_ids": [
444 | 635,
445 | 636,
446 | 637,
447 | 638,
448 | 639,
449 | 640,
450 | 641,
451 | 642,
452 | 643,
453 | 644,
454 | 645,
455 | 646,
456 | 647,
457 | 648,
458 | 650
459 | ]
460 | },
461 | {
462 | "scene_name": "scene_072",
463 | "camera_ids": [
464 | 651,
465 | 652,
466 | 653,
467 | 654,
468 | 655,
469 | 656,
470 | 657,
471 | 658,
472 | 659,
473 | 660,
474 | 661,
475 | 662,
476 | 663,
477 | 664,
478 | 665,
479 | 666
480 | ]
481 | },
482 | {
483 | "scene_name": "scene_073",
484 | "camera_ids": [
485 | 667,
486 | 668,
487 | 669,
488 | 670,
489 | 671,
490 | 672,
491 | 673,
492 | 674,
493 | 675,
494 | 676,
495 | 677,
496 | 678,
497 | 679,
498 | 680,
499 | 681,
500 | 682
501 | ]
502 | },
503 | {
504 | "scene_name": "scene_074",
505 | "camera_ids": [
506 | 683,
507 | 684,
508 | 685,
509 | 686,
510 | 687,
511 | 688,
512 | 689,
513 | 690,
514 | 691,
515 | 692,
516 | 693,
517 | 694,
518 | 695,
519 | 696,
520 | 697,
521 | 698
522 | ]
523 | },
524 | {
525 | "scene_name": "scene_075",
526 | "camera_ids": [
527 | 699,
528 | 700,
529 | 701,
530 | 702,
531 | 703,
532 | 704,
533 | 705,
534 | 706,
535 | 707,
536 | 708,
537 | 709,
538 | 710,
539 | 711,
540 | 712,
541 | 713,
542 | 714
543 | ]
544 | },
545 | {
546 | "scene_name": "scene_076",
547 | "camera_ids": [
548 | 715,
549 | 716,
550 | 717,
551 | 718,
552 | 719,
553 | 720,
554 | 721,
555 | 722,
556 | 723,
557 | 724,
558 | 725,
559 | 726,
560 | 727,
561 | 728,
562 | 729,
563 | 730
564 | ]
565 | },
566 | {
567 | "scene_name": "scene_077",
568 | "camera_ids": [
569 | 731,
570 | 732,
571 | 733,
572 | 734,
573 | 735,
574 | 736,
575 | 737,
576 | 738,
577 | 739,
578 | 740,
579 | 741,
580 | 742,
581 | 743,
582 | 744,
583 | 745,
584 | 746
585 | ]
586 | },
587 | {
588 | "scene_name": "scene_078",
589 | "camera_ids": [
590 | 747,
591 | 748,
592 | 749,
593 | 750,
594 | 751,
595 | 752,
596 | 753,
597 | 754,
598 | 755,
599 | 756,
600 | 757,
601 | 758,
602 | 759,
603 | 760,
604 | 761,
605 | 762
606 | ]
607 | },
608 | {
609 | "scene_name": "scene_079",
610 | "camera_ids": [
611 | 763,
612 | 764,
613 | 765,
614 | 766,
615 | 767,
616 | 768,
617 | 769,
618 | 770,
619 | 771,
620 | 772,
621 | 773,
622 | 774,
623 | 775,
624 | 776,
625 | 777,
626 | 778
627 | ]
628 | },
629 | {
630 | "scene_name": "scene_080",
631 | "camera_ids": [
632 | 779,
633 | 780,
634 | 781,
635 | 782,
636 | 783,
637 | 784,
638 | 785,
639 | 786,
640 | 787,
641 | 788,
642 | 789,
643 | 790,
644 | 791,
645 | 792,
646 | 793,
647 | 794
648 | ]
649 | },
650 | {
651 | "scene_name": "scene_081",
652 | "camera_ids": [
653 | 795,
654 | 796,
655 | 797,
656 | 798,
657 | 799,
658 | 800,
659 | 801,
660 | 802,
661 | 803,
662 | 804,
663 | 805,
664 | 806,
665 | 807,
666 | 808,
667 | 809,
668 | 810
669 | ]
670 | },
671 | {
672 | "scene_name": "scene_082",
673 | "camera_ids": [
674 | 811,
675 | 812,
676 | 813,
677 | 814,
678 | 815,
679 | 816,
680 | 817,
681 | 818,
682 | 819,
683 | 820,
684 | 821,
685 | 822,
686 | 823,
687 | 824,
688 | 825,
689 | 826
690 | ]
691 | },
692 | {
693 | "scene_name": "scene_083",
694 | "camera_ids": [
695 | 827,
696 | 828,
697 | 829,
698 | 830,
699 | 831,
700 | 832,
701 | 833,
702 | 834,
703 | 835,
704 | 836,
705 | 837,
706 | 838,
707 | 839,
708 | 840,
709 | 841,
710 | 842
711 | ]
712 | },
713 | {
714 | "scene_name": "scene_084",
715 | "camera_ids": [
716 | 843,
717 | 844,
718 | 845,
719 | 846,
720 | 847,
721 | 848,
722 | 849,
723 | 850,
724 | 851,
725 | 852,
726 | 853,
727 | 854,
728 | 855,
729 | 856,
730 | 857,
731 | 858
732 | ]
733 | },
734 | {
735 | "scene_name": "scene_085",
736 | "camera_ids": [
737 | 859,
738 | 860,
739 | 861,
740 | 862,
741 | 863,
742 | 864,
743 | 865,
744 | 866,
745 | 867,
746 | 868,
747 | 869,
748 | 870,
749 | 871,
750 | 872,
751 | 873,
752 | 874
753 | ]
754 | },
755 | {
756 | "scene_name": "scene_086",
757 | "camera_ids": [
758 | 875,
759 | 876,
760 | 877,
761 | 878,
762 | 879,
763 | 880,
764 | 881,
765 | 882,
766 | 883,
767 | 884,
768 | 885,
769 | 886,
770 | 887,
771 | 888,
772 | 889,
773 | 890
774 | ]
775 | },
776 | {
777 | "scene_name": "scene_087",
778 | "camera_ids": [
779 | 891,
780 | 892,
781 | 893,
782 | 894,
783 | 895,
784 | 896,
785 | 897,
786 | 898,
787 | 899,
788 | 900,
789 | 901,
790 | 902,
791 | 903,
792 | 904,
793 | 905,
794 | 906
795 | ]
796 | },
797 | {
798 | "scene_name": "scene_088",
799 | "camera_ids": [
800 | 907,
801 | 908,
802 | 909,
803 | 910,
804 | 911,
805 | 912,
806 | 913,
807 | 914,
808 | 915,
809 | 916,
810 | 917,
811 | 918,
812 | 919,
813 | 920,
814 | 921,
815 | 922
816 | ]
817 | },
818 | {
819 | "scene_name": "scene_089",
820 | "camera_ids": [
821 | 923,
822 | 924,
823 | 925,
824 | 926,
825 | 927,
826 | 928,
827 | 929,
828 | 930,
829 | 931,
830 | 932,
831 | 933,
832 | 934,
833 | 935,
834 | 936,
835 | 937
836 | ]
837 | },
838 | {
839 | "scene_name": "scene_090",
840 | "camera_ids": [
841 | 938,
842 | 939,
843 | 940,
844 | 941,
845 | 942,
846 | 943,
847 | 944,
848 | 945,
849 | 946,
850 | 947,
851 | 948,
852 | 949,
853 | 950,
854 | 951,
855 | 952,
856 | 953
857 | ]
858 | }
859 | ]
--------------------------------------------------------------------------------
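The file above maps each scene to its camera ids. A minimal sketch (assuming it is read from the repository root) of turning it into a lookup table:

    import json

    with open("tracking/config/scene_2_camera_id_file.json") as f:
        scene_to_cameras = {entry["scene_name"]: entry["camera_ids"] for entry in json.load(f)}

    print(scene_to_cameras["scene_041"])  # -> [361, 362, ..., 370]
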
/detector/aic24_get_detection.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import argparse
3 | import os
4 | import os.path as osp
5 | import time
6 | import cv2
7 | import torch
8 | import numpy as np
9 | import json
10 |
11 | from loguru import logger
12 |
13 | sys.path.append('.')
14 |
15 | from yolox.data.data_augment import preproc
16 | from yolox.exp import get_exp
17 | from yolox.utils import fuse_model, get_model_info, postprocess
18 | from yolox.utils.visualize import plot_tracking
19 | from tracker.bot_sort import BoTSORT
20 | from tracker.tracking_utils.timer import Timer
21 |
22 |
23 | IMAGE_EXT = [".jpg"]
24 | def make_parser():
25 | parser = argparse.ArgumentParser("BoT-SORT Demo!")
26 | parser.add_argument("root_path", type=str, default=None)
27 | parser.add_argument("-s","--scene", default=None, type=str)
28 | #parser.add_argument("demo", default="image", help="demo type, eg. image, video and webcam")
29 | parser.add_argument("-expn", "--experiment-name", type=str, default=None)
30 | parser.add_argument("-n", "--name", type=str, default=None, help="model name")
31 | parser.add_argument("--path", default="", help="path to images or video")
32 | parser.add_argument("--camid", type=int, default=0, help="webcam demo camera id")
33 | parser.add_argument("--save_result", action="store_true",help="whether to save the inference result of image/video")
34 |     parser.add_argument("-f", "--exp_file", default="yolox/exps/example/mot/yolox_x_mix_det.py", type=str, help="please input your experiment description file")
35 | parser.add_argument("-c", "--ckpt", default="bytetrack_x_mot17.pth.tar", type=str, help="ckpt for eval")
36 | parser.add_argument("--device", default="gpu", type=str, help="device to run our model, can either be cpu or gpu")
37 | parser.add_argument("--conf", default=None, type=float, help="test conf")
38 | parser.add_argument("--nms", default=None, type=float, help="test nms threshold")
39 | parser.add_argument("--tsize", default=None, type=int, help="test img size")
40 | parser.add_argument("--fps", default=30, type=int, help="frame rate (fps)")
41 | parser.add_argument("--fp16", dest="fp16", default=False, action="store_true",help="Adopting mix precision evaluating.")
42 | parser.add_argument("--fuse", dest="fuse", default=False, action="store_true", help="Fuse conv and bn for testing.")
43 | parser.add_argument("--trt", dest="trt", default=False, action="store_true", help="Using TensorRT model for testing.")
44 |
45 | # tracking args
46 | parser.add_argument("--track_high_thresh", type=float, default=0.6, help="tracking confidence threshold")
47 | parser.add_argument("--track_low_thresh", default=0.1, type=float, help="lowest detection threshold")
48 | parser.add_argument("--new_track_thresh", default=0.7, type=float, help="new track thresh")
49 | parser.add_argument("--track_buffer", type=int, default=30, help="the frames for keep lost tracks")
50 | parser.add_argument("--match_thresh", type=float, default=0.8, help="matching threshold for tracking")
51 | parser.add_argument("--aspect_ratio_thresh", type=float, default=1.6, help="threshold for filtering out boxes of which aspect ratio are above the given value.")
52 | parser.add_argument('--min_box_area', type=float, default=10, help='filter out tiny boxes')
53 | parser.add_argument("--fuse-score", dest="fuse_score", default=False, action="store_true", help="fuse score and iou for association")
54 |
55 | # CMC
56 | parser.add_argument("--cmc-method", default="orb", type=str, help="cmc method: files (Vidstab GMC) | orb | ecc")
57 |
58 | # ReID
59 | parser.add_argument("--with-reid", dest="with_reid", default=False, action="store_true", help="test mot20.")
60 | parser.add_argument("--fast-reid-config", dest="fast_reid_config", default=r"fast_reid/configs/MOT17/sbs_S50.yml", type=str, help="reid config file path")
61 |     parser.add_argument("--fast-reid-weights", dest="fast_reid_weights", default=r"pretrained/mot17_sbs_S50.pth", type=str,help="reid weights file path")
62 | parser.add_argument('--proximity_thresh', type=float, default=0.5, help='threshold for rejecting low overlap reid matches')
63 | parser.add_argument('--appearance_thresh', type=float, default=0.25, help='threshold for rejecting low appearance similarity reid matches')
64 | return parser
65 |
66 |
67 | def get_image_list(path):
68 | image_names = []
69 | for maindir, subdir, file_name_list in os.walk(path):
70 | for filename in file_name_list:
71 | apath = osp.join(maindir, filename)
72 | ext = osp.splitext(apath)[1]
73 | if ext in IMAGE_EXT:
74 | image_names.append(apath)
75 | return image_names
76 |
77 |
78 | def write_results(filename, results):
79 | save_format = '{frame},{id},{x1},{y1},{w},{h},{s},-1,-1,-1\n'
80 | with open(filename, 'w') as f:
81 | for frame_id, tlwhs, track_ids, scores in results:
82 | for tlwh, track_id, score in zip(tlwhs, track_ids, scores):
83 | if track_id < 0:
84 | continue
85 | x1, y1, w, h = tlwh
86 | line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1), s=round(score, 2))
87 | f.write(line)
88 | logger.info('save results to {}'.format(filename))
89 |
90 |
91 | class Predictor(object):
92 | def __init__(
93 | self,
94 | model,
95 | exp,
96 | trt_file=None,
97 | decoder=None,
98 | device=torch.device("cpu"),
99 | fp16=False
100 | ):
101 | self.model = model
102 | self.decoder = decoder
103 | self.num_classes = exp.num_classes
104 | self.confthre = exp.test_conf
105 | self.nmsthre = exp.nmsthre
106 | self.test_size = exp.test_size
107 | self.device = device
108 | self.fp16 = fp16
109 | if trt_file is not None:
110 | from torch2trt import TRTModule
111 |
112 | model_trt = TRTModule()
113 | model_trt.load_state_dict(torch.load(trt_file))
114 |
115 | x = torch.ones((1, 3, exp.test_size[0], exp.test_size[1]), device=device)
116 | self.model(x)
117 | self.model = model_trt
118 | self.rgb_means = (0.485, 0.456, 0.406)
119 | self.std = (0.229, 0.224, 0.225)
120 |
121 | def inference(self, img, timer):
122 | img_info = {"id": 0}
123 | if isinstance(img, str):
124 | img_info["file_name"] = osp.basename(img)
125 | img = cv2.imread(img)
126 | else:
127 | img_info["file_name"] = None
128 |
129 | height, width = img.shape[:2]
130 | img_info["height"] = height
131 | img_info["width"] = width
132 | img_info["raw_img"] = img
133 |
134 | img, ratio = preproc(img, self.test_size, self.rgb_means, self.std)
135 | img_info["ratio"] = ratio
136 | img = torch.from_numpy(img).unsqueeze(0).float().to(self.device)
137 | if self.fp16:
138 | img = img.half() # to FP16
139 |
140 | with torch.no_grad():
141 | timer.tic()
142 | outputs = self.model(img)
143 | if self.decoder is not None:
144 | outputs = self.decoder(outputs, dtype=outputs.type())
145 | outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
146 | return outputs, img_info
147 |
148 |
149 | def image_demo(predictor, vis_folder, current_time, args):
150 |
151 | root_path = args.root_path
152 | scene = args.scene
153 | input = osp.join(root_path, "Original", scene)
154 | cameras = []
155 | for f in os.listdir(input):
156 | if os.path.isdir(os.path.join(input, f)):
157 | cameras.append(f)
158 | cameras = sorted(cameras)
159 |     scale = min(800/1080,1440/1920)  # preproc resize ratio, assuming 1920x1080 frames and the default (800, 1440) test size
160 | for cam in cameras:
161 | imgs = sorted(os.listdir(osp.join(input, cam, 'Frame')))
162 | timer = Timer()
163 | output = osp.join(root_path,'Detection', '{}.txt'.format(osp.join(scene, cam)))
164 | outjson = osp.join(root_path,'Detection', '{}.json'.format(osp.join(scene, cam)))
165 | if not os.path.isdir(osp.join(root_path,'Detection',scene)):
166 | os.makedirs(osp.join(root_path,'Detection',scene))
167 | u_num = 0
168 | ret_json = {}
169 | results = []
170 | for frame_id, img_path in enumerate(imgs, 1):
171 | img_path = osp.join(input, cam, 'Frame',img_path)
172 |
173 | # Detect objects
174 | outputs, img_info = predictor.inference(img_path, timer)
175 |
176 | detections = []
177 | if outputs[0] is not None:
178 | outputs = outputs[0].cpu().numpy()
179 | detections = outputs[:, :7]
180 | detections[:, :4] /= scale
181 | detections = detections[detections[:,4]>0.1]
182 | timer.toc()
183 | else:
184 | timer.toc()
185 |
186 | for det in detections:
187 | x1,y1,x2,y2,score,_,_ = det
188 | x1 = max(0,x1)
189 | y1 = max(0,y1)
190 | x2 = min(1920,x2)
191 | y2 = min(1080,y2)
192 | results.append([cam,frame_id,1,int(x1),int(y1),int(x2),int(y2),score])
193 | det_json = {}
194 | det_json['Frame'] = frame_id
195 | det_json['ImgPath'] = img_path.replace(root_path + '/','')
196 | det_json['NpyPath'] = ''
197 | Coordinate = {'x1':int(x1), 'y1':int(y1), 'x2': int(x2), 'y2': int(y2)}
198 | det_json['Coordinate'] = Coordinate
199 | det_json['ClusterID'] = None
200 | det_json['OfflineID'] = None
201 | ret_json[str(u_num).zfill(8)] = det_json
202 | u_num += 1
203 |
204 | if frame_id % 1000 == 0:
205 | logger.info('Processing cam {} frame {} ({:.2f} fps)'.format(cam, frame_id, 1. / max(1e-5, timer.average_time)))
206 |
207 | with open(output,'a') as f:
208 | for cam,frame_id,cls,x1,y1,x2,y2,score in results:
209 | f.write('{},{},{},{},{},{},{},{}\n'.format(cam,frame_id,cls,x1,y1,x2,y2,score))
210 | with open(outjson, 'a') as f:
211 | json.dump(ret_json, f, ensure_ascii=False)
212 |
213 | def imageflow_demo(predictor, vis_folder, current_time, args):
214 | cap = cv2.VideoCapture(args.path if args.demo == "video" else args.camid)
215 | width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float
216 | height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float
217 | fps = cap.get(cv2.CAP_PROP_FPS)
218 | timestamp = time.strftime("%Y_%m_%d_%H_%M_%S", current_time)
219 | save_folder = osp.join(vis_folder, timestamp)
220 | os.makedirs(save_folder, exist_ok=True)
221 | if args.demo == "video":
222 | save_path = osp.join(save_folder, args.path.split("/")[-1])
223 | else:
224 | save_path = osp.join(save_folder, "camera.mp4")
225 | logger.info(f"video save_path is {save_path}")
226 | vid_writer = cv2.VideoWriter(
227 | save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height))
228 | )
229 | tracker = BoTSORT(args, frame_rate=args.fps)
230 | timer = Timer()
231 | frame_id = 0
232 | results = []
233 | while True:
234 | if frame_id % 20 == 0:
235 | logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1. / max(1e-5, timer.average_time)))
236 | ret_val, frame = cap.read()
237 | if ret_val:
238 | # Detect objects
239 | outputs, img_info = predictor.inference(frame, timer)
240 |             scale = min(exp.test_size[0] / float(img_info['height']), exp.test_size[1] / float(img_info['width']))
241 |
242 | if outputs[0] is not None:
243 | outputs = outputs[0].cpu().numpy()
244 | detections = outputs[:, :7]
245 | detections[:, :4] /= scale
246 |
247 | # Run tracker
248 | online_targets = tracker.update(detections, img_info["raw_img"])
249 |
250 | online_tlwhs = []
251 | online_ids = []
252 | online_scores = []
253 | for t in online_targets:
254 | tlwh = t.tlwh
255 | tid = t.track_id
256 | vertical = tlwh[2] / tlwh[3] > args.aspect_ratio_thresh
257 | if tlwh[2] * tlwh[3] > args.min_box_area and not vertical:
258 | online_tlwhs.append(tlwh)
259 | online_ids.append(tid)
260 | online_scores.append(t.score)
261 | results.append(
262 | f"{frame_id},{tid},{tlwh[0]:.2f},{tlwh[1]:.2f},{tlwh[2]:.2f},{tlwh[3]:.2f},{t.score:.2f},-1,-1,-1\n"
263 | )
264 | timer.toc()
265 | online_im = plot_tracking(
266 | img_info['raw_img'], online_tlwhs, online_ids, frame_id=frame_id + 1, fps=1. / timer.average_time
267 | )
268 | else:
269 | timer.toc()
270 | online_im = img_info['raw_img']
271 | if args.save_result:
272 | vid_writer.write(online_im)
273 | ch = cv2.waitKey(1)
274 | if ch == 27 or ch == ord("q") or ch == ord("Q"):
275 | break
276 | else:
277 | break
278 | frame_id += 1
279 |
280 | if args.save_result:
281 | res_file = osp.join(vis_folder, f"{timestamp}.txt")
282 | with open(res_file, 'w') as f:
283 | f.writelines(results)
284 | logger.info(f"save results to {res_file}")
285 |
286 |
287 | def main(exp, args):
288 | if not args.experiment_name:
289 | args.experiment_name = exp.exp_name
290 |
291 | output_dir = osp.join(exp.output_dir, args.experiment_name)
292 | os.makedirs(output_dir, exist_ok=True)
293 |
294 | if args.save_result:
295 | vis_folder = osp.join(output_dir, "track_vis")
296 | os.makedirs(vis_folder, exist_ok=True)
297 |
298 | if args.trt:
299 | args.device = "gpu"
300 | args.device = torch.device("cuda" if args.device == "gpu" else "cpu")
301 |
302 | logger.info("Args: {}".format(args))
303 |
304 | if args.conf is not None:
305 | exp.test_conf = args.conf
306 | if args.nms is not None:
307 | exp.nmsthre = args.nms
308 | if args.tsize is not None:
309 | exp.test_size = (args.tsize, args.tsize)
310 |
311 | model = exp.get_model().to(args.device)
312 | logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))
313 | model.eval()
314 |
315 | if not args.trt:
316 | if args.ckpt is None:
317 | ckpt_file = osp.join(output_dir, "best_ckpt.pth.tar")
318 | else:
319 | ckpt_file = args.ckpt
320 | logger.info("loading checkpoint")
321 | ckpt = torch.load(ckpt_file, map_location="cpu")
322 | # load the model state dict
323 | model.load_state_dict(ckpt["model"])
324 | logger.info("loaded checkpoint done.")
325 |
326 | if args.fuse:
327 | logger.info("\tFusing model...")
328 | model = fuse_model(model)
329 |
330 | if args.fp16:
331 | model = model.half() # to FP16
332 |
333 | if args.trt:
334 |         assert not args.fuse, "TensorRT model does not support model fusing!"
335 | trt_file = osp.join(output_dir, "model_trt.pth")
336 | assert osp.exists(
337 | trt_file
338 | ), "TensorRT model is not found!\n Run python3 tools/trt.py first!"
339 | model.head.decode_in_inference = False
340 | decoder = model.head.decode_outputs
341 | logger.info("Using TensorRT to inference")
342 | else:
343 | trt_file = None
344 | decoder = None
345 |
346 | predictor = Predictor(model, exp, trt_file, decoder, args.device, args.fp16)
347 | current_time = time.localtime()
348 |
349 | image_demo(predictor, None, current_time, args)
350 |
351 |
352 | if __name__ == "__main__":
353 | args = make_parser().parse_args()
354 | exp = get_exp(args.exp_file, args.name)
355 |
356 | args.ablation = False
357 | args.mot20 = not args.fuse_score
358 |
359 | main(exp, args)
360 |
--------------------------------------------------------------------------------
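For reference, a sketch of one record in the per-camera detection JSON written by image_demo() above: each detection is keyed by an 8-digit serial and stores its frame number, image path, an initially empty feature path, the clipped bbox, and placeholder cluster/offline ids that later pipeline stages fill in. The values below are illustrative only.

    import json

    u_num = 0
    record = {
        "Frame": 1,
        "ImgPath": "Original/scene_041/camera_0361/Frame/000001.jpg",  # hypothetical example path
        "NpyPath": "",             # filled later by the embedding step
        "Coordinate": {"x1": 512, "y1": 300, "x2": 600, "y2": 520},
        "ClusterID": None,
        "OfflineID": None,
    }
    print(json.dumps({str(u_num).zfill(8): record}, indent=2))
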
/tracking/src/scpt.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import sys
4 | from sklearn.cluster import DBSCAN
5 | from sklearn.metrics.pairwise import cosine_similarity
6 | from itertools import combinations, permutations, product, chain
7 | from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
8 | from scipy.spatial.distance import squareform
9 | from scipy.interpolate import RegularGridInterpolator
10 | from collections import Counter
11 |
12 |
13 | def create_centrality_matrix(clusters, similarity_matrix,frames,**kwargs):
14 |     # Convert the node-to-node similarity matrix into a cluster-to-cluster centrality matrix
15 | remove_noise_cluster = kwargs.get('remove_noise_cluster', True)
16 | epsilon = kwargs.get('epsilon', 0.3)
17 |
18 | unique_clusters = sorted(list(set(clusters)))
19 | if remove_noise_cluster:
20 | if -1 in unique_clusters:
21 | unique_clusters.remove(-1)
22 |
23 | centrality_matrix = np.ones((len(unique_clusters),len(unique_clusters)))*-1
24 | np.fill_diagonal(centrality_matrix, 0)
25 |
26 | cluster_frames_dict = {cluster:[] for cluster in unique_clusters}
27 | if remove_noise_cluster:
28 | [cluster_frames_dict[cluster].append(frame) for frame,cluster in zip(frames,clusters) if cluster != -1]
29 | else:
30 | [cluster_frames_dict[cluster].append(frame) for frame,cluster in zip(frames,clusters)]
31 |
32 | for i in range(len(unique_clusters)):
33 | cluster1 = unique_clusters[i]
34 | cluster1_frames = cluster_frames_dict[cluster1]
35 | cluster1_indices = [k for k,cluster in enumerate(clusters) if cluster ==cluster1] #indices of similarity_matrix
36 | for j in range(i+1,len(unique_clusters)):
37 | cluster2 = unique_clusters[j]
38 | cluster2_frames = cluster_frames_dict[cluster2]
39 | common_frames = set(cluster1_frames).intersection(set(cluster2_frames))
40 | if len(common_frames) > 0: continue
41 | cluster2_indices = [k for k,cluster in enumerate(clusters) if cluster ==cluster2]
42 | similarities = similarity_matrix[np.ix_(cluster1_indices, cluster2_indices)]
43 | centrality = np.sum(similarities[similarities > (1 - epsilon)])
44 | centrality_matrix[i,j] = centrality
45 | centrality_matrix[j,i] = centrality
46 | return centrality_matrix
47 |
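# Worked example for the centrality above (illustrative numbers, not dataset values):
# if cluster A and cluster B never share a frame and three of their cross-pair
# similarities exceed 1 - epsilon (e.g. 0.85, 0.80 and 0.75 with epsilon = 0.3),
# their centrality is the sum 2.40; cluster pairs that do share a frame keep the
# initial -1, marking them as mutually exclusive identities.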
48 | def associate_cluster(clusters,centrality_matrix,**kwargs):
49 |     # Perform hierarchical clustering in which the items being merged are the clusters themselves.
50 | epsilon = kwargs.get('epsilon', 0.3)
51 | remove_noise_cluster = kwargs.get('remove_noise_cluster', True)
52 | cost_function = kwargs.get('cost_function', 1)
53 | minimize = kwargs.get("minimize",True)
54 | """
55 | cost_function:1 ⇒ single linkage like
56 | cost_function:2 ⇒ average linkage like
57 | """
58 | np.fill_diagonal(centrality_matrix, 0)
59 | clusters = np.array(clusters)
60 | unique_clusters = np.sort(np.unique(clusters))
61 | if remove_noise_cluster:
62 | if -1 in unique_clusters:
63 | unique_clusters = unique_clusters[unique_clusters != -1]
64 |
65 | if cost_function == 1:
66 | pass
67 | elif cost_function == 2:
68 | count = Counter(clusters)
69 | if remove_noise_cluster:
70 | if -1 in count.keys():
71 | del count[-1]
72 | centrality = np.max(centrality_matrix)
73 |
74 | th = 1 - epsilon
75 | while centrality > th:
76 | if cost_function == 1:
77 | max_index = np.argmax(centrality_matrix)
78 | elif cost_function == 2:
79 | len_element_matrix = np.outer(list(count.values()),list(count.values()))
80 | averaged_centrality_matrix = np.multiply(centrality_matrix,1/len_element_matrix)
81 | np.fill_diagonal(averaged_centrality_matrix, 0)
82 | max_index = np.argmax(averaged_centrality_matrix)
83 |
84 | cluster1_index, cluster2_index = np.unravel_index(max_index, centrality_matrix.shape)
85 | cluster1 = unique_clusters[cluster1_index]
86 | cluster2 = unique_clusters[cluster2_index]
87 | if cost_function == 1 or cost_function == 3:
88 | centrality = centrality_matrix[cluster1_index, cluster2_index]
89 | elif cost_function == 2:
90 | centrality = averaged_centrality_matrix[cluster1_index, cluster2_index]
91 |
92 | if centrality > th:
93 | target_row = centrality_matrix[[cluster1_index,cluster2_index],:]
94 | sum_row = np.sum(target_row,axis=0)
95 | if minimize:
96 | mask = np.min(target_row, axis=0)
97 | sum_row = np.where(mask < 0, -1, sum_row)
98 | centrality_matrix[:, cluster1_index] = sum_row
99 | centrality_matrix[cluster1_index,:] = sum_row
100 |
101 | next_indices = np.arange(len(unique_clusters))
102 | next_indices = next_indices[next_indices != cluster2_index]
103 | centrality_matrix = centrality_matrix[np.ix_(next_indices,next_indices)]
104 | np.fill_diagonal(centrality_matrix, 0)
105 | clusters = np.where(clusters == cluster2, cluster1, clusters)
106 | unique_clusters = unique_clusters[unique_clusters != cluster2]
107 |
108 | if cost_function == 2:
109 | count[cluster1] += count[cluster2]
110 | del count[cluster2]
111 | else:
112 | break
113 | return clusters
114 |
115 |
116 | def get_initial_index(distance_matrix,overlap_indices_list):
117 | # determines the initial index for the assignment problem.
118 | distances = []
119 | for overlap_indices in overlap_indices_list:
120 | min_distance = 2
121 | for index1,index2 in combinations(overlap_indices,2): #
122 | distance = distance_matrix[index1,index2]
123 | min_distance = distance if distance < min_distance else min_distance
124 | distances.append(min_distance)
125 | max_index = np.argmax(distances)
126 | return max_index
127 |
128 |
129 | def fill_none(lst):
130 |     # Fill each None in the list with an unused number from the sequential range
131 | used_nums = [num for num in lst if num is not None]
132 | unused_nums = [num for num in range(len(lst)) if num not in used_nums]
133 | for i in range(len(lst)):
134 | if lst[i] is None:
135 | lst[i] = unused_nums.pop(0)
136 | return lst
137 |
138 | def get_candidates_indices_list(similarity_matrix,subcluster_indices_list,overlap_indices_list,epsilon,**kwargs):
139 | # get candidates of the assignment problem
140 | num_candidates = kwargs.get('num_candidates', 10)
141 |
142 | if len(overlap_indices_list) < num_candidates:
143 | candidates_indices_list = overlap_indices_list
144 | else:
145 | np.fill_diagonal(similarity_matrix, 0)
146 | flatten_subcluster_indices = list(chain.from_iterable(subcluster_indices_list))
147 | tmp_similarity_matrix = similarity_matrix[flatten_subcluster_indices]
148 |
149 | max_similarities =np.max(tmp_similarity_matrix,axis=0)
150 | neighbor_indices = np.where(max_similarities > (1-epsilon))[0]
151 | sorted_indices = np.argsort(max_similarities[neighbor_indices])[::-1]
152 | neighbor_indices = neighbor_indices[sorted_indices]
153 |
154 | if len(neighbor_indices) > num_candidates:
155 | neighbor_indices = neighbor_indices[:num_candidates]
156 | neighbor_indices = neighbor_indices.tolist()
157 |
158 | candidates_indices_list = []
159 | for neighbor_index in neighbor_indices:
160 | for overlap_indices in overlap_indices_list:
161 | if neighbor_index not in overlap_indices: continue
162 | candidates_indices_list.append(overlap_indices)
163 | for overlap_index in overlap_indices:
164 | try:
165 | neighbor_indices.remove(overlap_index)
166 |                     except ValueError:  # overlap_index was not in neighbor_indices
167 | pass
168 | return candidates_indices_list
169 |
170 | def agglomerative_clustering(distance_matrix,**kwargs):
171 | # perform agglomerative hierarchical clustering
172 | epsilon = kwargs.get('epsilon', 0.3)
173 | metric = kwargs.get('metric','cosine')
174 | np.fill_diagonal(distance_matrix, 0)
175 | linked = linkage(squareform(distance_matrix), method='single', metric=metric)
176 | clusters = list(fcluster(linked, epsilon, criterion='distance')) # min(clusters)=1
177 | return clusters
178 |
179 | def bipartite_matching(new_key,centrality_dict,centrality_matrix,overlap_indices,**kwargs):
180 | # bipartite matching between unclustered overlap nodes and clustered overlap nodes
181 | epsilon = kwargs.get('epsilon', 0.3)
182 |
183 | sum_centrality = 0
184 | subcluster_indices = [None]*len(overlap_indices)
185 | th = 1-epsilon
186 | while np.max(centrality_matrix) > th:
187 | max_index = np.argmax(centrality_matrix)
188 | row_index, col_index = np.unravel_index(max_index, centrality_matrix.shape)
189 | centrality = centrality_matrix[row_index, col_index]
190 | sum_centrality += centrality
191 | subcluster_indices[row_index] = col_index
192 | centrality_matrix[row_index,:]=0
193 | centrality_matrix[:,col_index]=0
194 | centrality_dict[new_key] = {"overlap_indices":overlap_indices,"indices":subcluster_indices,"centrality":sum_centrality}
195 |
196 | return centrality_dict
197 |
198 |
199 | def separate_into_subcluster(tmp_clusters, overlap_indices_list, distance_matrix,**kwargs):
200 | # overlap nodes are separated into subclusters
201 | epsilon = kwargs.get('epsilon', 0.3)
202 | matching_algo_th = kwargs.get('matching_algo_th', 0)
203 | debug = kwargs.get('debug', False)
204 |
205 |     max_overlap = max([len(i) for i in overlap_indices_list]) # maximum number of detections overlapping in the same frame
206 | initial_index = get_initial_index(distance_matrix,overlap_indices_list) #index of overlap_indices_list
207 | initial_node_indices = overlap_indices_list[initial_index]
208 | del overlap_indices_list[initial_index]
209 |
210 | subcluster_indices_list = [[] for _ in range(max_overlap)]
211 | [subcluster_indices_list[i].append(initial_node_index) for i,initial_node_index in enumerate(initial_node_indices)]
212 |
213 | similarity_matrix = 1-distance_matrix
214 | np.fill_diagonal(similarity_matrix, 0)
215 |
216 |     # separate overlap nodes into several groups
217 | while len(overlap_indices_list) != 0:
218 | centrality_dict = {}
219 | max_centrality = 0
220 |
221 | candidates_indices_list = get_candidates_indices_list(similarity_matrix,subcluster_indices_list,overlap_indices_list,epsilon)
222 | for i,overlap_indices in enumerate(candidates_indices_list):
223 | centrality_matrix = np.zeros((len(overlap_indices),len(subcluster_indices_list))) #can not use create_centrality_matrix
224 | for j, overlap_index in enumerate(overlap_indices):
225 | tmp_similarity_matrix = similarity_matrix[overlap_index]
226 | for k, subcluster_indices in enumerate(subcluster_indices_list):
227 | similarities = tmp_similarity_matrix[subcluster_indices]
228 | centrality = np.sum(similarities[similarities > (1 - epsilon)])
229 | centrality_matrix[j,k] = centrality
230 |
231 | centrality_dict = bipartite_matching(i,centrality_dict,centrality_matrix,overlap_indices,epsilon=epsilon)
232 |
233 | max_centrality = 0 if centrality_dict == {} else np.max([value["centrality"] for value in centrality_dict.values()])
234 |
235 | if max_centrality == 0:
236 | max_index = get_initial_index(distance_matrix,overlap_indices_list)
237 | max_subcluster_indices = list(range(max_overlap))
238 | overlap_indices = overlap_indices_list[max_index]
239 | else:
240 | max_index = [key for key,value in zip(centrality_dict,centrality_dict.values()) if value["centrality"]==max_centrality][0]
241 | max_subcluster_indices = list(centrality_dict[max_index]["indices"])
242 | if None in max_subcluster_indices:
243 | max_subcluster_indices = fill_none(max_subcluster_indices)
244 | overlap_indices = centrality_dict[max_index]["overlap_indices"]
245 | [subcluster_indices_list[max_subcluster_index].append(overlap_index) for max_subcluster_index,overlap_index in zip(max_subcluster_indices,overlap_indices)]
246 | overlap_indices_list.remove(overlap_indices)
247 |
248 | # assign cluster ID
249 | for subcluster_indices in subcluster_indices_list:
250 | if len(subcluster_indices) == 1:
251 | tmp_clusters[subcluster_indices[0]] = np.max(tmp_clusters)+1
252 | else:
253 | sub_clusters = agglomerative_clustering(distance_matrix[np.ix_(subcluster_indices, subcluster_indices)],epsilon=epsilon)
254 | sub_clusters = [sub_cluster+max(tmp_clusters) for sub_cluster in sub_clusters]
255 | for sub_cluster,sub_cluster_index in zip(sub_clusters,subcluster_indices):
256 | tmp_clusters[sub_cluster_index] = sub_cluster
257 | return tmp_clusters
258 |
259 | def overlap_suppression_clustering(distance_matrix,frames,nonoverlap_indices,overlap_indices_list,**kwargs): #overlap_indices_list,
260 | epsilon = kwargs.get('epsilon', 0.3)
261 | debug = kwargs.get('debug', False)
262 | clusters = [-1]*len(frames)
263 |
264 | # clustering for non-overlapping nodes
265 | if nonoverlap_indices != []:
266 | if len(nonoverlap_indices) > 1:
267 | nonoverlap_clusters = agglomerative_clustering(distance_matrix[np.ix_(nonoverlap_indices,nonoverlap_indices)], epsilon=epsilon)
268 | else:
269 | nonoverlap_clusters = [0]
270 | for k,target_index in enumerate(nonoverlap_indices):
271 | clusters[target_index] = nonoverlap_clusters[k]
272 |
273 | # clustering for overlapping nodes
274 | clusters = separate_into_subcluster(clusters, overlap_indices_list, distance_matrix,epsilon=epsilon,debug=debug)
275 |
276 | similarity_matrix = 1 - distance_matrix
277 | centrality_matrix = create_centrality_matrix(clusters,similarity_matrix,frames,epsilon=epsilon)
278 |
279 | # merging for subcluster
280 | clusters = associate_cluster(clusters,centrality_matrix, epsilon=epsilon)
281 |
282 | return clusters
283 |
284 | def divide_overlap_or_nonoverlap(cluster_frames,cluster_indices):
285 |
286 | frame_indices_dict = {frame:[] for frame in sorted(list(set(cluster_frames)))}
287 | [frame_indices_dict[frame].append(index) for index,frame in zip(cluster_indices,cluster_frames)]
288 | overlap_indices_list = [indices for indices in frame_indices_dict.values() if len(indices) > 1]
289 | flattened_overlap_indices = sum(overlap_indices_list, [])
290 | nonoverlap_indices = [index for index in cluster_indices if index not in flattened_overlap_indices]
291 |
292 | return overlap_indices_list, nonoverlap_indices
293 |
294 | def reclustering_overlap_cluster(distance_matrix,tracking_dict,serials,clusters,**kwargs):
295 | epsilon = kwargs.get('epsilon', 0.3)
296 | debug = kwargs.get('debug', False)
297 |
298 | frames = [tracking_dict[serial]["Frame"] for serial in serials]
299 |
300 | cluster_frame_dict = {cluster:[] for cluster in set(clusters)} #20240418 add set()
301 | [cluster_frame_dict[cluster].append(frame) for cluster,frame in zip(clusters,frames)]
302 | cluster_indices_dict = {cluster:[] for cluster in set(clusters)} #20240418 add set()
303 | [cluster_indices_dict[cluster].append(i) for i,cluster in enumerate(clusters)]
304 |
305 | for cluster in cluster_frame_dict:
306 | cluster_frames = cluster_frame_dict[cluster]
307 | if len(list(set(cluster_frames))) == len(cluster_frames):continue
308 | cluster_indices = cluster_indices_dict[cluster]
309 |
310 |         # divide indices into overlapping / non-overlapping detections
311 |         overlap_indices_list, nonoverlap_indices = divide_overlap_or_nonoverlap(cluster_frames,cluster_indices)
312 |
313 |         tmp_clusters = overlap_suppression_clustering(distance_matrix,frames,nonoverlap_indices,overlap_indices_list,epsilon=epsilon,debug=debug)
314 |
315 | max_cluster_id = np.max(clusters)
316 | for index,tmp_cluster in enumerate(tmp_clusters):
317 | if clusters[index] != cluster: continue
318 | clusters[index] = max_cluster_id + tmp_cluster + 1
319 | return clusters
320 |
321 | def create_similarity_matrix_scpt(serials, tracking_dict, epsilon):
322 | # create a similarity matrix from features
323 | for n,serial in enumerate(serials):
324 | feature = np.load(tracking_dict[serial]["NpyPath"])
325 | if n==0: feature_stack = np.empty((0,len(feature.flatten())))
326 | feature_stack = np.append(feature_stack , feature.reshape(1,-1) , axis=0)
327 | similarity_matrix = cosine_similarity(feature_stack)
328 | similarity_matrix = similarity_matrix.astype(np.float16)
329 |
330 | similarity_matrix = np.where(similarity_matrix < (1-epsilon),0,similarity_matrix)
331 | return similarity_matrix
332 |
333 |
334 | def tracking_by_clustering(tracking_dict,serials,**kwargs):
335 | min_samples = kwargs.get('min_samples', 4)
336 | epsilon = kwargs.get('epsilon_scpt', 0.3)
337 | overlap_suppression = kwargs.get('overlap_suppression', True)
338 | debug = kwargs.get('debug', False)
339 | clustering_method = kwargs.get('clustering_method', "agglomerative")
340 |
341 | if len(serials) ==1:
342 | clusters = [0]
343 | else:
344 | similarity_matrix = create_similarity_matrix_scpt(serials,tracking_dict,epsilon)
345 |
346 | np.fill_diagonal(similarity_matrix, 1)
347 | distance_matrix = 1 - similarity_matrix
348 | if clustering_method == "agglomerative":
349 |             clusters = agglomerative_clustering(distance_matrix,epsilon=epsilon) #min(clusters)=1
350 |
351 | elif clustering_method == "dbscan":
352 | dbscan = DBSCAN(eps=epsilon,min_samples=min_samples,metric="precomputed")
353 | clusters = dbscan.fit_predict(distance_matrix)
354 | coreindices = dbscan.core_sample_indices_
355 | clusters = [cluster if cluster != -1 else -i for i,cluster in enumerate(clusters)]
356 | unique_clusters = list(set(clusters))
357 | new_clusterid_dict = {key:i for i,key in enumerate(unique_clusters)}
358 | clusters = [new_clusterid_dict[old_cluster] for old_cluster in clusters]
359 |
360 | if overlap_suppression == True:
361 | clusters = reclustering_overlap_cluster(distance_matrix,tracking_dict,serials,clusters,epsilon=epsilon,debug=debug)
362 |
363 | unique_clusters = list(set(clusters))
364 | new_clusterid_dict = {key:i for i,key in enumerate(unique_clusters)}
365 | clusters = [new_clusterid_dict[old_cluster] for old_cluster in clusters]
366 |
367 | return clusters
368 |
369 | def associate_cluster_between_period(tracking_dict,clusters,serials,past_serials,**kwargs):
370 | # associate clusters between adjacent time periods
371 | epsilon = kwargs.get('epsilon_scpt', 0.3)
372 | frames = [tracking_dict[serial]["Frame"] for serial in serials]
373 | past_frames = [tracking_dict[serial]["Frame"] for serial in past_serials]
374 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in past_serials]
375 |
376 | unique_offline_ids = list(set(offline_ids))
377 | unique_clusters = list(set(clusters))
378 |
379 | all_serials = past_serials + serials
380 | all_clusters = offline_ids + clusters
381 | all_unique_clusters = sorted(unique_offline_ids + unique_clusters)
382 | all_frames = past_frames + frames
383 |
384 | similarity_matrix = create_similarity_matrix_scpt(all_serials, tracking_dict,epsilon)
385 |
386 | centrality_matrix = create_centrality_matrix(all_clusters, similarity_matrix,all_frames,epsilon=epsilon)
387 | del similarity_matrix
388 | np.fill_diagonal(centrality_matrix, 0)
389 |
390 | all_clusters = associate_cluster(all_clusters,centrality_matrix,epsilon=epsilon)
391 |
392 | for serial,cluster in zip(all_serials,all_clusters):
393 | tracking_dict[serial]["OfflineID"] = int(cluster)
394 | return tracking_dict
395 |
396 | def get_overlap_coefficient(rectangle1, rectangle2):
397 | # measure the spatial overlap coefficient between two rectangles
398 | overlap_width = min(rectangle1[2], rectangle2[2]) - max(rectangle1[0], rectangle2[0])
399 | overlap_height = min(rectangle1[3], rectangle2[3]) - max(rectangle1[1], rectangle2[1])
400 | overlap_area = max(overlap_width, 0) * max(overlap_height, 0)
401 | rectangle1_area = (rectangle1[2] - rectangle1[0]) * (rectangle1[3] - rectangle1[1])
402 | rectangle2_area = (rectangle2[2] - rectangle2[0]) * (rectangle2[3] - rectangle2[1])
403 | #iou = overlap_area / (rectangle1_area + rectangle2_area - overlap_area)
404 | overlap_coefficient = overlap_area / min(rectangle1_area,rectangle2_area)
405 | return overlap_coefficient
406 |
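# Editor's illustration (not in the original source): unlike IoU, the overlap
# coefficient above normalizes by the smaller box, so a box fully contained in a
# larger one scores 1.0. Toy values below are made up:
demo_box_a = (0, 0, 10, 10)   # x1, y1, x2, y2 -> area 100
demo_box_b = (2, 2, 6, 6)     # area 16, fully inside demo_box_a (IoU would be 0.16)
assert get_overlap_coefficient(demo_box_a, demo_box_b) == 1.0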
407 | def sequential_non_maximum_suppression(tracking_dict,**kwargs):
408 | #Sequential NMS is performed in this function.
409 | #Sequential NMS calculates the overlap coefficient both temporally and spatially.
410 | temporally_snms_th = kwargs.get('temporally_snms_th', 0.6)
411 | spatially_snms_th = kwargs.get('spatially_snms_th', 0.6)
412 | remove_noise_cluster = kwargs.get('remove_noise_cluster', True)
413 | merge_nonoverlap = kwargs.get('merge_nonoverlap', True)
414 |
415 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict.keys()]
416 | unique_offline_ids = sorted(list(set(offline_ids)))
417 | if remove_noise_cluster:
418 | if min(unique_offline_ids) == -1:
419 | unique_offline_ids.remove(-1)
420 |
421 | offline_id_serial_dict = {offline_id:[] for offline_id in unique_offline_ids}
422 | [offline_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict.keys() if tracking_dict[serial]["OfflineID"] != -1]
423 | offline_id_frame_dict = {offline_id:[] for offline_id in unique_offline_ids}
424 | [offline_id_frame_dict[tracking_dict[serial]["OfflineID"]].append(tracking_dict[serial]["Frame"]) for serial in tracking_dict.keys() if tracking_dict[serial]["OfflineID"] != -1]
425 |
426 | for offline_id1, offline_id2 in combinations(unique_offline_ids,2):
427 |
428 | id1_frames = offline_id_frame_dict[offline_id1]
429 | id2_frames = offline_id_frame_dict[offline_id2]
430 | overlap_frames = set(id1_frames).intersection(set(id2_frames))
431 |
432 | if len(id1_frames) < len(id2_frames):
433 | (offline_id1,offline_id2) = (offline_id2,offline_id1)
434 | (id1_frames,id2_frames) = (id2_frames,id1_frames)
435 |
436 | if max(len(overlap_frames)/len(id1_frames),len(overlap_frames)/len(id2_frames)) 1:
491 | weighted_cumsum = alpha*weighted_cumsum+(1-alpha)*np.array([delta_x[t-1],delta_y[t-1]])
492 | if frame not in frames:
493 | continue
494 | current_position = interpolaterd_trajectory[t]
495 | past_position = interpolaterd_trajectory[t-1]
496 | pred_current_position = current_position + weighted_cumsum
497 | distance = np.sqrt(np.square(current_position[0] - pred_current_position[0])+np.square(current_position[1] - pred_current_position[1]))
498 | if distance > warp_th:
499 | break
500 | last_frame = frame
501 | if last_frame != max(frames):
502 | split_index = frames.index(last_frame)
503 | return split_index
504 |
505 | def separate_warp_tracklet(tracking_dict,**kwargs):
506 | # separate warp tracklets based on motion feature.
507 | remove_noise_cluster = kwargs.get('remove_noise_cluster', True)
508 | warp_th = kwargs.get('warp_th', 50)
509 |
510 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict.keys()]
511 | unique_offline_ids = sorted(list(set(offline_ids)))
512 | if remove_noise_cluster:
513 | if min(unique_offline_ids) == -1:
514 | unique_offline_ids.remove(-1)
515 |
516 | offline_id_serial_dict = {offline_id:[] for offline_id in unique_offline_ids}
517 | [offline_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1]
518 |
519 | max_offline_id = max(unique_offline_ids)
520 |
521 | while len(unique_offline_ids) > 0:
522 | offline_id = unique_offline_ids.pop(0)
523 | serials = offline_id_serial_dict[offline_id]
524 | if len(serials) <= 2:
525 | continue
526 | frames = [tracking_dict[serial]["Frame"] for serial in serials]
527 | if len(frames) != len(set(frames)):
528 | print(f"offline_id{offline_id} contains overlap")
529 | continue
530 | frames, serials = zip(*sorted(zip(frames, serials))) #sort by frame
531 | pos_list = [tracking_dict[serial]["Coordinate"] for serial in serials]
532 | trajectory = [((pos["x1"]+pos["x2"])/2,pos["y2"]) for pos in pos_list]
533 | split_index = get_warp_index(frames,trajectory,warp_th=warp_th)
534 |
535 | if split_index != None:
536 | split_serials = serials[split_index:]
537 | max_offline_id += 1
538 | unique_offline_ids.append(max_offline_id)
539 | offline_id_serial_dict[max_offline_id] = split_serials
540 | for serial in split_serials:
541 | tracking_dict[serial]["OfflineID"] = max_offline_id
542 | return tracking_dict
543 |
544 | def exclude_short_tracklet(tracking_dict,**kwargs):
545 | # exclude tracklets that contain only a few serials from tracking_dict
546 | short_tracklet_th = kwargs.get('short_tracklet_th', 5)
547 |
548 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict]
549 | unique_offline_ids = sorted(list(set(offline_ids)))
550 | if min(unique_offline_ids) == -1: unique_offline_ids.remove(-1)
551 |
552 | offline_id_serial_dict = {offlineID:[] for offlineID in unique_offline_ids} # dict for looking up serials by OfflineID
553 | [offline_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1]
554 |
555 | for offline_id in unique_offline_ids:
556 | serials = offline_id_serial_dict[offline_id]
557 | if len(serials) <= short_tracklet_th:
558 | for serial in serials:
559 | tracking_dict[serial]["OfflineID"] = -1
560 | return tracking_dict
561 |
562 | def exclude_motionless_tracklet(tracking_dict,**kwargs):
563 | # exclude motionless tracklets from tracking_dict
564 | stop_track_th = kwargs.get('stop_track_th', 25)
565 |
566 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict]
567 | unique_offline_ids = sorted(list(set(offline_ids)))
568 | if min(unique_offline_ids) == -1: unique_offline_ids.remove(-1)
569 |
570 | offline_id_serial_dict = {offlineID:[] for offlineID in unique_offline_ids} # dict for looking up serials by OfflineID
571 | [offline_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1]
572 |
573 | for offline_id in unique_offline_ids:
574 | serials = offline_id_serial_dict[offline_id]
575 | pos_list = [tracking_dict[serial]["Coordinate"] for serial in serials]
576 | x_pos_list = [(pos["x1"]+pos["x2"])/2 for pos in pos_list]
577 | y_pos_list = [pos["y2"] for pos in pos_list]
578 | x_min = np.min(x_pos_list)
579 | x_max = np.max(x_pos_list)
580 | y_min = np.min(y_pos_list)
581 | y_max = np.max(y_pos_list)
582 | if (x_max-x_min < stop_track_th) and (y_max-y_min < stop_track_th):
583 | for serial in serials:
584 | tracking_dict[serial]["OfflineID"] = -1
585 |
586 | return tracking_dict
587 |
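# Editor's illustration (not in the original source): a tracklet is treated as
# motionless above when the spread of its ground points stays below stop_track_th
# pixels in both x and y. Toy check with made-up positions:
import numpy as np
demo_x = [100.0, 104.0, 102.0]   # spread 4 px
demo_y = [200.0, 201.0, 199.0]   # spread 2 px
demo_th = 25
assert (np.max(demo_x) - np.min(demo_x) < demo_th) and (np.max(demo_y) - np.min(demo_y) < demo_th)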
--------------------------------------------------------------------------------
/tracking/src/mcpt.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import json
4 | import numpy as np
5 | from datetime import datetime
6 | from collections import Counter
7 | from sklearn.cluster import DBSCAN, AgglomerativeClustering
8 | from sklearn.metrics.pairwise import cosine_similarity
9 | from sklearn.metrics import pairwise_distances
10 | from scipy.interpolate import RegularGridInterpolator
11 | from itertools import combinations
12 | from scipy.stats import mode
13 | from scipy.spatial.distance import pdist, squareform
14 |
15 | from scpt import associate_cluster,agglomerative_clustering
16 | import pose
17 |
18 | """
19 | Definitions for clustering for multi-camera tracking.
20 | """
21 |
22 | def get_max_value_of_dict(dictionary, key):
23 | # get max value of any key from nested dictionary
24 | max_value = float('-inf')
25 | for k, v in dictionary.items():
26 | if isinstance(v, dict):
27 | max_value = max(max_value, get_max_value_of_dict(v, key))
28 | elif k == key:
29 | max_value = max(max_value, v)
30 | return max_value
31 |
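# Editor's illustration (not in the original source): get_max_value_of_dict() walks
# a nested dict and returns the largest value stored under the given key, e.g. the
# last frame index present in a tracking_dict. Toy data below is made up:
demo_tracking_dict = {"00000001": {"Frame": 3}, "00000002": {"Frame": 7}}
assert get_max_value_of_dict(demo_tracking_dict, "Frame") == 7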
32 | def create_similarity_matrix_mcpt(representative_nodes,**kwargs):
33 | # create similarity matrix from representative feature
34 | short_track_th = kwargs.get('short_track_th', 0)
35 | representative_selection_method = kwargs.get("representative_selection_method","keypoint")
36 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2)
37 | feature_stack = None
38 | for camera_id in representative_nodes:
39 | tmp_representative_nodes = representative_nodes[camera_id]
40 | for local_id in tmp_representative_nodes:
41 | value = tmp_representative_nodes[local_id]
42 | representative_node = value["representative_node"]
43 | serials = value["all_serials"]
44 |
45 | if len(serials) < short_track_th:
46 | continue
47 | if representative_selection_method == "keypoint":
48 | score = representative_node["score"]
49 | if score > keypoint_condition_th:
50 | continue
51 |
52 | feature = np.load(representative_node["npy_path"])
53 | if feature_stack is None:
54 | feature_stack = np.empty((0, len(feature.flatten())))
55 | feature_stack = np.append(feature_stack , feature.reshape(1, -1) , axis=0)
56 | similarity_matrix = cosine_similarity(feature_stack)
57 | similarity_matrix = similarity_matrix.astype(np.float16)
58 | return similarity_matrix
59 |
60 | def measure_intersect_area(rectangle1, rectangle2):
61 | # measure the intersection area of two rectangles
62 | intersect_width = min(rectangle1[2], rectangle2[2]) - max(rectangle1[0], rectangle2[0])
63 | intersect_height = min(rectangle1[3], rectangle2[3]) - max(rectangle1[1], rectangle2[1])
64 | intersect_area = max(intersect_width, 0) * max(intersect_height, 0)
65 | return intersect_area
66 |
67 | def eval_keypoints(serial,other_serials,keypoints_results,**kwargs):
68 | # evaluate results of pose estimation
69 | """
70 | condition = 1: all keypoints have high confidence
71 | condition = 2: the keypoints of one half of the body (left or right) have high confidence
72 | condition = 3: only part of the keypoints in the left or right half of the body have high confidence
73 | condition = 4: almost all keypoints have low confidence
74 | """
75 | keypoint_th = kwargs.get("keypoint_th",0.7)
76 |
77 | kp = keypoints_results.get_keypoints(serial)
78 | if kp == None:
79 | condition, intersect_ratio, score,area = 4, 1 , 0, 0
80 | else:
81 | x1,y1,x2,y2,bbox_confidence = kp["bbox"]
82 | keypoints = kp["Keypoints"]
83 | area = (x2-x1)*(y2-y1)
84 | x_list, y_list, scores = zip(*keypoints)
85 |
86 | intersect_area = 0
87 | for other_serial in other_serials :
88 | other_kp =keypoints_results.get_keypoints(other_serial)
89 | if other_kp==None: continue
90 | x1_,y1_,x2_,y2_,bbox_confidence = other_kp["bbox"]
91 | tmp_intersect_area = measure_intersect_area([x1,y1,x2,y2],[x1_,y1_,x2_,y2_])
92 | intersect_area = max(intersect_area,tmp_intersect_area)
93 | intersect_ratio = intersect_area/((x2-x1)*(y2-y1))
94 |
95 | if np.min(scores) >= keypoint_th:
96 | score = np.mean(scores)
97 | condition = 1
98 | else:
99 | right_scores = [score for i,score in enumerate(scores) if i%2==0]
100 | left_scores = [score for i,score in enumerate(scores) if i%2==1]
101 | nose_score = right_scores.pop(0)
102 | min_right_scores = np.min(right_scores)
103 | min_left_scores = np.min(left_scores)
104 | target_scores = left_scores if min_left_scores > min_right_scores else right_scores
105 | min_score = np.min(target_scores)
106 | score = np.mean(target_scores)
107 | if min_score >= keypoint_th:
108 | condition = 2
109 | else:
110 | count = len([tmp_score for tmp_score in target_scores if tmp_score >= keypoint_th])
111 | if count/len(target_scores) > 0.7:
112 | condition =3
113 | else:
114 | condition = 4
115 | return condition, intersect_ratio, score, area
116 |
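# Editor's illustration (not in the original source): the condition returned above
# is stored by find_high_confidence_keypoint_node() below as the representative
# node's "score", and create_similarity_matrix_mcpt()/create_camera_dict() keep a
# tracklet only when score <= keypoint_condition_th (default 2), i.e. at least one
# full half of the body has confident keypoints. Toy filtering, made-up conditions:
demo_conditions = {"tracklet_a": 1, "tracklet_b": 2, "tracklet_c": 4}
demo_kept = [t for t, c in demo_conditions.items() if c <= 2]
assert demo_kept == ["tracklet_a", "tracklet_b"]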
117 | def find_high_confidence_keypoint_node(tracking_dict,serials,keypoints_results,frame_serials_dict,**kwargs):
118 | keypoint_th = kwargs.get("keypoint_th",0.7)
119 |
120 | conditions = []
121 | intersects = []
122 | image_scores = []
123 | areas = []
124 |
125 | for k,serial in enumerate(serials):
126 | frame = tracking_dict[serial]["Frame"]
127 | other_serials = frame_serials_dict[frame]
128 | other_serials = [s for s in other_serials if s != serial] # work on a copy so frame_serials_dict is not mutated
129 |
130 | condition, intersect_ratio, image_score, area = eval_keypoints(serial, other_serials, keypoints_results, keypoint_th=keypoint_th)
131 | conditions.append(condition)
132 | intersects.append(intersect_ratio)
133 | image_scores.append(image_score)
134 | areas.append(area)
135 | min_condition = np.min(conditions)
136 | index_area = np.array([(i,area) for i,(condition,area) in enumerate(zip(conditions,areas)) if condition == min_condition])
137 | max_index = np.argmax(index_area[:,1])
138 | index,max_area = index_area[max_index]
139 |
140 | serial = serials[int(index)]
141 | feature = np.load(tracking_dict[serial]["NpyPath"])
142 | return serial, feature, int(min_condition)
143 |
144 | def decide_representative_nodes(tracking_results,out_dir,scene_id,**kwargs):
145 | # decide representative nodes from each tracklet
146 | epsilon = kwargs.get('epsilon_mcpt', 0.3)
147 | representative_selection_method = kwargs.get("representative_selection_method","centrality")
148 | short_track_th = kwargs.get("short_track_th",20)
149 | model = kwargs.get("model","mmpose_hrnet")
150 | keypoint_th = kwargs.get("keypoint_th",0.7)
151 |
152 | representative_nodes = {}
153 | for camera_id in tracking_results:
154 | representative_nodes[camera_id] = {}
155 | tracking_dict = tracking_results[camera_id]
156 | if representative_selection_method == "keypoint":
157 | keypoints_results = pose.PoseKeypoints(f"Pose/scene_{str(scene_id).zfill(3)}/camera_{str(camera_id).zfill(4)}/camera_{str(camera_id).zfill(4)}_out_keypoint.json")
158 | keypoints_results.assign_serial_from_tracking_dict(tracking_dict=tracking_dict)
159 | max_frame = get_max_value_of_dict(tracking_dict,"Frame")
160 | frame_serials_dict = {n+1:[] for n in range(max_frame)}
161 | [frame_serials_dict[tracking_dict[serial]["Frame"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1]
162 |
163 | # To get each cluster, iterate over tracking_dict and extract cluster-wise data
164 | local_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict]
165 | unique_local_ids = sorted(set(local_ids))
166 | if -1 in unique_local_ids:
167 | unique_local_ids.remove(-1)
168 | local_id_serials_dict = {local_id:[] for local_id in unique_local_ids}
169 | [local_id_serials_dict[local_id].append(serial) for local_id,serial in zip(local_ids,tracking_dict) if local_id >= 0]
170 |
171 | # Get the representative node of each cluster
172 | for local_id in local_id_serials_dict:
173 | serials = local_id_serials_dict[local_id]
174 | if representative_selection_method == "centrality":
175 | serials, serial, feature = find_highest_centrality_node(tracking_dict, serials, epsilon=epsilon)
176 | if serial != None:
177 | representative_node = {"serial": serial, "npy_path": tracking_dict[serial]["NpyPath"]}
178 | elif representative_selection_method == "keypoint":
179 | serial, feature, score = find_high_confidence_keypoint_node(tracking_dict,serials,keypoints_results,frame_serials_dict,keypoint_th = keypoint_th)
180 | representative_node = {"serial": serial,"score":score, "npy_path": tracking_dict[serial]["NpyPath"]}
181 | else:
182 | print("representative_selection_method is wrong")
183 | sys.exit()
184 | # Save result out to json
185 | if serials !=[]:
186 |
187 | representative_nodes[camera_id][local_id] = {"representative_node": representative_node, "all_serials": serials}
188 | json_path = os.path.join(out_dir, f"representative_nodes_scene{scene_id}.json")
189 | with open(json_path, "w") as f:
190 | json.dump(representative_nodes, f)
191 |
192 | return representative_nodes
193 |
194 | def multi_camera_people_tracking(tracking_results, scene_id, json_dir, out_dir,**kwargs):
195 | # perform mcpt using tracking_results
196 | # tracking_results contains tracking_dict, which contains results of scpt in each camera
197 | print("running multi_camera_people_tracking")
198 |
199 | appearance_based_tracking = kwargs.get("appearance_based_tracking",True)
200 | distance_type = kwargs.get("distance_type","max")
201 | distance_th = kwargs.get("distance_th",5)
202 | epsilon = kwargs.get("epsilon_mcpt",0.4)
203 | representative_selection_method = kwargs.get("representative_selection_method","keypoint")
204 | short_track_th = kwargs.get("short_track_th",0)
205 | keypoint_th = kwargs.get("keypoint_th",0.7)
206 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2)
207 | replace_similarity_by_wcoordinate = kwargs.get("replace_similarity_by_wcoordinate",True)
208 | replace_value = kwargs.get('replace_value', -10)
209 |
210 | print("representative_selection_method:",representative_selection_method)
211 | print("short_track_th:",short_track_th)
212 | print("epsilon:",epsilon)
213 | if representative_selection_method == "keypoint":
214 | print("keypoint_condition_th:",keypoint_condition_th)
215 |
216 | # Representative image extraction
217 | representative_nodes = get_representative_nodes_cache(scene_id=scene_id, out_dir=out_dir)
218 | if representative_nodes == None:
219 | representative_nodes = decide_representative_nodes(tracking_results,out_dir,scene_id,epsilon=epsilon,representative_selection_method=representative_selection_method,short_track_th=short_track_th,keypoint_th=keypoint_th)
220 | else:
221 | print(f"Found repsentative_nodes cache file. Got {len(representative_nodes)} camera(s) info.")
222 | print("representative feature is selected")
223 |
224 | similarity_matrix = create_similarity_matrix_mcpt(representative_nodes,short_track_th=short_track_th,representative_selection_method=representative_selection_method,keypoint_condition_th=keypoint_condition_th)
225 | similarity_matrix[similarity_matrix < (1-epsilon)] = 0
226 | clusters = list(range(len(similarity_matrix)))
227 | print("number of tracklet:",len(set(clusters)))
228 | similarity_matrix = replace_similarity(representative_nodes,similarity_matrix,tracking_results,clusters,distance_th=distance_th,
229 | distance_type=distance_type,replace_similarity_by_wcoordinate=replace_similarity_by_wcoordinate,
230 | short_track_th = short_track_th, keypoint_condition_th=keypoint_condition_th,
231 | representative_selection_method=representative_selection_method)
232 | # perform re-identification using hierarchical clustering with average linkage
233 | clusters = associate_cluster(clusters, similarity_matrix, epsilon=epsilon, cost_function=2, minimize=False)
234 | del similarity_matrix
235 |
236 | print("unique_clusters:",len(set(clusters)))
237 |
238 | camera_dict = create_camera_dict(representative_nodes,short_track_th = short_track_th,
239 | keypoint_condition_th=keypoint_condition_th, representative_selection_method=representative_selection_method)
240 |
241 | for camera_id in camera_dict:
242 | tracking_dict = tracking_results[int(camera_id)]
243 | indices = camera_dict[camera_id]["indices"]
244 | local_ids = camera_dict[camera_id]["unique_local_ids"]
245 | tmp_clusters = [clusters[index] for index in indices]
246 | local_id_cluster_dict = {local_id:cluster for local_id,cluster in zip(local_ids,tmp_clusters)}
247 |
248 | local_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict]
249 | unique_local_ids = sorted(set(local_ids))
250 | if -1 in unique_local_ids:
251 | unique_local_ids.remove(-1)
252 | local_id_serials_dict = {local_id:[] for local_id in unique_local_ids}
253 | [local_id_serials_dict[local_id].append(serial) for local_id,serial in zip(local_ids,tracking_dict) if local_id >= 0]
254 | for local_id in unique_local_ids:
255 | for serial in local_id_serials_dict[local_id]:
256 | value = tracking_dict[serial]
257 | if local_id in local_id_cluster_dict:
258 | value["GlobalOfflineID"] = int(local_id_cluster_dict[local_id])
259 | return tracking_results
260 |
261 | def get_representative_nodes_cache(scene_id, out_dir):
262 | # Get cached representative nodes info if any
263 | representative_node_json = os.path.join(out_dir, f"representative_nodes_scene{scene_id}.json")
264 | if os.path.isfile(representative_node_json):
265 | with open(representative_node_json, "r") as f:
266 | representative_nodes = json.load(f)
267 | return representative_nodes
268 | return None
269 |
270 | def get_unique_global_ids(tracking_results,representative_nodes):
271 | # get unique global ids from tracking_results
272 | global_ids = []
273 | for camera_id in representative_nodes:
274 | tracking_dict = tracking_results[camera_id]
275 | for local_id in representative_nodes[camera_id]:
276 | serial = representative_nodes[camera_id][local_id]["representative_node"]["serial"]
277 | if "GlobalOfflineID" in tracking_dict[serial]:
278 | global_ids.append(tracking_dict[serial]["GlobalOfflineID"])
279 | unique_global_ids = sorted(list(set(global_ids)))
280 | return unique_global_ids
281 |
282 | def get_serials_each_global_id(tracking_results,representative_nodes,unique_global_ids):
284 | # get serials assigned to each global id
284 | global_serial_dict = {} #global_id: {camera_id:(local_id, serial)}
285 | for global_id in unique_global_ids:
286 | tmp_dict = {}
287 | for camera_id in representative_nodes:
288 | tmp_dict[camera_id] = []
289 | global_serial_dict[global_id] = tmp_dict
290 | for camera_id in representative_nodes:
291 | tracking_dict = tracking_results[camera_id]
292 | for local_id in representative_nodes[camera_id]:
293 | serial = representative_nodes[camera_id][local_id]["representative_node"]["serial"]
294 | if "GlobalOfflineID" in tracking_dict[serial]:
295 | global_id = tracking_dict[serial]["GlobalOfflineID"]
296 | global_serial_dict[global_id][camera_id].append((local_id,serial))
297 | return global_serial_dict
298 |
299 | def create_camera_dict(representative_nodes,**kwargs):
300 | # build, per camera, the similarity-matrix indices and unique local ids of the tracklets kept for MCPT
301 | short_track_th = kwargs.get('short_track_th', 0)
302 | representative_selection_method = kwargs.get("representative_selection_method","keypoint")
303 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2)
304 |
305 | camera_dict = {camera_id:{"indices":[],"unique_local_ids":[]} for camera_id in representative_nodes}
306 | max_id = 0
307 | for camera_id in representative_nodes:
308 | tmp_representative_nodes = representative_nodes[camera_id]
309 | local_ids = []
310 | for local_id in tmp_representative_nodes:
311 | serials = tmp_representative_nodes[local_id]["all_serials"]
312 | if len(serials) < short_track_th:
313 | continue
314 | if representative_selection_method == "keypoint":
315 | score = tmp_representative_nodes[local_id]["representative_node"]["score"]
316 | if score > keypoint_condition_th:
317 | continue
318 | local_ids.append(int(local_id))
319 | unique_local_ids = sorted(list(set(local_ids)))
320 | camera_dict[camera_id]["indices"] += list(range(max_id,max_id+len(unique_local_ids)))
321 | camera_dict[camera_id]["unique_local_ids"] += unique_local_ids
322 | max_id += len(unique_local_ids)
323 | return camera_dict
324 |
325 | def create_mcpt_feature_stack(tracking_results,target_list):
326 | feature_stack = None
327 | for camera_id, serial in target_list:
328 | feature = np.load(tracking_results[camera_id][serial]["NpyPath"])
329 | if feature_stack is None:
330 | feature_stack = np.empty((0, len(feature.flatten())))
331 | feature_stack = np.append(feature_stack , feature.reshape(1, -1), axis=0)
332 | return feature_stack
333 |
334 |
335 |
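# Editor's illustration (not in the original source): np.append() in the loop above
# reallocates the whole stack on every iteration; collecting rows in a list and
# stacking once is an equivalent, cheaper pattern. The _alt name is ours, not the
# repository's:
def create_mcpt_feature_stack_alt(tracking_results, target_list):
    rows = [np.load(tracking_results[camera_id][serial]["NpyPath"]).reshape(-1)
            for camera_id, serial in target_list]
    return np.vstack(rows) if rows else None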
336 | def assign_global_id(tracking_results,representative_nodes,**kwargs):
337 | # assign unclustered tracklets to global id
338 |
339 | epsilon = kwargs.get('epsilon_mcpt', 0.3)
340 | assign_all_tracklet = kwargs.get('assign_all_tracklet', False)
341 | sim_th = kwargs.get('sim_th', 0.9)
342 | print("sim_th:",sim_th)
343 | print("assign_all_tracklet:",assign_all_tracklet)
344 | model = kwargs.get("model","mmpose_hrnet")
345 |
346 | counter = 0
347 | assigned_tracks = []
348 | unassigned_tracks = []
349 |
350 | for camera_id in representative_nodes:
351 | tracking_dict = tracking_results[camera_id]
352 | for local_id in representative_nodes[camera_id]:
353 | serial = representative_nodes[camera_id][local_id]["representative_node"]["serial"]
354 | if "GlobalOfflineID" in tracking_dict[serial]:
355 | global_id = tracking_dict[serial]["GlobalOfflineID"]
356 | assigned_tracks.append((global_id,camera_id,local_id,serial))
357 | else:
358 | unassigned_tracks.append((camera_id,local_id))
359 |
360 | target_list = [(camera_id,serial) for global_id,camera_id,local_id,serial in assigned_tracks]
361 | feature_stack = create_mcpt_feature_stack(tracking_results,target_list)
362 | feature_stack_T = feature_stack.T
363 | feature_stack_norm = np.linalg.norm(feature_stack, axis=1)
364 | global_ids = [global_id for global_id,camera_id,local_id,serial in assigned_tracks]
365 |
366 | for k,(camera_id,local_id) in enumerate(unassigned_tracks):
367 | npy_path = representative_nodes[camera_id][local_id]["representative_node"]["npy_path"]
368 | feature = np.load(npy_path)
369 | cos_sims = np.dot(feature,feature_stack_T)/ (np.linalg.norm(feature)*feature_stack_norm)
370 |
371 | if assign_all_tracklet == False:
372 | max_sim = np.max(cos_sims)
373 | if max_sim < sim_th:
374 | continue
375 |
376 | similar_indices = list(np.where(cos_sims >= sim_th)[0])
377 | if len(similar_indices) == 0:
378 | continue
379 |
380 | tmp_global_ids = [global_id for i,global_id in enumerate(global_ids) if i in similar_indices]
381 | global_id = mode(tmp_global_ids).mode
382 |
383 | counter += 1
384 | serials = representative_nodes[camera_id][local_id]["all_serials"]
385 | for serial in serials:
386 | tracking_results[camera_id][serial]["GlobalOfflineID"] = int(global_id)
387 | print(f"{counter} tracklets are reassigned")
388 | return tracking_results
389 |
390 | def global_id_reassignment(tracking_results, representative_nodes,scene_id,**kwargs):
391 | # perform delete_small_global_id() and assign_global_id() for reassigning unclustered tracklets
392 | epsilon = kwargs.get("epsilon_mcpt",0.3)
393 | representative_selection_method = kwargs.get("representative_selection_method","centrality")
394 | delete_gid_th = kwargs.get("delete_gid_th",10000)
395 | assign_all_tracklet = kwargs.get("assign_all_tracklet",True)
396 | sim_th = kwargs.get("sim_th",0.8)
397 | delete_few_camera_cluter = kwargs.get('delete_few_camera_cluter',False)
398 |
399 | unique_global_ids = get_unique_global_ids(tracking_results,representative_nodes)
400 |
401 | global_serial_dict = get_serials_each_global_id(tracking_results,representative_nodes,unique_global_ids)
402 |
403 | tracking_results, unique_global_ids = delete_small_global_id(tracking_results,representative_nodes,global_serial_dict,
404 | delete_gid_th = delete_gid_th,delete_few_camera_cluter=delete_few_camera_cluter)
405 |
406 | tracking_results = assign_global_id(tracking_results,representative_nodes,
407 | delete_gid_th=delete_gid_th, assign_all_tracklet=assign_all_tracklet,sim_th=sim_th)
408 |
409 | return tracking_results
410 |
411 | def translate_world_coordinate(x, y, homography_matrix):
412 | # translate camera coordinate to world coordinate
413 | vector_xyz = np.array([x, y, 1]) # z=1
414 | vector_xyz_3d = np.dot(np.linalg.inv(homography_matrix), vector_xyz.T)
415 | return vector_xyz_3d[0] / vector_xyz_3d[2], vector_xyz_3d[1] / vector_xyz_3d[2]
416 |
417 |
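# Editor's illustration (not in the original source): translate_world_coordinate()
# maps an image ground point (the bbox bottom-center) through the inverse of the
# calibration homography and dehomogenizes. With an identity homography (made-up
# input) the point is returned unchanged:
demo_homography = np.eye(3)
assert translate_world_coordinate(640.0, 480.0, demo_homography) == (640.0, 480.0)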
418 | def interpolate_tracklet(tracking_results,representative_nodes,**kwargs):
419 | # interpolate missing detections for each tracklet
420 | max_interpolate_interval = kwargs.get("max_interpolate_interval",150)
421 | frame_sampling_freq = kwargs.get("frame_sampling_freq",1)
422 | for camera_id in tracking_results:
423 | tracking_dict = tracking_results[camera_id]
424 | local_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict]
425 | unique_local_ids = sorted(list(set(local_ids)))
426 | if min(unique_local_ids) == -1: unique_local_ids.remove(-1)
427 | local_id_serial_dict = {local_id:[] for local_id in unique_local_ids}
428 | [local_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1]
429 | local_id_frame_dict = {local_id:[] for local_id in unique_local_ids}
430 | [local_id_frame_dict[tracking_dict[serial]["OfflineID"]].append(tracking_dict[serial]["Frame"]) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1]
431 |
432 | max_serial = int(max(tracking_dict.keys()))
433 | for local_id in unique_local_ids:
434 | frames, serials = zip(*sorted(zip(local_id_frame_dict[local_id], local_id_serial_dict[local_id])))
435 | missing_frames = []
436 | for frame,next_frame in zip(frames[:-1],frames[1:]):
437 | diff = next_frame - frame
438 | if diff > max_interpolate_interval: continue
439 | while diff > frame_sampling_freq:
440 | diff -= frame_sampling_freq
441 | missing_frame = next_frame - diff
442 | missing_frames.append(missing_frame)
443 | if missing_frames == []: continue
444 | global_id = tracking_dict[serials[0]]["GlobalOfflineID"] if "GlobalOfflineID" in tracking_dict[serials[0]] else None
445 |
446 | coordinates = [list(tracking_dict[serial]["Coordinate"].values())+list(tracking_dict[serial]["WorldCoordinate"].values()) for serial in serials]
447 | interpolator = RegularGridInterpolator((np.array(frames),), np.array(coordinates), method='linear')
448 | for frame in missing_frames:
449 | x1,y1,x2,y2,w_x,w_y = interpolator([frame])[0]
450 | (x1, y1, x2, y2), (w_x,w_y) = map(int, [x1, y1, x2, y2]),map(float,[w_x,w_y])
451 | max_serial += 1
452 | if global_id != None:
453 | tracking_dict[str(max_serial).zfill(8)] = {"Frame": frame, "Coordinate": {'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2}, "WorldCoordinate": {'x': w_x, 'y': w_y}, "OfflineID": local_id, "GlobalOfflineID": global_id}
454 | else:
455 | tracking_dict[str(max_serial).zfill(8)] = {"Frame": frame, "Coordinate": {'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2}, "WorldCoordinate": {'x': w_x, 'y': w_y}, "OfflineID": local_id}
456 | return tracking_results
457 |
458 |
459 | def find_highest_centrality_node(tracking_dict, serials, **kwargs):
460 | # find highest centrality node from each tracklet
461 | epsilon = kwargs.get('epsilon_mcpt', 0.3)
462 | stack_max_size = kwargs.get('stack_max_size', 2000)
463 | image_size = kwargs.get('image_size', (1920,1080))
464 | aspect_th = kwargs.get('aspect_th', 1.6)
465 |
466 | pos_list = [list(tracking_dict[serial]["Coordinate"].values()) for serial in serials]
467 | pos_list = np.array(pos_list)
468 | aspects = (pos_list[:,3]-pos_list[:,1])/(pos_list[:,2]-pos_list[:,0])
469 | pos_list[:, 2] = image_size[0] - pos_list[:, 2]
470 | pos_list[:, 3] = image_size[1] - pos_list[:, 3]
471 | edge_distances = np.min(pos_list,axis = 1)
472 | new_serials = []
473 | for i, (serial,aspect, edge_distance) in enumerate(zip(serials,aspects,edge_distances)):
474 | if (aspect >= aspect_th): # (edge_distance <= 1) and
475 | new_serials.append(serial)
476 | if len(new_serials) == 0:
477 | serial,feature = None,None
478 | pass
479 | elif len(new_serials) == 1 or len(new_serials)== 2:
480 | serial = new_serials[0]
481 | feature = np.load(tracking_dict[serial]["NpyPath"])
482 | else:
483 | freq =1
484 | while len(new_serials)//freq > stack_max_size:
485 | freq += 1
486 | for n, serial in enumerate(new_serials):
487 | if n % freq != 0: continue
488 | feature = np.load(tracking_dict[serial]["NpyPath"])
489 | if n== 0:
490 | feature_stack = np.empty((0,len(feature.flatten())))
491 | feature_stack = np.append(feature_stack , feature.reshape(1, -1), axis=0)
492 | similarity_matrix = cosine_similarity(feature_stack)
493 | similarity_matrix = np.where(similarity_matrix < 1-epsilon, 0, similarity_matrix)
494 | centralities = np.sum(similarity_matrix,axis=0)
495 | idx_max = np.argmax(centralities)
496 | serial = new_serials[idx_max*freq]
497 | feature = feature_stack[idx_max]
498 | return new_serials, serial, feature
499 |
500 | def minimize_similarity_by_sc_overlap(representative_nodes,matrix,tracking_results,clusters,camera_dict,**kwargs):
501 | # minimize similarity if tracklets are overlapping in SCPT results
502 | matrix_type = kwargs.get('matrix_type', "similarity")
503 | if matrix_type == "similarity":
504 | replace_value = -1
505 | elif matrix_type == "distance":
506 | replace_value = np.max(matrix[matrix distance_th, replace_value, similarity_matrix)
537 | return similarity_matrix
538 |
539 | def maximize_similarity_by_wcoordinate(similarity_matrix,distance_matrix,**kwargs):
540 | # replace multiple elements of the similarity matrix with 1 based on the world coordinate
541 | max_distance_th = kwargs.get('max_distance_th', 0.5)
542 | replace_value = kwargs.get('replace_value', 1)
543 | print("maximize_similarity_by_wcoordinate")
544 | similarity_matrix = np.where(distance_matrix < max_distance_th, replace_value, similarity_matrix)
545 | return similarity_matrix
546 |
547 | def replace_similarity(representative_nodes,similarity_matrix,tracking_results,clusters,**kwargs):
548 | # replace multiple elements of the similarity matrix with another value
549 | distance_th = kwargs.get('distance_th', 10)
550 | check_sc_overlap = kwargs.get('check_sc_overlap', False)
551 | replace_similarity_by_wcoordinate = kwargs.get('replace_similarity_by_wcoordinate', False)
552 | distance_type = kwargs.get('distance_type', "min")
553 | short_track_th = kwargs.get("short_track_th",0)
554 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2)
555 | replace_value = kwargs.get('replace_value', -10)
556 | representative_selection_method = kwargs.get('representative_selection_method', 'keypoint')
557 |
558 | if check_sc_overlap:
559 | camera_dict = create_camera_dict(representative_nodes,short_track_th = short_track_th,keypoint_condition_th=keypoint_condition_th,
560 | representative_selection_method=representative_selection_method)
561 | similarity_matrix = minimize_similarity_by_sc_overlap(representative_nodes,similarity_matrix,tracking_results,clusters,camera_dict, matrix_type = "similarity")
562 | if replace_similarity_by_wcoordinate:
563 | min_distance_matrix,max_distance_matrix,mean_distance_matrix = create_distance_matrix(representative_nodes,tracking_results, distance_type = distance_type,short_track_th =short_track_th, keypoint_condition_th = keypoint_condition_th,representative_selection_method =representative_selection_method)
564 | similarity_matrix = maximize_similarity_by_wcoordinate(similarity_matrix, mean_distance_matrix)
565 | similarity_matrix = replace_negative_value_by_wcoordinate(similarity_matrix, min_distance_matrix, distance_th=distance_th,replace_value=replace_value)
566 |
567 | return similarity_matrix
568 |
569 | def measure_euclidean_distance(id1_pos_list,id2_pos_list):
570 | points1 = np.array(id1_pos_list)
571 | points2 = np.array(id2_pos_list)
572 | diff = points1-points2
573 | euclid_distances = np.sqrt(np.sum(diff**2, axis=1))
574 | return euclid_distances
575 |
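# Editor's illustration (not in the original source): measure_euclidean_distance()
# expects two equal-length lists of world coordinates paired frame by frame and
# returns the per-frame distances. Toy values below are made up:
demo_distances = measure_euclidean_distance([(0.0, 0.0), (1.0, 1.0)], [(3.0, 4.0), (1.0, 1.0)])
assert np.allclose(demo_distances, [5.0, 0.0])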
576 | def create_distance_matrix(representative_nodes,tracking_results, **kwargs):
577 | # create a matrix of Euclidean distances between each pair of tracklets
578 |
579 | distance_type = kwargs.get('distance_type', "max") #distance_type min or max or mean
580 | image_size = kwargs.get('image_size', (1920,1080))
581 | short_track_th = kwargs.get('short_track_th', 0)
582 | representative_selection_method = kwargs.get("representative_selection_method","keypoint")
583 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2)
584 | print("distance_type:",distance_type)
585 | camera_dict = create_camera_dict(representative_nodes,short_track_th = short_track_th,
586 | keypoint_condition_th=keypoint_condition_th, representative_selection_method=representative_selection_method)
587 | shape = np.sum([len(camera_dict[camera_id]["indices"]) for camera_id in camera_dict])
588 | max_distance_matrix = np.full((shape, shape), np.inf, dtype=np.float16)
589 | mean_distance_matrix = np.full((shape, shape), np.inf, dtype=np.float16)
590 | min_distance_matrix = np.full((shape, shape), np.inf, dtype=np.float16)
591 |
592 | index_serials_dict = {index:[] for index in range(len(max_distance_matrix))}
593 | index_frames_dict = {index:[] for index in range(len(max_distance_matrix))}
594 | index_wpos_list_dict = {index:[] for index in range(len(max_distance_matrix))}
595 |
596 | for camera_id in representative_nodes:
597 | tracking_dict = tracking_results[int(camera_id)]
598 | indices = camera_dict[camera_id]["indices"]
599 | unique_local_ids = camera_dict[camera_id]["unique_local_ids"]
600 | local_ids_serials_dict = {local_id:[] for local_id in unique_local_ids}
601 | [local_ids_serials_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] in unique_local_ids]
602 |
603 | for tmp_index in range(len(indices)):
604 | local_id = unique_local_ids[tmp_index]
605 | serials = local_ids_serials_dict[local_id]
606 | frames = [tracking_dict[serial]["Frame"] for serial in serials]
607 | wpos_list = [list(tracking_dict[serial]["WorldCoordinate"].values()) for serial in serials]
608 | index = indices[tmp_index]
609 | index_serials_dict[index] += serials
610 | index_frames_dict[index] += frames
611 | index_wpos_list_dict[index] += wpos_list
612 |
613 | for id1_index in range(len(max_distance_matrix)-1):
614 | id1_frames = index_frames_dict[id1_index]
615 | id1_wpos_list = index_wpos_list_dict[id1_index]
616 | if id1_frames == []:
617 | continue
618 |
619 | for id2_index in range(id1_index+1,len(max_distance_matrix)):
620 | id2_frames = index_frames_dict[id2_index]
621 | if id2_frames == []:
622 | continue
623 | common_frames = set(id1_frames).intersection(set(id2_frames))
624 | if len(common_frames) < 1: continue
625 | id2_wpos_list = index_wpos_list_dict[id2_index]
626 | id1_lap_indices = [i for i,id1_frame in enumerate(id1_frames) if id1_frame in common_frames]
627 | id2_lap_indices = [i for i,id2_frame in enumerate(id2_frames) if id2_frame in common_frames]
628 | id1_lap_wpos_list = [id1_wpos_list[id1_lap_index] for id1_lap_index in id1_lap_indices]
629 | id2_lap_wpos_list = [id2_wpos_list[id2_lap_index] for id2_lap_index in id2_lap_indices]
630 |
631 | euclid_distances = measure_euclidean_distance(id1_lap_wpos_list,id2_lap_wpos_list)
632 | min_distance = np.min(euclid_distances)
633 | mean_distance = np.mean(euclid_distances)
634 | max_distance = np.max(euclid_distances)
635 | min_distance_matrix[id1_index,id2_index] = min_distance
636 | min_distance_matrix[id2_index,id1_index] = min_distance
637 | if len(common_frames) > 120:
638 | mean_distance_matrix[id1_index,id2_index] = mean_distance
639 | mean_distance_matrix[id2_index,id1_index] = mean_distance
640 | max_distance_matrix[id1_index,id2_index] = max_distance
641 | max_distance_matrix[id2_index,id1_index] = max_distance
642 |
643 | return min_distance_matrix,max_distance_matrix,mean_distance_matrix
644 |
645 | def delete_small_global_id(tracking_results,representative_nodes,global_serial_dict,**kwargs):
646 | # delete global ids that contain only a few serials from tracking_results
647 | delete_gid_th = kwargs.get('delete_gid_th',10000)
648 | delete_few_camera_cluter = kwargs.get('delete_few_camera_cluter',False)
649 | print("delete_gid_th:",delete_gid_th)
650 | print("delete_few_camera_cluter:",delete_few_camera_cluter)
651 | delete_global_ids = []
652 | save_global_ids = []
653 |
654 | for global_id in global_serial_dict:
655 | serial_counter = 0
656 | camera_ids=[]
657 | for camera_id in global_serial_dict[global_id]:
658 | if global_serial_dict[global_id][camera_id] != []:
659 | camera_ids.append(camera_id)
660 | for local_id,serial in global_serial_dict[global_id][camera_id]:
661 | tmp_all_serials = representative_nodes[camera_id][local_id]["all_serials"]
662 | serial_counter += len(tmp_all_serials)
663 |
664 | if serial_counter < delete_gid_th:
665 | delete_global_ids.append(global_id)
666 | continue
667 | if delete_few_camera_cluter:
668 | if len(set(camera_ids)) < 3:
669 | delete_global_ids.append(global_id)
670 | continue
671 | save_global_ids.append(global_id)
672 |
673 | for camera_id in tracking_results:
674 | tracking_dict = tracking_results[camera_id]
675 | for serial in tracking_dict:
676 | tmp_dict = tracking_dict[serial]
677 | if "GlobalOfflineID" in tmp_dict:
678 | global_id = tmp_dict["GlobalOfflineID"]
679 | if global_id in delete_global_ids:
680 | del tmp_dict["GlobalOfflineID"]
681 | unique_global_ids = sorted(list(set(save_global_ids)))
682 |
683 | return tracking_results, unique_global_ids
684 |
685 | def measure_world_coordinate(scene_id,tracking_results, **kwargs):
686 | # measure world coordinates for each node
687 | mean_world_coordinate_th = kwargs.get("mean_world_coordinate_th",2)
688 | model = kwargs.get("model","mmpose_hrnet")
689 |
690 | for camera_id in tracking_results:
691 | tracking_dict = tracking_results[camera_id]
692 | with open(f"Original/scene_{str(scene_id).zfill(3)}/camera_{str(camera_id).zfill(4)}/calibration.json") as f:
693 | calibration_json = json.load(f)
694 | homography_matrix = np.array(calibration_json['homography matrix'])
695 | for serial in tracking_dict:
696 | value = tracking_dict[serial]
697 | x1,y1,x2,y2 = value["Coordinate"].values()
698 | x,y = (x2+x1)/2,y2
699 | bbox_w_c = translate_world_coordinate(x,y, homography_matrix)
700 | value["WoorldCoordinate"] = {"x":bbox_w_c[0],"y":bbox_w_c[1]}
701 |
702 | for camera_id in tracking_results:
703 | tracking_dict = tracking_results[camera_id]
704 | for serial in tracking_dict:
705 | value = tracking_dict[serial]
706 | return tracking_results
707 |
708 | def eval_noise_level(keypoints):
709 | # evaluate noise level in images based on pose estimation
710 | xs,ys,scores = zip(*keypoints)
711 | th = 0.75
712 | indices = [i for i,score in enumerate(scores) if score > th]
713 | condition = 0
714 | if len(indices)==2:
715 | if min(indices) <= 4:
716 | condition = 0
717 | else:
718 | condition = 2
719 | if len(indices)==1:
720 | condition = 3
721 | if len(indices)==0:
722 | condition =4
723 | return condition
724 |
725 | def remove_noise_images(scene_id,tracking_results,**kwargs):
726 | # remove noise images based on pose estimation
727 | model = kwargs.get("model","mmpose_hrnet")
728 |
729 | del_serials = {camera_id:[] for camera_id in tracking_results}
730 |
731 | for camera_id in tracking_results:
732 | tracking_dict = tracking_results[camera_id]
733 | for serial in tracking_dict:
734 | value = tracking_dict[serial]
735 | if "GlobalOfflineID" not in value:
736 | del_serials[camera_id].append(serial)
737 |
738 | for camera_id in tracking_results:
739 | tracking_dict = tracking_results[camera_id]
740 | for serial in del_serials[camera_id]:
741 | del tracking_dict[serial]
742 |
743 | for camera_id in tracking_results:
744 | tracking_dict = tracking_results[camera_id]
745 | keypoints_results = pose.PoseKeypoints(f"Pose/scene_{str(scene_id).zfill(3)}/camera_{str(camera_id).zfill(4)}/camera_{str(camera_id).zfill(4)}_out_keypoint.json")
746 | keypoints_results.assign_serial_from_tracking_dict(tracking_dict=tracking_dict)
747 | del_serials = []
748 | for serial in tracking_dict:
749 | value = tracking_dict[serial]
750 |
751 | kp = keypoints_results.get_keypoints(serial)
752 | if kp == None:
753 | del_serials.append(serial)
754 | continue
755 | keypoints = kp['Keypoints']
756 | condition = eval_noise_level(keypoints)
757 | coordinate = list(value["Coordinate"].values())
758 | w,h = coordinate[2]-coordinate[0],coordinate[3]-coordinate[1]
759 | if w/h > 3 or h/w > 5:
760 | del_serials.append(serial)
761 | continue
762 | if condition >= 2:
763 | if condition==2 and min(w,h) < 100:
764 | continue
765 | del_serials.append(serial)
766 | for serial in del_serials:
767 | del tracking_dict[serial]
768 |
769 | local_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict]
770 | unique_local_ids = sorted(set(local_ids))
771 | if -1 in unique_local_ids:
772 | unique_local_ids.remove(-1)
773 | local_id_serials_dict = {local_id:[] for local_id in unique_local_ids}
774 | [local_id_serials_dict[local_id].append(serial) for local_id,serial in zip(local_ids,tracking_dict)]
775 | local_id_frames_dict = {local_id:[] for local_id in unique_local_ids}
776 | [local_id_frames_dict[local_id].append(tracking_dict[serial]["Frame"]) for local_id,serial in zip(local_ids,tracking_dict)]
777 |
778 | del_serials = []
779 | for local_id in local_id_serials_dict:
780 | if local_id == -1:
781 | continue
782 | frames, serials = zip(*sorted(zip(local_id_frames_dict[local_id], local_id_serials_dict[local_id])))
783 | for i in range(len(frames[:-1])):
784 | if i == 0:
785 | continue
786 | past_frame = frames[i-1]
787 | frame = frames[i]
788 | future_frame = frames[i+1]
789 | if (frame - past_frame >30) and (future_frame - frame > 30):
790 | del_serials.append(serials[i])
791 |
792 | return tracking_results
793 |
794 |
795 | def delete_distant_persons(tracking_results,**kwargs):
796 | # delete nodes that are far from other nodes sharing the same global id
797 |
798 | gid_serials = {}
799 |
800 | for camera_id in tracking_results:
801 | tracking_dict = tracking_results[camera_id]
802 | for serial in tracking_dict:
803 | value = tracking_dict[serial]
804 | gid = value["GlobalOfflineID"]
805 | gid_serials[gid] = []
806 | for camera_id in tracking_results:
807 | tracking_dict = tracking_results[camera_id]
808 | for serial in tracking_dict:
809 | value = tracking_dict[serial]
810 | gid = value["GlobalOfflineID"]
811 | frame = value["Frame"]
812 | gid_serials[gid].append((camera_id,serial,frame))
813 | delete_list= []
814 | for gid in gid_serials:
815 | value = gid_serials[gid]
816 | camera_ids,serials,frames = zip(*value)
817 | frames, serials,camera_ids = zip(*sorted(zip(frames, serials, camera_ids)))
818 |
819 | current_frame = frames[0]
820 | current_serial = serials[0]
821 | current_camera_ids = camera_ids[0]
822 | tmp_frames = []
823 | tmp_serials = []
824 | tmp_camera_ids = []
825 | for frame,serial,camera_id in zip(frames,serials,camera_ids):
826 | if frame !=current_frame:
827 |
828 | if len(tmp_frames) >=2:
829 | world_coordinates = []
830 | for tmp_camera_id,tmp_serial in zip(tmp_camera_ids,tmp_serials):
831 | world_coordinate = tuple(tracking_results[tmp_camera_id][tmp_serial]["WorldCoordinate"].values())
832 | world_coordinates.append(world_coordinate)
833 | world_coordinates = np.array(world_coordinates)
834 | distance_matrix = squareform(pdist(world_coordinates, 'euclidean'))
835 | if len(distance_matrix)>2:
836 | if np.max(distance_matrix) >7:
837 | sum_row = np.sum(distance_matrix,axis=0)
838 | argmax = np.argmax(sum_row)
839 | delete_list.append((tmp_camera_ids[argmax],tmp_serials[argmax]))
840 |
841 | current_frame = frame
842 | current_serial = serial
843 | current_camera_id = camera_id
844 | tmp_frames = [frame]
845 | tmp_serials = [serial]
846 | tmp_camera_ids = [camera_id]
847 | else:
848 | tmp_frames.append(frame)
849 | tmp_serials.append(serial)
850 | tmp_camera_ids.append(camera_id)
851 |
852 | for camera_id,serial in delete_list:
853 | del tracking_results[camera_id][serial]
854 | return tracking_results
855 |
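# Editor's illustration (not in the original source): a plausible order in which the
# helpers above could be chained, inferred only from their comments; the actual
# driver is the repository's run/infer scripts, so treat this sequence as an
# assumption rather than the authors' pipeline:
#
#   tracking_results = measure_world_coordinate(scene_id, tracking_results)
#   tracking_results = multi_camera_people_tracking(tracking_results, scene_id, json_dir, out_dir)
#   representative_nodes = get_representative_nodes_cache(scene_id, out_dir)
#   tracking_results = global_id_reassignment(tracking_results, representative_nodes, scene_id)
#   tracking_results = remove_noise_images(scene_id, tracking_results)
#   tracking_results = interpolate_tracklet(tracking_results, representative_nodes)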
--------------------------------------------------------------------------------