├── tracking
│   ├── config
│   │   ├── parameters_per_scene.py
│   │   └── scene_2_camera_id_file.json
│   ├── requirements.txt
│   ├── src
│   │   ├── run.py
│   │   ├── tracking.py
│   │   ├── utils.py
│   │   ├── pose.py
│   │   ├── scpt.py
│   │   └── mcpt.py
│   └── infer.py
├── ranking.jpg
├── overall-pipeline.png
├── scripts
│   ├── tracking.sh
│   ├── extract_frame.sh
│   ├── detection.sh
│   ├── embedding.sh
│   └── pose.sh
├── poser
│   ├── load_tracking_result.py
│   └── top_down_video_demo_with_track_file.py
├── LICENSE
├── tools
│   ├── extract_frame.py
│   └── generate_submission.py
├── embedder
│   └── aic24_extract.py
├── README.md
└── detector
    └── aic24_get_detection.py

/tracking/config/parameters_per_scene.py:
--------------------------------------------------------------------------------
1 | parameters_per_scene = {
2 | }
--------------------------------------------------------------------------------
/ranking.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riips/AIC24_Track1_YACHIYO_RIIPS/HEAD/ranking.jpg
--------------------------------------------------------------------------------
/overall-pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riips/AIC24_Track1_YACHIYO_RIIPS/HEAD/overall-pipeline.png
--------------------------------------------------------------------------------
/scripts/tracking.sh:
--------------------------------------------------------------------------------
1 | SCENE=$*
2 | 
3 | #cd tracking
4 | 
5 | for SCENE in $*
6 | do
7 | echo Processing scene-$SCENE
8 | python tracking/infer.py -s $SCENE
9 | done
--------------------------------------------------------------------------------
/scripts/extract_frame.sh:
--------------------------------------------------------------------------------
1 | conda activate botsort_env
2 | 
3 | for SCENE in $*
4 | do
5 | F_SCENE=$(printf "%03d" "$SCENE")
6 | echo Processing scene-$F_SCENE
7 | python3 tools/extract_frame.py -s scene_$F_SCENE ./
8 | done
--------------------------------------------------------------------------------
/scripts/detection.sh:
--------------------------------------------------------------------------------
1 | cp ./detector/aic24_get_detection.py ./BoT-SORT/tools/
2 | cd ./BoT-SORT
3 | conda activate botsort_env
4 | 
5 | for SCENE in $*
6 | do
7 | F_SCENE=$(printf "%03d" "$SCENE")
8 | echo Processing scene-$F_SCENE
9 | python3 tools/aic24_get_detection.py -s scene_$F_SCENE ../
10 | done
--------------------------------------------------------------------------------
/scripts/embedding.sh:
--------------------------------------------------------------------------------
1 | cp ./embedder/aic24_extract.py ./deep-person-reid/torchreid/
2 | cd ./deep-person-reid
3 | conda activate torchreid
4 | 
5 | for SCENE in $*
6 | do
7 | F_SCENE=$(printf "%03d" "$SCENE")
8 | echo Processing scene-$F_SCENE
9 | python3 torchreid/aic24_extract.py -s scene_$F_SCENE ../
10 | done
--------------------------------------------------------------------------------
/tracking/requirements.txt:
--------------------------------------------------------------------------------
1 | contourpy==1.2.1
2 | cycler==0.12.1
3 | fonttools==4.51.0
4 | joblib==1.4.0
5 | kiwisolver==1.4.5
6 | matplotlib==3.8.4
7 | numpy==1.26.4
8 | opencv-python-headless==4.9.0.80
9 | packaging==24.0
10 | pillow==10.3.0
11 | pyparsing==3.1.2
12 | python-dateutil==2.9.0.post0
13 | scikit-learn==1.4.2
14 | scipy==1.13.0
15 | six==1.16.0
16 | threadpoolctl==3.4.0
17 | tqdm==4.66.2
18 | 
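A note on /tracking/config/parameters_per_scene.py above: it ships as an empty dict, and its expected schema is only visible from tracking/infer.py (get_parameters_to_scene returns parameters_per_scene[scene]["tracking_parameters"], falling back to the built-in defaults when a scene has no entry). A populated entry might look like the sketch below; the scene ID and values are illustrative, not tuned settings.
```
# Hypothetical per-scene override; the schema follows how tracking/infer.py reads this file.
parameters_per_scene = {
    41: {
        "tracking_parameters": {
            "epsilon_scpt": 0.10,
            "time_period": 3,
            "epsilon_mcpt": 0.37,
            "short_track_th": 120,
            "distance_type": "min",
            "sim_th": 0.85,
        }
    },
}
```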
-------------------------------------------------------------------------------- /scripts/pose.sh: -------------------------------------------------------------------------------- 1 | cp ./poser/load_tracking_result.py ./mmpose/demo/ 2 | cp ./poser/top_down_video_demo_with_track_file.py ./mmpose/demo/ 3 | cd ./mmpose 4 | conda activate openmmlab 5 | 6 | for SCENE in $* 7 | do 8 | F_SCENE=$(printf "%03d" "$SCENE") 9 | echo Procssing scene-$F_SCENE 10 | find "../Detection/scene_$F_SCENE" -maxdepth 1 -type f -name "*.txt" | while read -r file; 11 | do 12 | CAMERA=$(basename "$file") 13 | number=$(echo "$CAMERA" | sed 's/camera_\([0-9]\+\).txt/\1/') 14 | python3 demo/top_down_video_demo_with_track_file.py ../Detection/scene_${F_SCENE}/${CAMERA} ./configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth --video-path ../Original/scene_${F_SCENE}/camera_${number}/video.mp4 --out-file ../Pose/scene_${F_SCENE}/camera_${number}/camera_${number}_out_keypoint.json 15 | done 16 | done 17 | -------------------------------------------------------------------------------- /poser/load_tracking_result.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | 5 | def load_tracking(file_name): 6 | tracking_file = open(file_name) 7 | result = {} 8 | for line in tracking_file: 9 | line = line.rstrip().split(',') 10 | frame_id = int(line[1]) 11 | track_id = int(line[2]) 12 | bbox = [float(line[3]), float(line[4]), float(line[5]), float(line[6]), 1.0] 13 | if frame_id not in result.keys(): 14 | result[frame_id] = [] 15 | result[frame_id].append({'bbox': np.array(bbox)}) 16 | return result 17 | 18 | def load_tracking_id(file_name): 19 | tracking_file = open(file_name) 20 | result = {} 21 | for line in tracking_file: 22 | line = line.rstrip().split(',') 23 | frame_id = int(line[1]) 24 | track_id = int(line[2]) 25 | if frame_id not in result.keys(): 26 | result[frame_id] = [] 27 | result[frame_id].append({'track_id': track_id}) 28 | return result 29 | 30 | if __name__ == '__main__': 31 | print("run load_tracking") 32 | #load_tracking('') 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 RIIPS 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tools/extract_frame.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | import PIL.Image as Image 5 | import cv2 6 | from multiprocessing import Pool 7 | from sys import stdout 8 | import argparse 9 | import os.path as osp 10 | 11 | def make_parser(): 12 | parser = argparse.ArgumentParser("reid") 13 | parser.add_argument("root_path", type=str, default=None) 14 | parser.add_argument("-s", "--scene", type=str, default=None) 15 | return parser 16 | 17 | args = make_parser().parse_args() 18 | data_root = osp.join(args.root_path, "Original") 19 | scene = args.scene 20 | 21 | fprint, endl = stdout.write, "\n" 22 | 23 | IMAGE_FORMAT = ".jpg" 24 | 25 | 26 | def video2image(parameter_set): 27 | scenario, camera, camera_dir = parameter_set 28 | fprint(f"[Processing] {scenario} {camera}{endl}") 29 | imgs_dir = f"{camera_dir}/Frame" 30 | if not os.path.exists(imgs_dir): 31 | os.makedirs(imgs_dir) 32 | print("camera_dir:" + camera_dir) 33 | cap = cv2.VideoCapture(f"{camera_dir}/video.mp4") 34 | current_frame = 1 35 | ret, frame = cap.read() 36 | while ret: 37 | frame_file_name = f"{str(current_frame).zfill(6)}{IMAGE_FORMAT}" 38 | cv2.imwrite(f"{imgs_dir}/{frame_file_name}", frame) 39 | ret, frame = cap.read() 40 | current_frame += 1 41 | fprint(f"[Done] {scenario} {camera}{endl}") 42 | 43 | 44 | def main(): 45 | parameter_sets = [] 46 | scenario_dir = osp.join(data_root, scene) 47 | cameras = os.listdir(scenario_dir) 48 | for each_camera in cameras: 49 | cam = each_camera 50 | if "map" in each_camera: 51 | continue 52 | camera_dir = f"{scenario_dir}/{each_camera}" 53 | parameter_sets.append( 54 | [scene, each_camera, camera_dir] 55 | ) 56 | 57 | pool = Pool(processes=len(parameter_sets)) 58 | pool.map(video2image, parameter_sets) 59 | pool.close() 60 | 61 | 62 | if __name__ == "__main__": 63 | main() 64 | 65 | -------------------------------------------------------------------------------- /tools/generate_submission.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import os 4 | import numpy as np 5 | 6 | def read_json_file(file_path): 7 | with open(file_path, 'r') as file: 8 | data = json.load(file) 9 | return data 10 | 11 | def convert_coordinates_2world(x, y): 12 | vector_xyz = np.array([x, y, z]) 13 | vector_xyz_3d = np.dot(np.linalg.inv(homography_matrix), vector_xyz.T) 14 | vector_xyz_3d = vector_xyz_3d / vector_xyz_3d[2] 15 | return vector_xyz_3d[0], vector_xyz_3d[1] 16 | 17 | def load_calibration(calib_path): 18 | data = read_json_file(calib_path) 19 | global camera_projection_matrix 20 | global homography_matrix 21 | camera_projection_matrix = np.array(data["camera projection matrix"]) 22 | homography_matrix = np.array(data["homography matrix"]) 23 | 24 | def generate_submission(json_path, data_root="", save_path=""): 25 | json_path = os.path.join(data_root, json_path) 26 | submission_path = os.path.join(data_root, save_path ) 27 | if not os.path.exists(submission_path): 28 | os.makedirs(submission_path) 29 | submission_path = os.path.join(submission_path, 'track1.txt') 30 
| json_data = read_json_file(json_path) 31 | ret_data = [] 32 | for cam in json_data: 33 | print(f"processing camera : {cam.zfill(3)}") 34 | for seq in json_data[cam]: 35 | item = json_data[cam][seq] 36 | if "GlobalOfflineID" in item: 37 | ret_line = [cam, \ 38 | item["GlobalOfflineID"], \ 39 | (item["Frame"] - 1), \ 40 | item["Coordinate"]["x1"], \ 41 | item["Coordinate"]["y1"], \ 42 | (item["Coordinate"]["x2"] - item["Coordinate"]["x1"]), \ 43 | (item["Coordinate"]["y2"] - item["Coordinate"]["y1"]), \ 44 | "{:.6f}".format(item["WorldCoordinate"]["x"]), \ 45 | "{:.6f}".format(item["WorldCoordinate"]["y"])] 46 | ret_data.append(ret_line) 47 | ret_data = sorted(ret_data, key=lambda x: (int(x[0]), int(x[2]), int(x[1]))) 48 | np.savetxt(submission_path, ret_data, delimiter=' ', fmt="%s") 49 | 50 | 51 | if __name__ == "__main__": 52 | print("create track1.txt") 53 | scenes = os.listdir("./Tracking/") 54 | for sc in scenes: 55 | print(f"processing scene : {sc}") 56 | generate_submission(json_path=os.path.join(f"Tracking", sc,"fixed_whole_tracking_results.json"), save_path=os.path.join(f"Submission", sc)) 57 | 58 | print("merge track1.txt") 59 | with open(os.path.join("Submission", "track1.txt"), "w") as merged_file: 60 | for file_path in scenes: 61 | with open(os.path.join("Submission", f"{file_path}/track1.txt"), "r") as file: 62 | merged_file.write(file.read()) -------------------------------------------------------------------------------- /embedder/aic24_extract.py: -------------------------------------------------------------------------------- 1 | ''' 2 | extract ReID features from testing data. 3 | ''' 4 | import os 5 | import argparse 6 | import os.path as osp 7 | import numpy as np 8 | import torch 9 | import time 10 | import torchvision.transforms as T 11 | from PIL import Image 12 | import sys 13 | from utils import FeatureExtractor 14 | import torchreid 15 | import json 16 | 17 | def make_parser(): 18 | parser = argparse.ArgumentParser("reid") 19 | parser.add_argument("root_path", type=str, default=None) 20 | parser.add_argument("-s", "--scene", type=str, default=None) 21 | return parser 22 | 23 | if __name__ == "__main__": 24 | 25 | args = make_parser().parse_args() 26 | data_root = args.root_path 27 | scene = args.scene 28 | 29 | sys.path.append(data_root+'/deep-person-reid') 30 | 31 | img_dir = os.path.join(data_root,'Original') 32 | det_dir = os.path.join(data_root,'Detection') 33 | out_dir = os.path.join(data_root,'EmbedFeature') 34 | 35 | models = { 36 | 'osnet_x1_0':data_root+'/deep-person-reid/checkpoints/osnet_ms_m_c.pth.tar' 37 | } 38 | 39 | 40 | model_names = ['osnet_x1_0'] 41 | 42 | 43 | val_transforms = T.Compose([ 44 | T.Resize([256, 128]), 45 | T.ToTensor(), 46 | T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 47 | ]) 48 | 49 | for model_idx,name in enumerate(models): 50 | 51 | model_p = models[name] 52 | model_name = model_names[model_idx] 53 | 54 | print('Using model {}'.format(name)) 55 | 56 | extractor = FeatureExtractor( 57 | model_name=model_name, 58 | model_path=model_p, 59 | device='cuda' 60 | ) 61 | 62 | for file in os.listdir(os.path.join(det_dir,scene)): 63 | base, ext = os.path.splitext(file) 64 | if ext == '.txt': 65 | print('processing file {}{}'.format(base,ext)) 66 | det_path = os.path.join(det_dir,scene,'{}.txt'.format(base)) 67 | json_path = os.path.join(det_dir,scene,'{}.json'.format(base)) 68 | dets = np.genfromtxt(det_path,dtype=str,delimiter=',') 69 | with open(json_path) as f: 70 | jf = json.load(f) 71 | cur_frame = 0 72 | 
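# Note: each detection row is (cam, frame, _, x1, y1, x2, y2, conf). The loop below embeds every
# crop with the osnet extractor and saves it to
# EmbedFeature/<scene>/<camera>/feature_<frame>_<index>_<x1>_<x2>_<y1>_<y2>_<conf without the dot>.npy,
# writing that scene/camera-relative path back into the detection JSON as "NpyPath".
# tracking/src/utils.py (add_object_from_image_path) later re-parses the bbox from this file name.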
u_num = 0 73 | emb = np.array([None]*len(dets)) 74 | start = time.time() 75 | print('processing scene {} cam {} with {} detections'.format(scene,base,len(dets))) 76 | for idx,(cam,frame,_,x1,y1,x2,y2,conf) in enumerate(dets): 77 | u_num += 1 78 | x1,y1,x2,y2 = map(float,[x1,y1,x2,y2]) 79 | if idx%1000 == 0: 80 | if idx !=0: 81 | end = time.time() 82 | print('processing time :',end-start) 83 | start = time.time() 84 | print('process {}/{}'.format(idx,len(dets))) 85 | if cur_frame != int(frame): 86 | cur_frame = int(frame) 87 | if not os.path.isdir(osp.join(out_dir,scene,cam)): 88 | os.makedirs(osp.join(out_dir,scene,cam)) 89 | save_fn = os.path.join(out_dir,scene,cam,'feature_{}_{}_{}_{}_{}_{}_{}.npy'.format(cur_frame,u_num,str(int(x1)),str(int(x2)),str(int(y1)),str(int(y2)),str(conf).replace(".",""))) 90 | jf[str(idx).zfill(8)]['NpyPath'] = os.path.join(scene,cam,'feature_{}_{}_{}_{}_{}_{}_{}.npy'.format(cur_frame,u_num,str(int(x1)),str(int(x2)),str(int(y1)),str(int(y2)),str(conf).replace(".",""))) 91 | img_path = os.path.join(img_dir,scene,cam,'Frame',frame.zfill(6)+'.jpg') 92 | img = Image.open(img_path) 93 | 94 | img_crop = img.crop((x1,y1,x2,y2)) 95 | img_crop = val_transforms(img_crop.convert('RGB')).unsqueeze(0) 96 | feature = extractor(img_crop).cpu().detach().numpy()[0] 97 | 98 | np.save(save_fn,feature) 99 | end = time.time() 100 | print('processing time :',end-start) 101 | start = time.time() 102 | print('process {}/{}'.format(idx+1,len(dets))) 103 | with open(json_path, 'w') as f: 104 | json.dump(jf, f, ensure_ascii=False) 105 | -------------------------------------------------------------------------------- /tracking/src/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | from datetime import datetime 5 | 6 | from tracking import Tracker 7 | from utils import DetectedObjects 8 | 9 | def run_scpt(feature_data_root, out_dir="outdir", tracking_params={}): 10 | # Load and generate "detected object list" 11 | tracking_results = {} 12 | if not os.path.isdir(feature_data_root): 13 | raise Exception(f"No such directory: {feature_data_root}") 14 | if os.path.basename(feature_data_root).startswith("camera_"): 15 | camera_ids = [os.path.basename(feature_data_root)] 16 | feature_data_root = os.path.dirname(feature_data_root) 17 | is_multi = False 18 | else: 19 | camera_ids = [cam_id for cam_id in os.listdir(feature_data_root) if cam_id[:7] == "camera_"] 20 | is_multi = True 21 | 22 | # loading detections 23 | for camera_id in camera_ids: 24 | data_dir = os.path.join(feature_data_root, camera_id) 25 | camera_id = int(camera_id[7:]) 26 | detected_objects = load_detections(data_dir) 27 | tracking_results[camera_id] = detected_objects.to_trackingdict() 28 | del detected_objects 29 | 30 | # Run SCT on all detections of all cameras 31 | for camera_id in tracking_results: 32 | tracking_dict = tracking_results[camera_id] 33 | start_time = datetime.now() 34 | tracker = Tracker(tracking_params) 35 | tracking_results[camera_id] = tracker.scpt(tracking_dict) # tracking returns tracking_dict 36 | end_time = datetime.now() 37 | print(f"Camera{camera_id} elapsed time: {end_time - start_time}") 38 | 39 | # Dump the result 40 | out_json = os.path.join(out_dir, f'camera{camera_id:03d}_tracking_results.json') 41 | os.makedirs(os.path.dirname(out_json), exist_ok=True) 42 | with open(out_json, mode='w') as f: 43 | json.dump(tracking_results[camera_id], f) 44 | 45 | def run_mcpt(scene_id, json_dir,out_dir="outdir", 
tracking_params={}): 46 | start_time = datetime.now() 47 | tracker = Tracker(tracking_params) 48 | whole_tracking_result = tracker.mcpt(scene_id, json_dir,out_dir) 49 | 50 | # Dump the result 51 | out_file = os.path.join(out_dir, 'whole_tracking_results.json') 52 | with open(out_file, mode='w') as f: 53 | json.dump(whole_tracking_result, f) 54 | end_time = datetime.now() 55 | print(f"Elapsed_time: {end_time - start_time}") 56 | 57 | 58 | def correct_scpt_result(scene_id, json_dir, out_dir=None, tracking_params={}): 59 | if not os.path.isdir(json_dir): 60 | raise Exception(f"The directory '{json_dir}' does not exist.") 61 | if out_dir == None: 62 | out_dir = json_dir 63 | 64 | json_files = [f for f in os.listdir(json_dir) if os.path.splitext(f)[1].lower() == ".json" and f.startswith("camera")] 65 | json_files = sorted(json_files) 66 | for json_file in json_files: 67 | camera_id = int(json_file.split("_")[0][6:]) 68 | with open(os.path.join(json_dir, json_file)) as f: 69 | tracking_dict = json.load(f) 70 | tracker = Tracker(tracking_params) 71 | tracking_dict = tracker.correcting_scpt_result(tracking_dict) 72 | out_file = os.path.join(out_dir, "fixed_"+os.path.basename(json_file)) 73 | with open(out_file, mode='w') as f: 74 | json.dump(tracking_dict, f) 75 | 76 | def correct_mcpt_result(scene_id,json_dir,out_dir,tracking_params={}): 77 | with open(os.path.join(json_dir, 'whole_tracking_results.json')) as f: 78 | tracking_results = json.load(f) 79 | with open(os.path.join(json_dir, f"representative_nodes_scene{str(scene_id)}.json")) as f: 80 | representative_nodes = json.load(f) 81 | tracker = Tracker(tracking_params) 82 | tracking_resuluts = tracker.correcting_mcpt_result(scene_id,tracking_results,representative_nodes) 83 | out_file = os.path.join(out_dir, "fixed_whole_tracking_results.json") 84 | with open(out_file, mode='w') as f: 85 | json.dump(tracking_resuluts, f) 86 | 87 | 88 | def load_detections(data_root, debug=False): 89 | print(f"Loading detections from {data_root}.") 90 | detected_objects = DetectedObjects() 91 | detected_objects.load_from_directory(feature_root=data_root) 92 | print(f"Found {len(detected_objects.objects)} frames, and {detected_objects.num_objects} objects.") 93 | if debug: 94 | frames = sorted(detected_objects.objects) 95 | min_num_obj = 9999999 96 | max_num_obj = 0 97 | for frame in frames: 98 | obj = detected_objects[frame] 99 | num = len(obj) 100 | min_num_obj = min(min_num_obj, num) 101 | max_num_obj = max(max_num_obj, num) 102 | print(f"### MIN num detections: {min_num_obj}, MAX num detections: {max_num_obj} ###\n") 103 | 104 | return detected_objects 105 | 106 | def get_args(): 107 | parser = argparse.ArgumentParser(description='Offline Tracker sample app.') 108 | parser.add_argument('-d', '--data', default='EmbedFeature/scene_001', type=str) 109 | parser.add_argument('-o', '--outdir', default='output', type=str) 110 | 111 | return parser.parse_args() 112 | 113 | if __name__ == "__main__": 114 | args = get_args() 115 | 116 | run(feature_data_root=args.data, out_dir=args.outdir, tracking_params={}) 117 | -------------------------------------------------------------------------------- /tracking/infer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | from datetime import datetime 5 | from multiprocessing import Pool 6 | import subprocess 7 | import glob 8 | import tarfile 9 | import argparse 10 | 11 | sys.path.append("tracking") 12 | sys.path.append("tracking/src") 13 | import 
run 14 | 15 | """ 16 | This file contains functions to execute offline tracking. 17 | """ 18 | 19 | # Single camera people tracking 20 | def scpt(tracking_params={}): 21 | # distributed SCPT processing by simply using multiprocessing pool. 22 | global scene_id 23 | global camera_ids 24 | global exp_root 25 | global tracking_parameters 26 | tracking_parameters = tracking_params 27 | 28 | num_processes = 5 # Could be more than 5, but it depends on machine instance 29 | p = Pool(num_processes) 30 | result = p.map(single_tracking, camera_ids) 31 | 32 | run.correct_scpt_result(scene_id=scene_id, json_dir=exp_root, out_dir=exp_root, 33 | tracking_params=tracking_params) 34 | 35 | def single_tracking(cam_id): 36 | global scene_id 37 | global embed_root 38 | global exp_root 39 | global tracking_parameters 40 | 41 | print(f"Started a background process to camera_{cam_id}\n") 42 | run.run_scpt(feature_data_root=f'{embed_root}/scene_{scene_id:03d}/camera_{cam_id:04d}', out_dir=exp_root, 43 | tracking_params=tracking_parameters) 44 | return 45 | 46 | def get_camera_ids(scene_id, json_f="tracking/config/scene_2_camera_id_file.json"): 47 | with open(json_f) as f: 48 | scene2camera = json.load(f) 49 | camera_ids = [] 50 | for scene_camera in scene2camera: 51 | if scene_camera["scene_name"] == f"scene_{scene_id:03d}": 52 | camera_ids = scene_camera["camera_ids"] 53 | break 54 | return camera_ids 55 | 56 | 57 | # Multi camera tracking, aka ReID 58 | def mcpt(tracking_params={}): 59 | global scene_id 60 | global exp_root 61 | global tracking_parameters 62 | tracking_parameters = tracking_params 63 | 64 | run.run_mcpt(scene_id=scene_id, json_dir=exp_root, out_dir=exp_root, tracking_params=tracking_parameters) 65 | run.correct_mcpt_result(scene_id=scene_id, json_dir=exp_root, out_dir=exp_root, tracking_params=tracking_parameters) 66 | 67 | def run_tracking(scene, embed, output, debug=False, tracking_params={}): 68 | """ 69 | Main routine 70 | """ 71 | global scene_id 72 | global embed_root 73 | global exp_root 74 | global output_root 75 | global camera_ids 76 | global exec_scpt 77 | global exec_mcpt 78 | 79 | if debug: 80 | print(f"### tracking parameters: {tracking_params}", flush=True) 81 | 82 | scene_id = scene 83 | embed_root = embed 84 | camera_ids = get_camera_ids(scene_id) 85 | print(f"Target scene ID: {scene_id}, camera IDs: {camera_ids}") 86 | 87 | # Configure output directory 88 | exp_root = os.path.join(output, f"scene_{scene_id:03d}") 89 | output_root = exp_root 90 | 91 | # Execute SCPT (Single Camera People Tracking) 92 | if exec_scpt: 93 | scpt_started = datetime.now() 94 | print(f"Start SCPT: {scpt_started}", flush=True) 95 | scpt(tracking_params=tracking_params) 96 | print(f"SCPT finished. Elapsed: {datetime.now()-scpt_started}", flush=True) 97 | 98 | # Execute MCPT (Multi Camera People Tracking) aka ReID 99 | if exec_mcpt: 100 | mcpt_started = datetime.now() 101 | print(f"Start MCPT: {mcpt_started}") 102 | mcpt(tracking_params=tracking_params) 103 | print(f"MCPT finished. Elapsed: {datetime.now()-mcpt_started}", flush=True) 104 | 105 | 106 | def get_parameters_to_scene(scene_id, param_file): 107 | if not os.path.isfile(param_file): 108 | print(f"'parameters_per_scene file does not exist. 
{param_file}") 109 | return {} 110 | 111 | sys.path.append("tracking/config") 112 | import parameters_per_scene as pps 113 | 114 | scene = int(scene_id) 115 | if scene in pps.parameters_per_scene: 116 | return pps.parameters_per_scene[scene] 117 | else: 118 | return {} 119 | 120 | def get_args(): 121 | parser = argparse.ArgumentParser(description='Offline Tracker Inferencing app.') 122 | parser.add_argument('-s', '--scene', type=int, required=True) 123 | parser.add_argument('-o', '--output', default="Tracking", type=str) 124 | parser.add_argument('-all', '--exec_all', action='store_true') 125 | parser.add_argument('-scpt', '--exec_scpt', action='store_true') 126 | parser.add_argument('-mcpt', '--exec_mcpt', action='store_true') 127 | 128 | return parser.parse_args() 129 | 130 | if __name__ == "__main__": 131 | global exec_scpt 132 | global exec_mcpt 133 | 134 | args = get_args() 135 | 136 | if args.exec_all or (not (args.exec_scpt | args.exec_mcpt)): 137 | exec_scpt = exec_mcpt = True 138 | else: 139 | exec_scpt = exec_mcpt = False 140 | if args.exec_scpt: 141 | exec_scpt = True 142 | if args.exec_mcpt: 143 | exec_mcpt = True 144 | 145 | # Default tracking parameter 146 | default_tracking_parameters = { 147 | "epsilon_scpt": 0.10, "time_period":3,"epsilon_mcpt": 0.37, "short_track_th":120, 148 | "keypoint_condition_th":1, "replace_similarity_by_wcoordinate":True, "distance_type":"min", 149 | "distance_th":10, "sim_th":0.85, "delete_gid_th":5000 150 | } 151 | 152 | scene = args.scene 153 | param_file = "tracking/config/parameters_per_scene.py" 154 | parameters = get_parameters_to_scene(scene, param_file) 155 | if len(parameters) > 0: 156 | tracking_parameters = parameters["tracking_parameters"] 157 | else: 158 | # Empty parameters to the scene, so use the default parameters. 159 | tracking_parameters = default_tracking_parameters 160 | embed_path = f"EmbedFeature" 161 | 162 | # Run offline tracking 163 | run_tracking(scene=scene, embed=embed_path, output=args.output, tracking_params=tracking_parameters) 164 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CVPRW2024: Overlap Suppression Clustering for Offline Multi-Camera People Tracking 2 | 3 | The highest HOTA submission in the 8th NVIDIA AI City Challenge (2024) Track 1: Multi-Camera People Tracking. This submission placed 2nd in the competition due to its offline tracking algorithm. 4 | [[Paper]](https://openaccess.thecvf.com/content/CVPR2024W/AICity/papers/Yoshida_Overlap_Suppression_Clustering__for_Offline_Multi-Camera_People_Tracking_CVPRW_2024_paper.pdf) 5 | 6 | ## Dataset Availability 7 | 8 | The official dataset can be downloaded from the AI City Challenge website (https://www.aicitychallenge.org/2024-data-and-evaluation/). You need to fill out the dataset request form to obtain the password to download them. 9 | 10 | Referring to the DATASET LICENSE AGREEMENT from the dataset author(s), we are not allowed to share the dataset. 11 | ``` 12 | 2.c. ... you may not copy, sell, rent, sublicense, transfer or distribute the DATASET, or share with others. 13 | ``` 14 | 15 | 16 | ## Ranking 17 | 18 | 19 | 20 | ## Overall Pipeline 21 | 22 | 23 | 24 | ## Environment Requirements 25 | 26 | The implementation of our work is built upon [BoT-SORT](https://github.com/NirAharon/BoT-SORT), [OpenMMLab](https://github.com/open-mmlab), and [torchreid](https://github.com/KaiyangZhou/deep-person-reid). 
27 | 
28 | Three different environments are required for the reproduction process. Please install these three environments according to the following repos:
29 | 
30 | 1. [Install BoT-SORT for people detection](https://github.com/NirAharon/BoT-SORT#installation)
31 | 2. [Install torchreid for feature extraction](https://github.com/KaiyangZhou/deep-person-reid#installation)
32 | 3. [Install mmpose for pose estimation](https://mmpose.readthedocs.io/en/latest/installation.html) (*Please note that you need a version in the 0.x series for this to work.)
33 | 
34 | We provide the installation commands for mmpose v0.29.0 below.
35 | Please note that these commands may change due to updates or modifications in mmpose.
36 | ```
37 | #step 1
38 | conda create --name openmmlab python=3.8 -y
39 | conda activate openmmlab
40 | 
41 | #step 2
42 | conda install pytorch torchvision -c pytorch
43 | 
44 | #step 3
45 | pip install -U openmim
46 | mim install mmengine
47 | mim install "mmcv==1.7.0"
48 | 
49 | mim install "mmdet==2.28.2"
50 | 
51 | #Build mmpose from source
52 | git clone https://github.com/open-mmlab/mmpose.git -b v0.29.0 --depth 1
53 | cd mmpose
54 | pip install -r requirements.txt
55 | pip install -v -e .
56 | ```
57 | If you receive an mmcv AssertionError, please reinstall mmcv.
58 | ```
59 | mim uninstall mmcv
60 | mim install "mmcv==1.7.0"
61 | ```
62 | Once you have installed all of the above on the same machine, you'll see the root folder organized as follows:
63 | ```
64 | root
65 | │  README.md
66 | │  ranking.jpg
67 | │  overall-pipeline.png
68 | │
69 | ├─assets
70 | ├─detector
71 | ├─embedder
72 | ├─poser
73 | ├─scripts
74 | ├─tools
75 | ├─tracking
76 | │
77 | ├─BoT-SORT
78 | ├─deep-person-reid
79 | └─mmpose
80 | ```
81 | 
82 | ## Training
83 | This project executes:
84 | 1) Person detection
85 | 2) Feature extraction of each person
86 | 3) Pose estimation of each person
87 | 
88 | However, we utilize pre-trained models for all of these, so there is nothing to train.
89 | 
90 | ## Running Tracking
91 | 
92 | ### Preparation
93 | #### 0. Place your video files.
94 | 
95 | Place your video files under the directory that corresponds to their scene/camera IDs, such as Original/scene_XXX/camera_XXXX/video.mp4.
96 | 
97 | For example, to place the video file for camera 361 of scene 41, run commands like the ones below. Please don't forget to place the video files of all cameras of the scene you want to process.
98 | ```
99 | mkdir -p Original/scene_041/camera_0361
100 | cp <path_to_your_video_file> Original/scene_041/camera_0361/video.mp4
101 | ```
102 | 
103 | #### 1. Frame Extraction
104 | 
105 | Run the command below to extract frame images.
106 | ```
107 | sh scripts/extract_frame.sh 41
108 | ```
109 | 
110 | #### 2. Person Detection
111 | 
112 | Run the steps below for person detection.
113 | - Install BoT-SORT as instructed in the [Environment Requirements](#environment-requirements) section above.
114 | - Prepare models. Download the pretrained YOLOX_x model from [ByteTrack (Google Drive)](https://drive.google.com/file/d/1P4mY0Yyd3PPTybgZkjMYhFri88nTmJX5/view), and store it in the BoT-SORT directory.
115 | - Run person detection by executing the command below.
116 | ```
117 | sh scripts/detection.sh 41
118 | ```
119 | 
120 | #### 3. Feature extraction
121 | 
122 | Run the steps below to extract ReID features.
123 | - Install deep-person-reid as instructed in the [Environment Requirements](#environment-requirements) section above.
124 | - Prepare models. Download the pretrained deep-person-reid model from [torchreid](https://kaiyangzhou.github.io/deep-person-reid/MODEL_ZOO).
Running the script mentioned below will download this pretrained model automatically and store it accordingly.
125 | - Run feature extraction by executing the command below.
126 | ```
127 | sh scripts/embedding.sh 41
128 | ```
129 | 
130 | #### 4. Pose estimation
131 | 
132 | Run the steps below for pose estimation.
133 | - Install mmpose as instructed in the [Environment Requirements](#environment-requirements) section above.
134 | - Run pose estimation by executing the command below.
135 | ```
136 | sh scripts/pose.sh 41
137 | ```
138 | 
139 | ### Single Camera People Tracking and Multi Camera People Tracking
140 | 
141 | #### 5. Both Single Camera People Tracking and Multi Camera People Tracking
142 | 
143 | Run the commands below to run both Single Camera People Tracking and Multi Camera People Tracking at once.
144 | ```
145 | python3 -m venv .venv
146 | source .venv/bin/activate
147 | pip install -r tracking/requirements.txt
148 | sh scripts/tracking.sh 41
149 | ```
150 | 
151 | #### 6. Combine tracking results of each scene for submission.
152 | 
153 | Run the command below to combine the results of all scenes. This will generate track1.txt under the "Submission" directory.
154 | ```
155 | python3 tools/generate_submission.py
156 | ```
157 | 
--------------------------------------------------------------------------------
/poser/top_down_video_demo_with_track_file.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import os
3 | import warnings
4 | from argparse import ArgumentParser
5 | 
6 | import cv2
7 | import mmcv
8 | import json
9 | import numpy as np
10 | 
11 | from mmpose.apis import (collect_multi_frames, inference_top_down_pose_model,
12 |                          init_pose_model, process_mmdet_results,
13 |                          vis_pose_result)
14 | from mmpose.datasets import DatasetInfo
15 | 
16 | try:
17 |     from mmdet.apis import inference_detector, init_detector
18 |     has_mmdet = True
19 | except (ImportError, ModuleNotFoundError):
20 |     has_mmdet = False
21 | 
22 | from load_tracking_result import load_tracking
23 | 
24 | 
25 | def main():
26 |     """Visualize the demo video (support both single-frame and multi-frame).
27 | 
28 |     Using mmdet to detect the human.
29 |     """
30 |     parser = ArgumentParser()
31 |     parser.add_argument('track_result', help='Track result file')
32 |     parser.add_argument('pose_config', help='Config file for pose')
33 |     parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
34 |     parser.add_argument('--video-path', type=str, help='Video path')
35 |     parser.add_argument(
36 |         '--show',
37 |         action='store_true',
38 |         default=False,
39 |         help='whether to show visualizations.')
40 |     parser.add_argument(
41 |         '--out-video-root',
42 |         default='',
43 |         help='Root of the output video file.
' 44 | 'Default not saving the visualization video.') 45 | parser.add_argument( 46 | '--device', default='cuda:0', help='Device used for inference') 47 | parser.add_argument( 48 | '--det-cat-id', 49 | type=int, 50 | default=1, 51 | help='Category id for bounding box detection model') 52 | parser.add_argument( 53 | '--bbox-thr', 54 | type=float, 55 | default=0.3, 56 | help='Bounding box score threshold') 57 | parser.add_argument( 58 | '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold') 59 | parser.add_argument( 60 | '--radius', 61 | type=int, 62 | default=4, 63 | help='Keypoint radius for visualization') 64 | parser.add_argument( 65 | '--thickness', 66 | type=int, 67 | default=1, 68 | help='Link thickness for visualization') 69 | parser.add_argument( 70 | '--use-multi-frames', 71 | action='store_true', 72 | default=False, 73 | help='whether to use multi frames for inference in the pose' 74 | 'estimation stage. Default: False.') 75 | parser.add_argument( 76 | '--online', 77 | action='store_true', 78 | default=False, 79 | help='inference mode. If set to True, can not use future frame' 80 | 'information when using multi frames for inference in the pose' 81 | 'estimation stage. Default: False.') 82 | parser.add_argument( 83 | '--out-file', 84 | type=str 85 | ) 86 | 87 | 88 | assert has_mmdet, 'Please install mmdet to run the demo.' 89 | 90 | args = parser.parse_args() 91 | 92 | # assert args.show or (args.out_video_root != '') 93 | 94 | print('Initializing model...') 95 | # # build the detection model from a config file and a checkpoint file 96 | # det_model = init_detector( 97 | # args.det_config, args.det_checkpoint, device=args.device.lower()) 98 | track_results = load_tracking(args.track_result) 99 | 100 | # build the pose model from a config file and a checkpoint file 101 | pose_model = init_pose_model( 102 | args.pose_config, args.pose_checkpoint, device=args.device.lower()) 103 | 104 | dataset = pose_model.cfg.data['test']['type'] 105 | # get datasetinfo 106 | dataset_info = pose_model.cfg.data['test'].get('dataset_info', None) 107 | if dataset_info is None: 108 | warnings.warn( 109 | 'Please set `dataset_info` in the config.' 110 | 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.', 111 | DeprecationWarning) 112 | else: 113 | dataset_info = DatasetInfo(dataset_info) 114 | 115 | # read video 116 | video = mmcv.VideoReader(args.video_path) 117 | assert video.opened, f'Faild to load video file {args.video_path}' 118 | 119 | if args.out_video_root == '': 120 | save_out_video = False 121 | else: 122 | os.makedirs(args.out_video_root, exist_ok=True) 123 | save_out_video = True 124 | 125 | if save_out_video: 126 | fps = video.fps 127 | size = (video.width, video.height) 128 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 129 | videoWriter = cv2.VideoWriter( 130 | os.path.join(args.out_video_root, 131 | f'vis_{os.path.basename(args.video_path)}'), fourcc, 132 | fps, size) 133 | 134 | # frame index offsets for inference, used in multi-frame inference setting 135 | if args.use_multi_frames: 136 | assert 'frame_indices_test' in pose_model.cfg.data.test.data_cfg 137 | indices = pose_model.cfg.data.test.data_cfg['frame_indices_test'] 138 | 139 | # whether to return heatmap, optional 140 | return_heatmap = False 141 | 142 | # return the output of some desired layers, 143 | # e.g. 
use ('backbone', ) to return backbone feature 144 | output_layer_names = None 145 | 146 | save_results = {} 147 | 148 | print('Running inference...') 149 | for frame_id, cur_frame in enumerate(mmcv.track_iter_progress(video)): 150 | # get the detection results of current frame 151 | # the resulting box is (x1, y1, x2, y2) 152 | # mmdet_results = inference_detector(det_model, cur_frame) 153 | 154 | # # keep the person class bounding boxes. 155 | # person_results = process_mmdet_results(mmdet_results, args.det_cat_id) 156 | if frame_id not in track_results.keys(): 157 | continue 158 | person_results = track_results[frame_id] 159 | 160 | if args.use_multi_frames: 161 | frames = collect_multi_frames(video, frame_id, indices, 162 | args.online) 163 | 164 | # test a single image, with a list of bboxes. 165 | pose_results, returned_outputs = inference_top_down_pose_model( 166 | pose_model, 167 | frames if args.use_multi_frames else cur_frame, 168 | person_results, 169 | bbox_thr=args.bbox_thr, 170 | format='xyxy', 171 | dataset=dataset, 172 | dataset_info=dataset_info, 173 | return_heatmap=return_heatmap, 174 | outputs=output_layer_names) 175 | 176 | save_results[frame_id] = pose_results 177 | # show the results 178 | vis_frame = vis_pose_result( 179 | pose_model, 180 | cur_frame, 181 | pose_results, 182 | dataset=dataset, 183 | dataset_info=dataset_info, 184 | kpt_score_thr=args.kpt_thr, 185 | radius=args.radius, 186 | thickness=args.thickness, 187 | show=False) 188 | 189 | if args.show: 190 | cv2.imshow('Frame', vis_frame) 191 | 192 | if save_out_video: 193 | videoWriter.write(vis_frame) 194 | 195 | if args.show and cv2.waitKey(1) & 0xFF == ord('q'): 196 | break 197 | 198 | class NumpyEncoder(json.JSONEncoder): 199 | """ Special json encoder for numpy types """ 200 | def default(self, obj): 201 | if isinstance(obj, np.integer): 202 | return int(obj) 203 | elif isinstance(obj, np.floating): 204 | return float(obj) 205 | elif isinstance(obj, np.ndarray): 206 | return obj.tolist() 207 | return json.JSONEncoder.default(self, obj) 208 | os.makedirs(os.path.dirname(args.out_file), exist_ok=True) 209 | json.dump(save_results, open(args.out_file, 'w'), cls=NumpyEncoder) 210 | 211 | if save_out_video: 212 | videoWriter.release() 213 | if args.show: 214 | cv2.destroyAllWindows() 215 | 216 | 217 | if __name__ == '__main__': 218 | main() 219 | -------------------------------------------------------------------------------- /tracking/src/tracking.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import tqdm 4 | from sklearn.cluster import DBSCAN 5 | from scipy.spatial.distance import cdist 6 | from sklearn.metrics.pairwise import cosine_similarity 7 | 8 | from utils import DetectedObjects 9 | from scpt import * 10 | from mcpt import * 11 | 12 | class Tracker(): 13 | """ 14 | This class represents YOTM, aka Yoshida Offline Tracking Method. 
15 | """ 16 | def __init__(self, params={}): 17 | self.camera_ids = [] 18 | self.tracking_dicts = {} 19 | self._init_parameters() 20 | self.update_parameters(**params) 21 | self.frame_period = self.parameters["time_period"] * self.parameters["fps"] 22 | 23 | def _init_parameters(self): 24 | 25 | #self.parameters[""]: = 26 | self.parameters = {} 27 | self.parameters["image_size"] = (1920,1080) 28 | 29 | # sct parameters 30 | self.parameters["time_period"]:int = 3 31 | self.parameters["fps"]:int = 30 32 | self.parameters["epsilon_scpt"]:float = 0.1 33 | self.parameters["min_samples"]:int = 4 34 | self.parameters["remove_noise_cluster"]:bool = True 35 | self.parameters["overlap_suppression"]:bool = True 36 | self.parameters["num_candidates"]:int = 10 37 | self.parameters["clustering_method"]:str = "agglomerative" #agglomerative or dbsacn 38 | self.parameters["debug"]:bool = False 39 | 40 | #fix_sct parameters 41 | self.parameters["sequential_nms"]:bool = True 42 | self.parameters["temporally_snms_th"]:float = 0.6 43 | self.parameters["spatially_snms_th"]:float = 0.6 44 | self.parameters["merge_nonoverlap"]:bool = True 45 | 46 | self.parameters["separate_warp"]:bool = True 47 | self.parameters["warp_th"]:int = 40 48 | self.parameters["alpha"]:float = 0.5 49 | 50 | self.parameters["exclude_short_track"]:bool = False 51 | self.parameters["short_tracklet_th"]:int = 120 52 | 53 | self.parameters["exclude_motionless_track"]:bool = False 54 | self.parameters["stop_track_th"]:int = 25 55 | 56 | # mct parameters 57 | self.parameters["epsilon_mcpt"]:float = 0.4 58 | self.parameters["keypoint_th"]:float = 0.8 59 | self.parameters["keypoint_condition_th"]:float = 1 60 | self.parameters["distance_th"]:int = 5 61 | 62 | self.parameters["check_sc_overlap"]:bool = False 63 | self.parameters["distance_type"]:str = "max" #max or mean or min 64 | self.parameters["replace_similarity_by_wcoordinate"]:bool = False 65 | self.parameters["replace_value"]: float = -10 66 | self.parameters["representative_selection_method"]:str = "keypoint" #keypoint or centrality 67 | self.parameters["aspect_th"]:float =0.5 68 | 69 | # fix mct parameters 70 | self.parameters["reassign_global_id"]:bool = True 71 | self.parameters["short_track_th"]:int = 120 72 | self.parameters["delete_gid_th"]:int = 6000 73 | self.parameters["assign_all_tracklet"]:bool = False 74 | self.parameters["sim_th"]:float = 0.75 75 | self.parameters["delete_few_camera_cluster"]:bool = False 76 | 77 | self.parameters["measure_wcoordinate"]:bool = False 78 | 79 | self.parameters["remove_noise_image"]:bool = True 80 | 81 | self.parameters["delete_distant_person"]:bool = True 82 | 83 | self.parameters["interpolate_track"]:bool = True 84 | self.parameters["max_interpolate_interval"]:int = 15 85 | 86 | 87 | def update_parameter(self, parameter, value): 88 | if not parameter in self.parameters: 89 | print(f"Unknown parameter: {parameter}.") 90 | sys.exit() 91 | return 92 | self.parameters[parameter] = value 93 | 94 | def update_parameters(self, **params): 95 | for key in params: 96 | self.update_parameter(key, params[key]) 97 | 98 | def scpt(self, tracking_dict): 99 | """ 100 | This performs object tracking with single camera dataset. 101 | Most of code below are just copied from '20240214_OfflineTracking-Debug.ipynb' and tweaked few. 
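        Outline: frames are grouped into time sections of time_period * fps frames; the
        detections of each section are clustered on their appearance features (agglomerative
        clustering or DBSCAN, controlled by clustering_method and epsilon_scpt), and the
        clusters of consecutive sections are associated with each other, so that every
        detection serial ends up with an "OfflineID" tracklet label.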
102 | """ 103 | 104 | frame_period = self.parameters["time_period"] * self.parameters["fps"] 105 | epsilon = self.parameters["epsilon_scpt"] 106 | 107 | max_offlineid = -1 108 | last_frame = get_max_value_of_dict(tracking_dict, "Frame") 109 | time_section_serial_dict = {timesection:[] for timesection in range(last_frame//frame_period+1) } 110 | 111 | for serial in tracking_dict.keys(): 112 | frame = tracking_dict[serial]["Frame"] 113 | time_section = frame // frame_period 114 | time_section_serial_dict[time_section].append(serial) 115 | 116 | for time_section in range(last_frame//frame_period+1): 117 | serials = time_section_serial_dict[time_section] 118 | if len(serials) == 0: continue 119 | clusters = tracking_by_clustering(tracking_dict,serials, **self.parameters) 120 | 121 | clusters = [cluster+max_offlineid+1 if cluster != -1 else -i for i,cluster in enumerate(clusters)] 122 | max_offlineid = max(clusters) if max(clusters) > 0 else max_offlineid 123 | 124 | if time_section == 0: 125 | for serial,cluster in zip(serials,clusters): 126 | tracking_dict[serial]["OfflineID"] = int(cluster) 127 | elif time_section > 0: 128 | past_serials = time_section_serial_dict[time_section-1] 129 | tracking_dict = associate_cluster_between_period(tracking_dict, clusters, serials, past_serials, **self.parameters) 130 | 131 | # We have tracking results in TrackingDict, yet will gather results for debugging. Could be deleted. 132 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 133 | new_offline_ids_dict = {key:i for i,key in enumerate(set(offline_ids)) if key != -1} 134 | new_offline_ids_dict[-1] = -1 135 | 136 | for serial in tracking_dict: 137 | offline_id = tracking_dict[serial]["OfflineID"] 138 | tracking_dict[serial]["OfflineID"] = new_offline_ids_dict[offline_id] 139 | 140 | return tracking_dict 141 | 142 | def correcting_scpt_result(self,tracking_dict,**kwargs): 143 | 144 | sequential_nms = self.parameters["sequential_nms"] 145 | separate_warp = self.parameters["separate_warp"] 146 | exclude_short_track = self.parameters["exclude_short_track"] 147 | exclude_motionless_track = self.parameters["exclude_motionless_track"] 148 | print("sequential_nms:",sequential_nms) 149 | print("separate_warp:",separate_warp) 150 | print("exclude_short_track:",exclude_short_track) 151 | print("exclude_motionless_track:",exclude_motionless_track) 152 | 153 | if sequential_nms: 154 | tracking_dict = sequential_non_maximum_suppression(tracking_dict, **self.parameters) 155 | if separate_warp: 156 | tracking_dict = separate_warp_tracklet(tracking_dict, **self.parameters) 157 | if exclude_short_track: 158 | tracking_dict = exclude_short_tracklet(tracking_dict, **self.parameters) 159 | if exclude_motionless_track: 160 | tracking_dict = exclude_motionless_tracklet(tracking_dict, **self.parameters) 161 | return tracking_dict 162 | 163 | def mcpt(self,scene_id, json_dir,out_dir): 164 | epsilon = self.parameters["epsilon_mcpt"] 165 | 166 | if not os.path.isdir(json_dir): 167 | raise Exception(f"The directory '{json_dir}' does not exist.") 168 | if out_dir == None: 169 | out_dir = json_dir 170 | tracking_results = {} 171 | json_files = [f for f in os.listdir(json_dir) if os.path.splitext(f)[1].lower() == ".json" and f.startswith("fixed_camera")] 172 | json_files = sorted(json_files) 173 | for json_file in json_files: 174 | camera_id = int(json_file.split("_")[1][6:]) 175 | with open(os.path.join(json_dir, json_file)) as f: 176 | tracking_dict = json.load(f) 177 | print(f"{json_file} 
len(serials):{len(tracking_dict)}") 178 | tracking_results[camera_id] = tracking_dict 179 | tracking_results = multi_camera_people_tracking(tracking_results, scene_id=scene_id, json_dir=json_dir, out_dir=out_dir, **self.parameters) 180 | 181 | return tracking_results 182 | 183 | def correcting_mcpt_result(self,scene_id,tracking_results,represntative_nodes,**kwargs): 184 | reassign_global_id = self.parameters["reassign_global_id"] 185 | measure_wcoordinate = self.parameters["measure_wcoordinate"] 186 | interpolate_track = self.parameters["interpolate_track"] 187 | remove_noise_image = self.parameters["remove_noise_image"] 188 | delete_distant_person = self.parameters["delete_distant_person"] 189 | print("reassign_global_id:",reassign_global_id) 190 | print("measure_wcoordinate:",measure_wcoordinate) 191 | print("interpolate_track:",interpolate_track) 192 | print("delete_distant_person:",delete_distant_person) 193 | 194 | if reassign_global_id: 195 | tracking_results = global_id_reassignment(tracking_results,represntative_nodes,scene_id,**self.parameters) 196 | if measure_wcoordinate: 197 | tracking_results = measure_world_coordinate(scene_id,tracking_results,**self.parameters) 198 | if remove_noise_image: 199 | tracking_results = remove_noise_images(scene_id,tracking_results,**self.parameters) 200 | if delete_distant_person: 201 | tracking_results = delete_distant_persons(tracking_results,**self.parameters) 202 | if interpolate_track: 203 | tracking_results = interpolate_tracklet(tracking_results,represntative_nodes,**self.parameters) 204 | 205 | return tracking_results 206 | -------------------------------------------------------------------------------- /tracking/src/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import glob 5 | 6 | 7 | class DetectedObjects: 8 | """ 9 | Represents whole detected objects to track. 10 | Object dict is built by frame_id as a key and its entity contains a list of all Detected objects of the frame. 11 | """ 12 | def __init__(self): 13 | self.num_objects = 0 14 | self.objects = {} 15 | self._objects_registered = {} 16 | #self.scene_id = scene_id 17 | #self.camera_id = -1 18 | self.camera_projection_matrix = None 19 | self.homography_matrix = None 20 | 21 | def __str__(self): 22 | return f"DetectedObjects: scene_id:{self.scene_id}, camera_id:{self.camera_id}, num_objects:{self.num_objects}" 23 | 24 | def load_from_directory(self, feature_root, calibration_path="Calibration"): 25 | if not os.path.isdir(feature_root): 26 | raise Exception(f'There is no directory to read from. {feature_root}') 27 | npys = sorted(glob.glob(os.path.join(feature_root, "**/*.npy"), recursive=True)) 28 | scene_id = None 29 | camera_id = None 30 | path_list = feature_root.split("/") 31 | for dir in path_list: 32 | if dir.startswith("scene_"): 33 | scene_id = int(dir.replace("scene_","")) 34 | if dir.startswith("camera_"): 35 | camera_id = int(dir.replace("camera_","")) 36 | if scene_id is not None and camera_id is not None: 37 | calibration_path = f"Original/scene_{scene_id:03d}/camera_{camera_id:04d}/calibration.json" 38 | self.load_calibration(calibration_path) 39 | else: 40 | print(f'\033[33mwarning\033[0m : failed to get scene_id and camera_id from feature path.') 41 | print(f'\033[33mwarning\033[0m : world coordinate calculations are ignored.') 42 | 43 | 44 | # Below is to parse camera id from the path, we're probably not going to use it though. 
45 | #camera_id = None 46 | #dirs = npys[0].split("/") 47 | #if len(dirs) < 2: 48 | # print(f"Cannot prop camera id from input path. {feature_path}") 49 | #else: 50 | # camera_id = dirs[-1] 51 | # if "Camera" in camera_id: 52 | # self.camera_id = int(camera_id[len("Camera"):]) 53 | 54 | for f in npys: 55 | self.add_object_from_image_path(f) 56 | 57 | def add_object(self, frame_id, coordinate, world_coordinate, confidence, feature_path, image_path=None): 58 | if isinstance(frame_id, str): 59 | frame_id = int(frame_id) 60 | 61 | # Check if coordinate is reasonable 62 | if coordinate.x1 >= coordinate.x2 or coordinate.y1 >= coordinate.y2: 63 | print(f"Unnatural coordinate found in frame {frame_id}: {coordinate}") 64 | return 65 | 66 | detected_obj = DetectedObject(object_id=self.num_objects, frame_id=frame_id, coordinate=coordinate, worldcoordinate=world_coordinate, 67 | confidence=confidence, feature_path=feature_path) 68 | key = f"{coordinate.x1}_{coordinate.y1}_{coordinate.x2}_{coordinate.y2}" 69 | if frame_id in self.objects: 70 | if not key in self._objects_registered[frame_id]: 71 | objects_per_frame = self.objects[frame_id].append(detected_obj) 72 | self._objects_registered[frame_id].append(key) 73 | else: 74 | print(f"Duplicate coord found in frame {frame_id}: {coordinate}") 75 | return 76 | else: 77 | objects_per_frame = self.objects[frame_id] = [detected_obj] 78 | self._objects_registered[frame_id] = [key] 79 | self.num_objects += 1 80 | 81 | def add_object_from_image_path(self, feature_path, image_path=None, calibration_path="Calibration"): 82 | file_path = os.path.basename(feature_path) 83 | if file_path.startswith("feature_"): 84 | _, frame_id, serial_no, x1, x2, y1, y2, conf = os.path.splitext(file_path)[0].split("_") 85 | conf = conf if len(conf) == 1 else conf[0]+"."+conf[1:] 86 | else: 87 | serial_no, frame_id, x1, x2, y1, y2 = os.path.splitext(file_path)[0].split("_") 88 | x1, x2, y1, y2 = int(x1.replace("x","")), int(x2), int(y1.replace("y","")), int(y2) 89 | conf = 0.98765 # Dummy 90 | World_coordinate = None 91 | if self.homography_matrix is not None: 92 | w_x, w_y = self.convert_coordinates_2world((int(float(x1)) + int(float(x2))) / 2, int(float(y2))) 93 | World_coordinate = WorldCoordinate(w_x, w_y) 94 | 95 | self.add_object(frame_id=int(frame_id), coordinate=Coordinate(x1, y1, x2, y2), world_coordinate=World_coordinate, 96 | confidence=float(conf), feature_path=feature_path, image_path=image_path) 97 | 98 | def get_objects_of_frames(self, start_frame, end_frame): 99 | if start_frame > self.num_frames() or end_frame > self.num_frames(): 100 | return None 101 | object_dict = {} 102 | for frame_id in range(start_frame, end_frame): 103 | if frame_id in self.objects: 104 | object_dict[frame_id] = self[frame_id] 105 | #else: 106 | # print(f"There is no such frame in the DetectedObjects, will be ignored. frame_id: {frame_id}") 107 | return object_dict 108 | 109 | def get_object_ids_of_frames(self, start_frame, end_frame): 110 | """ 111 | Returns a list of detected object IDs that appeared within the specified frame window. 
112 | """ 113 | if start_frame > self.num_frames() or end_frame > self.num_frames(): 114 | return None 115 | object_ids = [] 116 | for frame_id in range(start_frame, end_frame): 117 | if frame_id in self.objects: 118 | for det in self[frame_id]: 119 | object_ids.append(det.object_id) 120 | return sorted(object_ids) 121 | 122 | def __getitem__(self, frame_id): 123 | if frame_id in self.objects: 124 | return self.objects[frame_id] 125 | else: 126 | return None 127 | 128 | def num_frames(self): 129 | """ 130 | Returns number of frames that currently holding. 131 | """ 132 | return len(self.objects) 133 | 134 | def last_frame_id(self): 135 | """ 136 | Returns the last frame id. 137 | """ 138 | return max(self.objects.keys()) 139 | 140 | def to_trackingdict(self): 141 | """ 142 | Compatibility function to convert detections in TrackingDict format. 143 | """ 144 | track_dict = {} 145 | for frame_id in self.objects: 146 | for detected_object in self.objects[frame_id]: 147 | serial_no = detected_object.object_id 148 | coordinate = json.loads(detected_object.coordinate.__str__()) 149 | if detected_object.worldcoordinate.__str__() != "None": 150 | world_coordinate = json.loads(detected_object.worldcoordinate.__str__()) 151 | else: 152 | world_coordinate = None 153 | new_object = { "Frame": frame_id, "NpyPath": detected_object.feature_path, 154 | "Coordinate": coordinate, "WorldCoordinate": world_coordinate, "OfflineID": -1 } #"ClusterID": None, 155 | track_dict[serial_no] = new_object 156 | return track_dict 157 | 158 | def load_calibration(self, calib_path): 159 | if os.path.isfile(calib_path): 160 | with open(calib_path, 'r') as file: 161 | data = json.load(file) 162 | self.camera_projection_matrix = np.array(data["camera projection matrix"]) 163 | self.homography_matrix = np.array(data["homography matrix"]) 164 | else: 165 | print(f'\033[33mwarning\033[0m : not found Calibration File.') 166 | print(f'\033[33mwarning\033[0m : world coordinate calculations are ignored.') 167 | 168 | def convert_coordinates_2world(self, x, y): 169 | vector_xyz = np.array([x, y, 1]) # z=1 170 | vector_xyz_3d = np.dot(np.linalg.inv(self.homography_matrix), vector_xyz.T) 171 | return vector_xyz_3d[0] / vector_xyz_3d[2], vector_xyz_3d[1] / vector_xyz_3d[2] 172 | 173 | class DetectedObject: 174 | """ 175 | Represents individual detected object to track. 
176 | """ 177 | def __init__(self, object_id, frame_id, coordinate, confidence, worldcoordinate, feature_path, image_path=None): 178 | self.object_id = f"{object_id:08d}" # AKA serial number 179 | self.frame_id = frame_id 180 | self.feature_path = feature_path 181 | self.confidence = confidence 182 | self.image_path = image_path 183 | if isinstance(coordinate, Coordinate): 184 | self.coordinate = coordinate 185 | elif isinstance(coordinate, (list, tuple)) and len(coordinate) == 4: 186 | self.coordinate = Coordinate(*coordinate) 187 | else: 188 | raise Exception(f"Unknown coordinate format: {coordinate}") 189 | 190 | if isinstance(worldcoordinate, WorldCoordinate): 191 | self.worldcoordinate = worldcoordinate 192 | elif isinstance(worldcoordinate, (list, tuple)) and len(worldcoordinate) == 4: 193 | self.worldcoordinate = WorldCoordinate(*worldcoordinate) 194 | else: 195 | self.worldcoordinate = None 196 | 197 | class Coordinate: 198 | def __init__(self, x1, y1, x2, y2): 199 | self.x1 = int(float(x1)) 200 | self.y1 = int(float(y1)) 201 | self.x2 = int(float(x2)) 202 | self.y2 = int(float(y2)) 203 | 204 | def __str__(self): 205 | return(f'{{"x1":{self.x1}, "y1":{self.y1}, "x2":{self.x2}, "y2":{self.y2}}}') 206 | 207 | class WorldCoordinate: 208 | def __init__(self, x, y): 209 | self.x = float(x) 210 | self.y = float(y) 211 | 212 | def __str__(self): 213 | return(f'{{"x":{self.x}, "y":{self.y}}}') 214 | 215 | class TrackingCluster: 216 | def __init__(self, camera_id, offline_id): 217 | self.camera_id = camera_id 218 | self.offline_id = 0 219 | self.global_offline_id = -1 220 | self.clusters = {} 221 | self.serials = [] 222 | 223 | def add(self, serial): 224 | if serial in self.serials: 225 | raise Exception("DUP!") 226 | self.serials.append(serial) 227 | 228 | 229 | class TrackingClusters: 230 | def __init__(self, camera_id): 231 | self.camera_id = camera_id 232 | self.clusters = [] 233 | self.offline_ids = [] 234 | 235 | def add(self, cluster: TrackingCluster): 236 | cl_id = cluster.offline_id 237 | if cl_id in self.offline_ids: 238 | raise Exception("DUP!") 239 | else: 240 | self.clusters.append(cluster) 241 | 242 | def get(self, cluster_id): 243 | if not cluster_id in self.offline_ids: 244 | raise Exception("No cluster_id registered. {cluster_id}") 245 | else: 246 | return self.clusters[offline_ids.index(cluster_id)] 247 | 248 | class feature_vector_shed: 249 | def __init__(self): 250 | self.features = {} 251 | 252 | def add_vector(self, camera_id, serial_no, npy_path): 253 | key = camera_id + "_" + serial_no 254 | if key in self.features: 255 | print(f"Feature vector of camera ID '{camera_id}' and serial no '{serial_no}' is already exist. ") 256 | return 257 | 258 | if not os.path.isfile(npy_path): 259 | print(f"The feature vector file '{npy_path}' does not exist. 
") 260 | return 261 | feature = np.load(npy_path) 262 | self.features[key] = feature 263 | 264 | def get(self, camera_id, serial_no): 265 | key = camera_id + "_" + serial_no 266 | return self.features[key] 267 | -------------------------------------------------------------------------------- /tracking/src/pose.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import cv2 5 | 6 | class PoseKeypoints: 7 | def __init__(self, keypoint_json): 8 | self.kp_indice_foot = [15, 16] # ankles 9 | self.kp_indice_torso = [5, 6, 11, 12, 13, 14] # shoulders, hips, knees 10 | self.kp_indice_torso_legs = [5, 6, 11, 12, 13, 14, 15, 16] # shoulders, hips, knees, ankles 11 | 12 | self._parse_keypoint_json(keypoint_json) 13 | self.serial_dict = {} 14 | 15 | def _parse_keypoint_json(self, file_path): 16 | if os.path.isfile(file_path): 17 | with open(file_path, 'r') as file: 18 | data = json.load(file) 19 | self.keypoints = data 20 | else: 21 | raise Exception(f"Keypoint json file '{file_path}' does not exist.") 22 | 23 | def filter(self, keypoints=None, score_thr=0.3, target_parts="torso_legs", max_frames=0): 24 | filtered = {} 25 | if keypoints == None: 26 | keypoints = self.keypoints 27 | for i, frame in enumerate(keypoints): 28 | if max_frames != 0 and i >= max_frames: 29 | break 30 | detections = keypoints[frame] 31 | target_indices = self.kp_indice_torso if target_parts == "torso" else self.kp_indice_torso_legs 32 | for det in detections: 33 | kps = det["keypoints"] 34 | confidences = [k for i2, k in enumerate(kps) if i2 in target_indices and k[2] >= score_thr] 35 | if len(confidences) < (len(target_indices)): 36 | continue 37 | 38 | pose_entity = [det["bbox"], ] 39 | if int(frame) in filtered: 40 | filtered[frame].append(det) 41 | else: 42 | filtered[frame] = [det] 43 | print(f"Num of filtered results: {len(filtered)}") 44 | return filtered 45 | 46 | def summary(self): # Just show top_n data 47 | if len(self.keypoints) <= 0: 48 | print(f"Empty keypoints") 49 | return 50 | print(f"Number of frames: {len(self.keypoints)}") 51 | 52 | def get_keypoints(self, serial:str): 53 | """ 54 | This must be called after assign_serial_from_tracking_dict() was called, 55 | as it builds a dictionary with "serial" number as keys. 56 | 57 | serial: zero-filled 8-digit string 58 | """ 59 | if isinstance(serial, int): 60 | serial = f"{serial:08d}" 61 | elif isinstance(serial, str) and len(serial) != 8: 62 | serial = f"{int(serial):08d}" 63 | if len(self.serial_dict) <= 0: 64 | raise Exception(f"Serial based dictionary is not built yet.") 65 | if serial in self.serial_dict: 66 | return self.serial_dict[serial] 67 | else: 68 | return None 69 | 70 | def _build_serial_dict(self, keypoints=None): 71 | """ 72 | This must be called after assign_serial_from_tracking_dict() was called, 73 | as it builds a dictionary with "serial" number as keys. 
74 | """ 75 | if len(keypoints) == 0: 76 | return None 77 | serial_dict = {} 78 | if keypoints == None: 79 | keypoints = self.keypoints 80 | for i, frame in enumerate(keypoints): 81 | detections = keypoints[frame] 82 | for det in detections: 83 | if "serial" in det: 84 | serial = det["serial"] 85 | if serial in serial_dict: 86 | print(f"DUP in serial numbers!!") 87 | else: 88 | serial_dict[serial] = {"bbox": det["bbox"], "Keypoints": det["keypoints"]} 89 | self.serial_dict = serial_dict 90 | 91 | return self.serial_dict 92 | 93 | def assign_serial_from_tracking_dict(self, tracking_dict, keypoints=None): 94 | """ 95 | tracking_dict: dictionary of tracking_dict or path to tracking_dict json file. 96 | """ 97 | if keypoints == None: 98 | keypoints = self.keypoints 99 | if isinstance(tracking_dict, str): 100 | if os.path.isfile(tracking_dict): 101 | with open(tracking_dict) as f: 102 | tracking_dict = json.load(f) 103 | tracking_coord = {} 104 | for serial in tracking_dict: 105 | td_coord = tracking_dict[serial]["Coordinate"] 106 | td_frame = tracking_dict[serial]["Frame"] 107 | key = f"{td_frame}_{td_coord['x1']}_{td_coord['y1']}_{td_coord['x2']}_{td_coord['y2']}" 108 | if key in tracking_coord: 109 | continue #raise Exception(f"DUP! {key}") 110 | tracking_coord[key] = serial 111 | for frame in keypoints: 112 | detections = keypoints[frame] 113 | 114 | for det in detections: 115 | bbox = det["bbox"] 116 | key = f"{int(frame)}_{int(bbox[0])}_{int(bbox[1])}_{int(bbox[2])}_{int(bbox[3])}" 117 | if key in tracking_coord: 118 | det["serial"] = tracking_coord[key] 119 | else: 120 | #print(f"No tracking found for bbox: {key}, {bbox}") 121 | pass 122 | 123 | # Build dict with serial as key 124 | return self._build_serial_dict(keypoints=keypoints) 125 | 126 | def show_footpoints(self, keypoints=None, frame_img_root="Frames", output_mp4=None, score_thr=0.3, target_parts="torso_legs", max_frames=0): # Generate mp4 127 | # Creating mp4 128 | if output_mp4 == None: 129 | output_mp4 = f"foot_points.mp4" 130 | fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') 131 | video_wtr = cv2.VideoWriter(output_mp4, fourcc=fourcc, fps=30.0, frameSize=(1280, 960)) 132 | if not video_wtr.isOpened(): 133 | print(f"Cannot open video writer.") 134 | return 135 | 136 | filtered = self.filter(keypoints=keypoints, score_thr=score_thr, target_parts=target_parts, max_frames=max_frames) 137 | for frame in filtered: 138 | # Read frame image file 139 | frame_img_path = os.path.join(frame_img_root, f"{int(frame):06d}.jpg") 140 | frame_img = cv2.imread(frame_img_path) 141 | detections = self.keypoints[frame] 142 | target_indices = self.kp_indice_torso if target_parts == "torso" else self.kp_indice_torso_legs 143 | for det in detections: 144 | keypoints = det["keypoints"] 145 | left_ankle, right_ankle = keypoints[self.kp_indice_foot[0]], keypoints[self.kp_indice_foot[1]] 146 | if float(left_ankle[2]) >= score_thr: 147 | color = (0, 255, 0) 148 | else: 149 | #print(f"Low confidence on KP[15]: {float(fp1[2])}") 150 | color = (0, 0, 255) 151 | cv2.circle(frame_img, (int(left_ankle[0]), int(left_ankle[1])), 5, color, 3) 152 | 153 | if float(right_ankle[2]) >= score_thr: 154 | color = (0, 255, 0) 155 | else: 156 | #print(f"Low confidence on KP[16]: {float(fp2[2])}") 157 | color = (0, 0, 255) 158 | cv2.circle(frame_img, (int(right_ankle[0]), int(right_ankle[1])), 5, color, 3) 159 | frame_img = cv2.resize(frame_img, (1280, 960)) 160 | video_wtr.write(frame_img) 161 | video_wtr.release() 162 | print(f"Saved video file: {output_mp4}\n") 163 
| 164 | def show_footpoints_custom(self, frame_img_root="Frames", output_mp4=None, score_thr=0.3, target_parts="torso_legs"): # Generate mp4 165 | # Creating mp4 166 | if output_mp4 == None: 167 | output_mp4 = f"foot_points.mp4" 168 | fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') 169 | video_wtr = cv2.VideoWriter(output_mp4, fourcc=fourcc, fps=30.0, frameSize=(1280, 960)) 170 | if not video_wtr.isOpened(): 171 | print(f"Cannot open video writer.") 172 | return 173 | 174 | for i, frame in enumerate(self.keypoints): 175 | if i >= 300: # only 10-sec, just for debug 176 | break 177 | # Read frame image file 178 | frame_img_path = os.path.join(frame_img_root, f"{int(frame):06d}.jpg") 179 | frame_img = cv2.imread(frame_img_path) 180 | detections = self.keypoints[frame] 181 | target_indices = self.kp_indice_torso if target_parts == "torso" else self.kp_indice_torso_legs 182 | for det in detections: 183 | keypoints = det["keypoints"] 184 | confidences = [k for i2, k in enumerate(keypoints) if i2 in target_indices and k[2] >= score_thr] 185 | if len(confidences) < (len(target_indices)): 186 | # Show bbox in red if doesn't meet the criteria 187 | bbox = det["bbox"] 188 | cv2.rectangle(frame_img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), thickness=2) 189 | 190 | left_ankle, right_ankle = keypoints[self.kp_indice_foot[0]], keypoints[self.kp_indice_foot[1]] 191 | if float(left_ankle[2]) >= score_thr: 192 | color = (0, 255, 0) 193 | else: 194 | #print(f"Low confidence on KP[15]: {float(fp1[2])}") 195 | color = (0, 0, 255) 196 | cv2.circle(frame_img, (int(left_ankle[0]), int(left_ankle[1])), 5, color, 3) 197 | 198 | if float(right_ankle[2]) >= score_thr: 199 | color = (0, 255, 0) 200 | else: 201 | #print(f"Low confidence on KP[16]: {float(fp2[2])}") 202 | color = (0, 0, 255) 203 | cv2.circle(frame_img, (int(right_ankle[0]), int(right_ankle[1])), 5, color, 3) 204 | frame_img = cv2.resize(frame_img, (1280, 960)) 205 | video_wtr.write(frame_img) 206 | video_wtr.release() 207 | print(f"Saved video file: {output_mp4}\n") 208 | 209 | def draw_keypoints(self, frame_img, frame_id, out_file="kp_img.jpg"): 210 | def draw_line(img, s1, s2, bbox): 211 | color = (255, 0, 0) # Blue 212 | cv2.line(img, (int(s1[0]), int(s1[1])), 213 | (int(s2[0]), int(s2[1])), color, thickness=2) 214 | 215 | def draw_dot(img, src, bbox): 216 | color = (0, 255, 0) # Green 217 | cv2.circle(img, (int(src[0]), int(src[1])), 5, color, 2) 218 | 219 | frame_id = str(frame_id) 220 | if not frame_id in self.keypoints: 221 | print(f"There's no record asssiate with frame {frame_id} in the keypoint data.") 222 | return 223 | 224 | # Read frame image file 225 | if os.path.isfile(frame_img): 226 | img = cv2.imread(frame_img) 227 | else: 228 | print(f"There's no such image file {frame_img}.") 229 | return 230 | 231 | detections = self.keypoints[str(frame_id)] 232 | for det in detections: 233 | keypoints = det["keypoints"] 234 | bbox = det["bbox"] 235 | 236 | # draw lines 237 | # 0 to 1, 2 238 | draw_line(img, keypoints[0], keypoints[1], bbox) 239 | draw_line(img, keypoints[0], keypoints[2], bbox) 240 | # 1 to 2, 3 241 | draw_line(img, keypoints[1], keypoints[2], bbox) 242 | draw_line(img, keypoints[1], keypoints[3], bbox) 243 | # 2 to 4 244 | draw_line(img, keypoints[2], keypoints[4], bbox) 245 | # 3 to 5 246 | draw_line(img, keypoints[3], keypoints[5], bbox) 247 | # 4 to 6 248 | draw_line(img, keypoints[4], keypoints[6], bbox) 249 | # 5 to 6, 7, 11 250 | draw_line(img, keypoints[5], keypoints[6], bbox) 251 | 
draw_line(img, keypoints[5], keypoints[7], bbox) 252 | draw_line(img, keypoints[5], keypoints[11], bbox) 253 | # 6 to 8, 12 254 | draw_line(img, keypoints[6], keypoints[8], bbox) 255 | draw_line(img, keypoints[6], keypoints[12], bbox) 256 | # 7 to 9 257 | draw_line(img, keypoints[7], keypoints[9], bbox) 258 | # 8 to 10 259 | draw_line(img, keypoints[8], keypoints[10], bbox) 260 | # 11 to 12, 13 261 | draw_line(img, keypoints[11], keypoints[12], bbox) 262 | draw_line(img, keypoints[11], keypoints[13], bbox) 263 | # 12 to 14 264 | draw_line(img, keypoints[12], keypoints[14], bbox) 265 | # 13 to 15 266 | draw_line(img, keypoints[13], keypoints[15], bbox) 267 | # 14 to 16 268 | draw_line(img, keypoints[14], keypoints[16], bbox) 269 | 270 | # Draw dots 271 | for kp in keypoints: 272 | draw_dot(img, (int(kp[0]), int(kp[1])), bbox) 273 | 274 | cv2.imwrite(out_file, img) 275 | print(f"Saved keypoint file: {out_file}") 276 | -------------------------------------------------------------------------------- /tracking/config/scene_2_camera_id_file.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "scene_name": "scene_001", 4 | "camera_ids": [ 5 | 1, 6 | 2, 7 | 3, 8 | 4, 9 | 5, 10 | 6, 11 | 7, 12 | 8, 13 | 9, 14 | 10 15 | ] 16 | }, 17 | { 18 | "scene_name": "scene_041", 19 | "camera_ids": [ 20 | 361, 21 | 362, 22 | 363, 23 | 364, 24 | 365, 25 | 366, 26 | 367, 27 | 368, 28 | 369, 29 | 370 30 | ] 31 | }, 32 | { 33 | "scene_name": "scene_042", 34 | "camera_ids": [ 35 | 371, 36 | 372, 37 | 373, 38 | 374, 39 | 375, 40 | 376, 41 | 377, 42 | 378, 43 | 379 44 | ] 45 | }, 46 | { 47 | "scene_name": "scene_043", 48 | "camera_ids": [ 49 | 380, 50 | 381, 51 | 382, 52 | 383, 53 | 384, 54 | 385, 55 | 386, 56 | 387, 57 | 388, 58 | 389 59 | ] 60 | }, 61 | { 62 | "scene_name": "scene_044", 63 | "camera_ids": [ 64 | 390, 65 | 391, 66 | 392, 67 | 393, 68 | 394, 69 | 395, 70 | 396 71 | ] 72 | }, 73 | { 74 | "scene_name": "scene_045", 75 | "camera_ids": [ 76 | 397, 77 | 398, 78 | 399, 79 | 400, 80 | 401, 81 | 402, 82 | 403, 83 | 404 84 | ] 85 | }, 86 | { 87 | "scene_name": "scene_046", 88 | "camera_ids": [ 89 | 405, 90 | 406, 91 | 407, 92 | 408, 93 | 409, 94 | 410, 95 | 411 96 | ] 97 | }, 98 | { 99 | "scene_name": "scene_047", 100 | "camera_ids": [ 101 | 412, 102 | 413, 103 | 414, 104 | 415, 105 | 416, 106 | 417, 107 | 418, 108 | 419, 109 | 420, 110 | 421 111 | ] 112 | }, 113 | { 114 | "scene_name": "scene_048", 115 | "camera_ids": [ 116 | 422, 117 | 423, 118 | 424, 119 | 425, 120 | 426, 121 | 427, 122 | 428, 123 | 429 124 | ] 125 | }, 126 | { 127 | "scene_name": "scene_049", 128 | "camera_ids": [ 129 | 430, 130 | 431, 131 | 432, 132 | 433, 133 | 434, 134 | 435, 135 | 436, 136 | 437, 137 | 438, 138 | 439 139 | ] 140 | }, 141 | { 142 | "scene_name": "scene_050", 143 | "camera_ids": [ 144 | 440, 145 | 441, 146 | 442, 147 | 443, 148 | 444, 149 | 445, 150 | 446, 151 | 447 152 | ] 153 | }, 154 | { 155 | "scene_name": "scene_051", 156 | "camera_ids": [ 157 | 448, 158 | 449, 159 | 450, 160 | 451, 161 | 452, 162 | 453, 163 | 454 164 | ] 165 | }, 166 | { 167 | "scene_name": "scene_052", 168 | "camera_ids": [ 169 | 455, 170 | 456, 171 | 457, 172 | 458, 173 | 459, 174 | 460, 175 | 461, 176 | 462 177 | ] 178 | }, 179 | { 180 | "scene_name": "scene_053", 181 | "camera_ids": [ 182 | 463, 183 | 464, 184 | 465, 185 | 466, 186 | 467, 187 | 468, 188 | 469, 189 | 470, 190 | 471 191 | ] 192 | }, 193 | { 194 | "scene_name": "scene_054", 195 | "camera_ids": [ 196 | 472, 197 | 473, 198 | 474, 
199 | 475, 200 | 476, 201 | 477, 202 | 478, 203 | 479 204 | ] 205 | }, 206 | { 207 | "scene_name": "scene_055", 208 | "camera_ids": [ 209 | 480, 210 | 481, 211 | 482, 212 | 483, 213 | 484, 214 | 485, 215 | 486 216 | ] 217 | }, 218 | { 219 | "scene_name": "scene_056", 220 | "camera_ids": [ 221 | 487, 222 | 488, 223 | 489, 224 | 490, 225 | 491, 226 | 492, 227 | 493, 228 | 494, 229 | 495 230 | ] 231 | }, 232 | { 233 | "scene_name": "scene_057", 234 | "camera_ids": [ 235 | 496, 236 | 497, 237 | 498, 238 | 499, 239 | 500, 240 | 501, 241 | 502, 242 | 503, 243 | 504, 244 | 505 245 | ] 246 | }, 247 | { 248 | "scene_name": "scene_058", 249 | "camera_ids": [ 250 | 506, 251 | 507, 252 | 508, 253 | 509, 254 | 510, 255 | 511, 256 | 512, 257 | 513, 258 | 514 259 | ] 260 | }, 261 | { 262 | "scene_name": "scene_059", 263 | "camera_ids": [ 264 | 515, 265 | 516, 266 | 517, 267 | 518, 268 | 519, 269 | 520, 270 | 521, 271 | 522, 272 | 523, 273 | 524 274 | ] 275 | }, 276 | { 277 | "scene_name": "scene_060", 278 | "camera_ids": [ 279 | 525, 280 | 526, 281 | 527, 282 | 528, 283 | 529, 284 | 530, 285 | 531, 286 | 532, 287 | 533, 288 | 534 289 | ] 290 | }, 291 | { 292 | "scene_name": "scene_061", 293 | "camera_ids": [ 294 | 535, 295 | 536, 296 | 537, 297 | 538, 298 | 539, 299 | 540, 300 | 541, 301 | 542, 302 | 543, 303 | 544 304 | ] 305 | }, 306 | { 307 | "scene_name": "scene_062", 308 | "camera_ids": [ 309 | 545, 310 | 546, 311 | 547, 312 | 548, 313 | 549, 314 | 550, 315 | 551, 316 | 552, 317 | 553, 318 | 554 319 | ] 320 | }, 321 | { 322 | "scene_name": "scene_063", 323 | "camera_ids": [ 324 | 555, 325 | 556, 326 | 557, 327 | 558, 328 | 559, 329 | 560, 330 | 561, 331 | 562, 332 | 563, 333 | 564 334 | ] 335 | }, 336 | { 337 | "scene_name": "scene_064", 338 | "camera_ids": [ 339 | 565, 340 | 566, 341 | 567, 342 | 568, 343 | 569, 344 | 570, 345 | 571, 346 | 572, 347 | 573, 348 | 574 349 | ] 350 | }, 351 | { 352 | "scene_name": "scene_065", 353 | "camera_ids": [ 354 | 575, 355 | 576, 356 | 577, 357 | 578, 358 | 579, 359 | 580, 360 | 581, 361 | 582, 362 | 583, 363 | 584 364 | ] 365 | }, 366 | { 367 | "scene_name": "scene_066", 368 | "camera_ids": [ 369 | 585, 370 | 586, 371 | 587, 372 | 588, 373 | 589, 374 | 590, 375 | 591, 376 | 592, 377 | 593, 378 | 594 379 | ] 380 | }, 381 | { 382 | "scene_name": "scene_067", 383 | "camera_ids": [ 384 | 595, 385 | 596, 386 | 597, 387 | 598, 388 | 599, 389 | 600, 390 | 601, 391 | 602, 392 | 603, 393 | 604 394 | ] 395 | }, 396 | { 397 | "scene_name": "scene_068", 398 | "camera_ids": [ 399 | 605, 400 | 606, 401 | 607, 402 | 608, 403 | 609, 404 | 610, 405 | 611, 406 | 612, 407 | 613, 408 | 614 409 | ] 410 | }, 411 | { 412 | "scene_name": "scene_069", 413 | "camera_ids": [ 414 | 615, 415 | 616, 416 | 617, 417 | 618, 418 | 619, 419 | 620, 420 | 621, 421 | 622, 422 | 623, 423 | 624 424 | ] 425 | }, 426 | { 427 | "scene_name": "scene_070", 428 | "camera_ids": [ 429 | 625, 430 | 626, 431 | 627, 432 | 628, 433 | 629, 434 | 630, 435 | 631, 436 | 632, 437 | 633, 438 | 634 439 | ] 440 | }, 441 | { 442 | "scene_name": "scene_071", 443 | "camera_ids": [ 444 | 635, 445 | 636, 446 | 637, 447 | 638, 448 | 639, 449 | 640, 450 | 641, 451 | 642, 452 | 643, 453 | 644, 454 | 645, 455 | 646, 456 | 647, 457 | 648, 458 | 650 459 | ] 460 | }, 461 | { 462 | "scene_name": "scene_072", 463 | "camera_ids": [ 464 | 651, 465 | 652, 466 | 653, 467 | 654, 468 | 655, 469 | 656, 470 | 657, 471 | 658, 472 | 659, 473 | 660, 474 | 661, 475 | 662, 476 | 663, 477 | 664, 478 | 665, 479 | 666 480 | ] 481 | }, 482 | { 483 | 
"scene_name": "scene_073", 484 | "camera_ids": [ 485 | 667, 486 | 668, 487 | 669, 488 | 670, 489 | 671, 490 | 672, 491 | 673, 492 | 674, 493 | 675, 494 | 676, 495 | 677, 496 | 678, 497 | 679, 498 | 680, 499 | 681, 500 | 682 501 | ] 502 | }, 503 | { 504 | "scene_name": "scene_074", 505 | "camera_ids": [ 506 | 683, 507 | 684, 508 | 685, 509 | 686, 510 | 687, 511 | 688, 512 | 689, 513 | 690, 514 | 691, 515 | 692, 516 | 693, 517 | 694, 518 | 695, 519 | 696, 520 | 697, 521 | 698 522 | ] 523 | }, 524 | { 525 | "scene_name": "scene_075", 526 | "camera_ids": [ 527 | 699, 528 | 700, 529 | 701, 530 | 702, 531 | 703, 532 | 704, 533 | 705, 534 | 706, 535 | 707, 536 | 708, 537 | 709, 538 | 710, 539 | 711, 540 | 712, 541 | 713, 542 | 714 543 | ] 544 | }, 545 | { 546 | "scene_name": "scene_076", 547 | "camera_ids": [ 548 | 715, 549 | 716, 550 | 717, 551 | 718, 552 | 719, 553 | 720, 554 | 721, 555 | 722, 556 | 723, 557 | 724, 558 | 725, 559 | 726, 560 | 727, 561 | 728, 562 | 729, 563 | 730 564 | ] 565 | }, 566 | { 567 | "scene_name": "scene_077", 568 | "camera_ids": [ 569 | 731, 570 | 732, 571 | 733, 572 | 734, 573 | 735, 574 | 736, 575 | 737, 576 | 738, 577 | 739, 578 | 740, 579 | 741, 580 | 742, 581 | 743, 582 | 744, 583 | 745, 584 | 746 585 | ] 586 | }, 587 | { 588 | "scene_name": "scene_078", 589 | "camera_ids": [ 590 | 747, 591 | 748, 592 | 749, 593 | 750, 594 | 751, 595 | 752, 596 | 753, 597 | 754, 598 | 755, 599 | 756, 600 | 757, 601 | 758, 602 | 759, 603 | 760, 604 | 761, 605 | 762 606 | ] 607 | }, 608 | { 609 | "scene_name": "scene_079", 610 | "camera_ids": [ 611 | 763, 612 | 764, 613 | 765, 614 | 766, 615 | 767, 616 | 768, 617 | 769, 618 | 770, 619 | 771, 620 | 772, 621 | 773, 622 | 774, 623 | 775, 624 | 776, 625 | 777, 626 | 778 627 | ] 628 | }, 629 | { 630 | "scene_name": "scene_080", 631 | "camera_ids": [ 632 | 779, 633 | 780, 634 | 781, 635 | 782, 636 | 783, 637 | 784, 638 | 785, 639 | 786, 640 | 787, 641 | 788, 642 | 789, 643 | 790, 644 | 791, 645 | 792, 646 | 793, 647 | 794 648 | ] 649 | }, 650 | { 651 | "scene_name": "scene_081", 652 | "camera_ids": [ 653 | 795, 654 | 796, 655 | 797, 656 | 798, 657 | 799, 658 | 800, 659 | 801, 660 | 802, 661 | 803, 662 | 804, 663 | 805, 664 | 806, 665 | 807, 666 | 808, 667 | 809, 668 | 810 669 | ] 670 | }, 671 | { 672 | "scene_name": "scene_082", 673 | "camera_ids": [ 674 | 811, 675 | 812, 676 | 813, 677 | 814, 678 | 815, 679 | 816, 680 | 817, 681 | 818, 682 | 819, 683 | 820, 684 | 821, 685 | 822, 686 | 823, 687 | 824, 688 | 825, 689 | 826 690 | ] 691 | }, 692 | { 693 | "scene_name": "scene_083", 694 | "camera_ids": [ 695 | 827, 696 | 828, 697 | 829, 698 | 830, 699 | 831, 700 | 832, 701 | 833, 702 | 834, 703 | 835, 704 | 836, 705 | 837, 706 | 838, 707 | 839, 708 | 840, 709 | 841, 710 | 842 711 | ] 712 | }, 713 | { 714 | "scene_name": "scene_084", 715 | "camera_ids": [ 716 | 843, 717 | 844, 718 | 845, 719 | 846, 720 | 847, 721 | 848, 722 | 849, 723 | 850, 724 | 851, 725 | 852, 726 | 853, 727 | 854, 728 | 855, 729 | 856, 730 | 857, 731 | 858 732 | ] 733 | }, 734 | { 735 | "scene_name": "scene_085", 736 | "camera_ids": [ 737 | 859, 738 | 860, 739 | 861, 740 | 862, 741 | 863, 742 | 864, 743 | 865, 744 | 866, 745 | 867, 746 | 868, 747 | 869, 748 | 870, 749 | 871, 750 | 872, 751 | 873, 752 | 874 753 | ] 754 | }, 755 | { 756 | "scene_name": "scene_086", 757 | "camera_ids": [ 758 | 875, 759 | 876, 760 | 877, 761 | 878, 762 | 879, 763 | 880, 764 | 881, 765 | 882, 766 | 883, 767 | 884, 768 | 885, 769 | 886, 770 | 887, 771 | 888, 772 | 889, 773 | 890 774 | ] 775 | 
}, 776 | { 777 | "scene_name": "scene_087", 778 | "camera_ids": [ 779 | 891, 780 | 892, 781 | 893, 782 | 894, 783 | 895, 784 | 896, 785 | 897, 786 | 898, 787 | 899, 788 | 900, 789 | 901, 790 | 902, 791 | 903, 792 | 904, 793 | 905, 794 | 906 795 | ] 796 | }, 797 | { 798 | "scene_name": "scene_088", 799 | "camera_ids": [ 800 | 907, 801 | 908, 802 | 909, 803 | 910, 804 | 911, 805 | 912, 806 | 913, 807 | 914, 808 | 915, 809 | 916, 810 | 917, 811 | 918, 812 | 919, 813 | 920, 814 | 921, 815 | 922 816 | ] 817 | }, 818 | { 819 | "scene_name": "scene_089", 820 | "camera_ids": [ 821 | 923, 822 | 924, 823 | 925, 824 | 926, 825 | 927, 826 | 928, 827 | 929, 828 | 930, 829 | 931, 830 | 932, 831 | 933, 832 | 934, 833 | 935, 834 | 936, 835 | 937 836 | ] 837 | }, 838 | { 839 | "scene_name": "scene_090", 840 | "camera_ids": [ 841 | 938, 842 | 939, 843 | 940, 844 | 941, 845 | 942, 846 | 943, 847 | 944, 848 | 945, 849 | 946, 850 | 947, 851 | 948, 852 | 949, 853 | 950, 854 | 951, 855 | 952, 856 | 953 857 | ] 858 | } 859 | ] -------------------------------------------------------------------------------- /detector/aic24_get_detection.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import os 4 | import os.path as osp 5 | import time 6 | import cv2 7 | import torch 8 | import numpy as np 9 | import json 10 | 11 | from loguru import logger 12 | 13 | sys.path.append('.') 14 | 15 | from yolox.data.data_augment import preproc 16 | from yolox.exp import get_exp 17 | from yolox.utils import fuse_model, get_model_info, postprocess 18 | from yolox.utils.visualize import plot_tracking 19 | from tracker.bot_sort import BoTSORT 20 | from tracker.tracking_utils.timer import Timer 21 | 22 | 23 | IMAGE_EXT = [".jpg"] 24 | def make_parser(): 25 | parser = argparse.ArgumentParser("BoT-SORT Demo!") 26 | parser.add_argument("root_path", type=str, default=None) 27 | parser.add_argument("-s","--scene", default=None, type=str) 28 | #parser.add_argument("demo", default="image", help="demo type, eg. 
image, video and webcam") 29 | parser.add_argument("-expn", "--experiment-name", type=str, default=None) 30 | parser.add_argument("-n", "--name", type=str, default=None, help="model name") 31 | parser.add_argument("--path", default="", help="path to images or video") 32 | parser.add_argument("--camid", type=int, default=0, help="webcam demo camera id") 33 | parser.add_argument("--save_result", action="store_true",help="whether to save the inference result of image/video") 34 | parser.add_argument("-f", "--exp_file", default="yolox/exps/example/mot/yolox_x_mix_det.py", type=str, help="pls input your expriment description file") 35 | parser.add_argument("-c", "--ckpt", default="bytetrack_x_mot17.pth.tar", type=str, help="ckpt for eval") 36 | parser.add_argument("--device", default="gpu", type=str, help="device to run our model, can either be cpu or gpu") 37 | parser.add_argument("--conf", default=None, type=float, help="test conf") 38 | parser.add_argument("--nms", default=None, type=float, help="test nms threshold") 39 | parser.add_argument("--tsize", default=None, type=int, help="test img size") 40 | parser.add_argument("--fps", default=30, type=int, help="frame rate (fps)") 41 | parser.add_argument("--fp16", dest="fp16", default=False, action="store_true",help="Adopting mix precision evaluating.") 42 | parser.add_argument("--fuse", dest="fuse", default=False, action="store_true", help="Fuse conv and bn for testing.") 43 | parser.add_argument("--trt", dest="trt", default=False, action="store_true", help="Using TensorRT model for testing.") 44 | 45 | # tracking args 46 | parser.add_argument("--track_high_thresh", type=float, default=0.6, help="tracking confidence threshold") 47 | parser.add_argument("--track_low_thresh", default=0.1, type=float, help="lowest detection threshold") 48 | parser.add_argument("--new_track_thresh", default=0.7, type=float, help="new track thresh") 49 | parser.add_argument("--track_buffer", type=int, default=30, help="the frames for keep lost tracks") 50 | parser.add_argument("--match_thresh", type=float, default=0.8, help="matching threshold for tracking") 51 | parser.add_argument("--aspect_ratio_thresh", type=float, default=1.6, help="threshold for filtering out boxes of which aspect ratio are above the given value.") 52 | parser.add_argument('--min_box_area', type=float, default=10, help='filter out tiny boxes') 53 | parser.add_argument("--fuse-score", dest="fuse_score", default=False, action="store_true", help="fuse score and iou for association") 54 | 55 | # CMC 56 | parser.add_argument("--cmc-method", default="orb", type=str, help="cmc method: files (Vidstab GMC) | orb | ecc") 57 | 58 | # ReID 59 | parser.add_argument("--with-reid", dest="with_reid", default=False, action="store_true", help="test mot20.") 60 | parser.add_argument("--fast-reid-config", dest="fast_reid_config", default=r"fast_reid/configs/MOT17/sbs_S50.yml", type=str, help="reid config file path") 61 | parser.add_argument("--fast-reid-weights", dest="fast_reid_weights", default=r"pretrained/mot17_sbs_S50.pth", type=str,help="reid config file path") 62 | parser.add_argument('--proximity_thresh', type=float, default=0.5, help='threshold for rejecting low overlap reid matches') 63 | parser.add_argument('--appearance_thresh', type=float, default=0.25, help='threshold for rejecting low appearance similarity reid matches') 64 | return parser 65 | 66 | 67 | def get_image_list(path): 68 | image_names = [] 69 | for maindir, subdir, file_name_list in os.walk(path): 70 | for filename in file_name_list: 
71 | apath = osp.join(maindir, filename) 72 | ext = osp.splitext(apath)[1] 73 | if ext in IMAGE_EXT: 74 | image_names.append(apath) 75 | return image_names 76 | 77 | 78 | def write_results(filename, results): 79 | save_format = '{frame},{id},{x1},{y1},{w},{h},{s},-1,-1,-1\n' 80 | with open(filename, 'w') as f: 81 | for frame_id, tlwhs, track_ids, scores in results: 82 | for tlwh, track_id, score in zip(tlwhs, track_ids, scores): 83 | if track_id < 0: 84 | continue 85 | x1, y1, w, h = tlwh 86 | line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1), s=round(score, 2)) 87 | f.write(line) 88 | logger.info('save results to {}'.format(filename)) 89 | 90 | 91 | class Predictor(object): 92 | def __init__( 93 | self, 94 | model, 95 | exp, 96 | trt_file=None, 97 | decoder=None, 98 | device=torch.device("cpu"), 99 | fp16=False 100 | ): 101 | self.model = model 102 | self.decoder = decoder 103 | self.num_classes = exp.num_classes 104 | self.confthre = exp.test_conf 105 | self.nmsthre = exp.nmsthre 106 | self.test_size = exp.test_size 107 | self.device = device 108 | self.fp16 = fp16 109 | if trt_file is not None: 110 | from torch2trt import TRTModule 111 | 112 | model_trt = TRTModule() 113 | model_trt.load_state_dict(torch.load(trt_file)) 114 | 115 | x = torch.ones((1, 3, exp.test_size[0], exp.test_size[1]), device=device) 116 | self.model(x) 117 | self.model = model_trt 118 | self.rgb_means = (0.485, 0.456, 0.406) 119 | self.std = (0.229, 0.224, 0.225) 120 | 121 | def inference(self, img, timer): 122 | img_info = {"id": 0} 123 | if isinstance(img, str): 124 | img_info["file_name"] = osp.basename(img) 125 | img = cv2.imread(img) 126 | else: 127 | img_info["file_name"] = None 128 | 129 | height, width = img.shape[:2] 130 | img_info["height"] = height 131 | img_info["width"] = width 132 | img_info["raw_img"] = img 133 | 134 | img, ratio = preproc(img, self.test_size, self.rgb_means, self.std) 135 | img_info["ratio"] = ratio 136 | img = torch.from_numpy(img).unsqueeze(0).float().to(self.device) 137 | if self.fp16: 138 | img = img.half() # to FP16 139 | 140 | with torch.no_grad(): 141 | timer.tic() 142 | outputs = self.model(img) 143 | if self.decoder is not None: 144 | outputs = self.decoder(outputs, dtype=outputs.type()) 145 | outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) 146 | return outputs, img_info 147 | 148 | 149 | def image_demo(predictor, vis_folder, current_time, args): 150 | 151 | root_path = args.root_path 152 | scene = args.scene 153 | input = osp.join(root_path, "Original", scene) 154 | cameras = [] 155 | for f in os.listdir(input): 156 | if os.path.isdir(os.path.join(input, f)): 157 | cameras.append(f) 158 | cameras = sorted(cameras) 159 | scale = min(800/1080,1440/1920) 160 | for cam in cameras: 161 | imgs = sorted(os.listdir(osp.join(input, cam, 'Frame'))) 162 | timer = Timer() 163 | output = osp.join(root_path,'Detection', '{}.txt'.format(osp.join(scene, cam))) 164 | outjson = osp.join(root_path,'Detection', '{}.json'.format(osp.join(scene, cam))) 165 | if not os.path.isdir(osp.join(root_path,'Detection',scene)): 166 | os.makedirs(osp.join(root_path,'Detection',scene)) 167 | u_num = 0 168 | ret_json = {} 169 | results = [] 170 | for frame_id, img_path in enumerate(imgs, 1): 171 | img_path = osp.join(input, cam, 'Frame',img_path) 172 | 173 | # Detect objects 174 | outputs, img_info = predictor.inference(img_path, timer) 175 | 176 | detections = [] 177 | if outputs[0] is not None: 178 | outputs 
= outputs[0].cpu().numpy() 179 | detections = outputs[:, :7] 180 | detections[:, :4] /= scale 181 | detections = detections[detections[:,4]>0.1] 182 | timer.toc() 183 | else: 184 | timer.toc() 185 | 186 | for det in detections: 187 | x1,y1,x2,y2,score,_,_ = det 188 | x1 = max(0,x1) 189 | y1 = max(0,y1) 190 | x2 = min(1920,x2) 191 | y2 = min(1080,y2) 192 | results.append([cam,frame_id,1,int(x1),int(y1),int(x2),int(y2),score]) 193 | det_json = {} 194 | det_json['Frame'] = frame_id 195 | det_json['ImgPath'] = img_path.replace(root_path + '/','') 196 | det_json['NpyPath'] = '' 197 | Coordinate = {'x1':int(x1), 'y1':int(y1), 'x2': int(x2), 'y2': int(y2)} 198 | det_json['Coordinate'] = Coordinate 199 | det_json['ClusterID'] = None 200 | det_json['OfflineID'] = None 201 | ret_json[str(u_num).zfill(8)] = det_json 202 | u_num += 1 203 | 204 | if frame_id % 1000 == 0: 205 | logger.info('Processing cam {} frame {} ({:.2f} fps)'.format(cam, frame_id, 1. / max(1e-5, timer.average_time))) 206 | 207 | with open(output,'a') as f: 208 | for cam,frame_id,cls,x1,y1,x2,y2,score in results: 209 | f.write('{},{},{},{},{},{},{},{}\n'.format(cam,frame_id,cls,x1,y1,x2,y2,score)) 210 | with open(outjson, 'a') as f: 211 | json.dump(ret_json, f, ensure_ascii=False) 212 | 213 | def imageflow_demo(predictor, vis_folder, current_time, args): 214 | cap = cv2.VideoCapture(args.path if args.demo == "video" else args.camid) 215 | width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float 216 | height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float 217 | fps = cap.get(cv2.CAP_PROP_FPS) 218 | timestamp = time.strftime("%Y_%m_%d_%H_%M_%S", current_time) 219 | save_folder = osp.join(vis_folder, timestamp) 220 | os.makedirs(save_folder, exist_ok=True) 221 | if args.demo == "video": 222 | save_path = osp.join(save_folder, args.path.split("/")[-1]) 223 | else: 224 | save_path = osp.join(save_folder, "camera.mp4") 225 | logger.info(f"video save_path is {save_path}") 226 | vid_writer = cv2.VideoWriter( 227 | save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) 228 | ) 229 | tracker = BoTSORT(args, frame_rate=args.fps) 230 | timer = Timer() 231 | frame_id = 0 232 | results = [] 233 | while True: 234 | if frame_id % 20 == 0: 235 | logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1. / max(1e-5, timer.average_time))) 236 | ret_val, frame = cap.read() 237 | if ret_val: 238 | # Detect objects 239 | outputs, img_info = predictor.inference(frame, timer) 240 | scale = min(exp.test_size[0] / float(img_info['height'], ), exp.test_size[1] / float(img_info['width'])) 241 | 242 | if outputs[0] is not None: 243 | outputs = outputs[0].cpu().numpy() 244 | detections = outputs[:, :7] 245 | detections[:, :4] /= scale 246 | 247 | # Run tracker 248 | online_targets = tracker.update(detections, img_info["raw_img"]) 249 | 250 | online_tlwhs = [] 251 | online_ids = [] 252 | online_scores = [] 253 | for t in online_targets: 254 | tlwh = t.tlwh 255 | tid = t.track_id 256 | vertical = tlwh[2] / tlwh[3] > args.aspect_ratio_thresh 257 | if tlwh[2] * tlwh[3] > args.min_box_area and not vertical: 258 | online_tlwhs.append(tlwh) 259 | online_ids.append(tid) 260 | online_scores.append(t.score) 261 | results.append( 262 | f"{frame_id},{tid},{tlwh[0]:.2f},{tlwh[1]:.2f},{tlwh[2]:.2f},{tlwh[3]:.2f},{t.score:.2f},-1,-1,-1\n" 263 | ) 264 | timer.toc() 265 | online_im = plot_tracking( 266 | img_info['raw_img'], online_tlwhs, online_ids, frame_id=frame_id + 1, fps=1. 
/ timer.average_time 267 | ) 268 | else: 269 | timer.toc() 270 | online_im = img_info['raw_img'] 271 | if args.save_result: 272 | vid_writer.write(online_im) 273 | ch = cv2.waitKey(1) 274 | if ch == 27 or ch == ord("q") or ch == ord("Q"): 275 | break 276 | else: 277 | break 278 | frame_id += 1 279 | 280 | if args.save_result: 281 | res_file = osp.join(vis_folder, f"{timestamp}.txt") 282 | with open(res_file, 'w') as f: 283 | f.writelines(results) 284 | logger.info(f"save results to {res_file}") 285 | 286 | 287 | def main(exp, args): 288 | if not args.experiment_name: 289 | args.experiment_name = exp.exp_name 290 | 291 | output_dir = osp.join(exp.output_dir, args.experiment_name) 292 | os.makedirs(output_dir, exist_ok=True) 293 | 294 | if args.save_result: 295 | vis_folder = osp.join(output_dir, "track_vis") 296 | os.makedirs(vis_folder, exist_ok=True) 297 | 298 | if args.trt: 299 | args.device = "gpu" 300 | args.device = torch.device("cuda" if args.device == "gpu" else "cpu") 301 | 302 | logger.info("Args: {}".format(args)) 303 | 304 | if args.conf is not None: 305 | exp.test_conf = args.conf 306 | if args.nms is not None: 307 | exp.nmsthre = args.nms 308 | if args.tsize is not None: 309 | exp.test_size = (args.tsize, args.tsize) 310 | 311 | model = exp.get_model().to(args.device) 312 | logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size))) 313 | model.eval() 314 | 315 | if not args.trt: 316 | if args.ckpt is None: 317 | ckpt_file = osp.join(output_dir, "best_ckpt.pth.tar") 318 | else: 319 | ckpt_file = args.ckpt 320 | logger.info("loading checkpoint") 321 | ckpt = torch.load(ckpt_file, map_location="cpu") 322 | # load the model state dict 323 | model.load_state_dict(ckpt["model"]) 324 | logger.info("loaded checkpoint done.") 325 | 326 | if args.fuse: 327 | logger.info("\tFusing model...") 328 | model = fuse_model(model) 329 | 330 | if args.fp16: 331 | model = model.half() # to FP16 332 | 333 | if args.trt: 334 | assert not args.fuse, "TensorRT model is not support model fusing!" 335 | trt_file = osp.join(output_dir, "model_trt.pth") 336 | assert osp.exists( 337 | trt_file 338 | ), "TensorRT model is not found!\n Run python3 tools/trt.py first!" 
339 | model.head.decode_in_inference = False 340 | decoder = model.head.decode_outputs 341 | logger.info("Using TensorRT to inference") 342 | else: 343 | trt_file = None 344 | decoder = None 345 | 346 | predictor = Predictor(model, exp, trt_file, decoder, args.device, args.fp16) 347 | current_time = time.localtime() 348 | 349 | image_demo(predictor, None, current_time, args) 350 | 351 | 352 | if __name__ == "__main__": 353 | args = make_parser().parse_args() 354 | exp = get_exp(args.exp_file, args.name) 355 | 356 | args.ablation = False 357 | args.mot20 = not args.fuse_score 358 | 359 | main(exp, args) 360 | -------------------------------------------------------------------------------- /tracking/src/scpt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import sys 4 | from sklearn.cluster import DBSCAN 5 | from sklearn.metrics.pairwise import cosine_similarity 6 | from itertools import combinations, permutations, product, chain 7 | from scipy.cluster.hierarchy import dendrogram, linkage, fcluster 8 | from scipy.spatial.distance import squareform 9 | from scipy.interpolate import RegularGridInterpolator 10 | from collections import Counter 11 | 12 | 13 | def create_centrality_matrix(clusters, similarity_matrix,frames,**kwargs): 14 | # translate the similarity matrix between each node into the centrality matrix between each cluster 15 | remove_noise_cluster = kwargs.get('remove_noise_cluster', True) 16 | epsilon = kwargs.get('epsilon', 0.3) 17 | 18 | unique_clusters = sorted(list(set(clusters))) 19 | if remove_noise_cluster: 20 | if -1 in unique_clusters: 21 | unique_clusters.remove(-1) 22 | 23 | centrality_matrix = np.ones((len(unique_clusters),len(unique_clusters)))*-1 24 | np.fill_diagonal(centrality_matrix, 0) 25 | 26 | cluster_frames_dict = {cluster:[] for cluster in unique_clusters} 27 | if remove_noise_cluster: 28 | [cluster_frames_dict[cluster].append(frame) for frame,cluster in zip(frames,clusters) if cluster != -1] 29 | else: 30 | [cluster_frames_dict[cluster].append(frame) for frame,cluster in zip(frames,clusters)] 31 | 32 | for i in range(len(unique_clusters)): 33 | cluster1 = unique_clusters[i] 34 | cluster1_frames = cluster_frames_dict[cluster1] 35 | cluster1_indices = [k for k,cluster in enumerate(clusters) if cluster ==cluster1] #indices of similarity_matrix 36 | for j in range(i+1,len(unique_clusters)): 37 | cluster2 = unique_clusters[j] 38 | cluster2_frames = cluster_frames_dict[cluster2] 39 | common_frames = set(cluster1_frames).intersection(set(cluster2_frames)) 40 | if len(common_frames) > 0: continue 41 | cluster2_indices = [k for k,cluster in enumerate(clusters) if cluster ==cluster2] 42 | similarities = similarity_matrix[np.ix_(cluster1_indices, cluster2_indices)] 43 | centrality = np.sum(similarities[similarities > (1 - epsilon)]) 44 | centrality_matrix[i,j] = centrality 45 | centrality_matrix[j,i] = centrality 46 | return centrality_matrix 47 | 48 | def associate_cluster(clusters,centrality_matrix,**kwargs): 49 | # perform hierarchical clustering that targets clusters. 
50 | epsilon = kwargs.get('epsilon', 0.3) 51 | remove_noise_cluster = kwargs.get('remove_noise_cluster', True) 52 | cost_function = kwargs.get('cost_function', 1) 53 | minimize = kwargs.get("minimize",True) 54 | """ 55 | cost_function:1 ⇒ single linkage like 56 | cost_function:2 ⇒ average linkage like 57 | """ 58 | np.fill_diagonal(centrality_matrix, 0) 59 | clusters = np.array(clusters) 60 | unique_clusters = np.sort(np.unique(clusters)) 61 | if remove_noise_cluster: 62 | if -1 in unique_clusters: 63 | unique_clusters = unique_clusters[unique_clusters != -1] 64 | 65 | if cost_function == 1: 66 | pass 67 | elif cost_function == 2: 68 | count = Counter(clusters) 69 | if remove_noise_cluster: 70 | if -1 in count.keys(): 71 | del count[-1] 72 | centrality = np.max(centrality_matrix) 73 | 74 | th = 1 - epsilon 75 | while centrality > th: 76 | if cost_function == 1: 77 | max_index = np.argmax(centrality_matrix) 78 | elif cost_function == 2: 79 | len_element_matrix = np.outer(list(count.values()),list(count.values())) 80 | averaged_centrality_matrix = np.multiply(centrality_matrix,1/len_element_matrix) 81 | np.fill_diagonal(averaged_centrality_matrix, 0) 82 | max_index = np.argmax(averaged_centrality_matrix) 83 | 84 | cluster1_index, cluster2_index = np.unravel_index(max_index, centrality_matrix.shape) 85 | cluster1 = unique_clusters[cluster1_index] 86 | cluster2 = unique_clusters[cluster2_index] 87 | if cost_function == 1 or cost_function == 3: 88 | centrality = centrality_matrix[cluster1_index, cluster2_index] 89 | elif cost_function == 2: 90 | centrality = averaged_centrality_matrix[cluster1_index, cluster2_index] 91 | 92 | if centrality > th: 93 | target_row = centrality_matrix[[cluster1_index,cluster2_index],:] 94 | sum_row = np.sum(target_row,axis=0) 95 | if minimize: 96 | mask = np.min(target_row, axis=0) 97 | sum_row = np.where(mask < 0, -1, sum_row) 98 | centrality_matrix[:, cluster1_index] = sum_row 99 | centrality_matrix[cluster1_index,:] = sum_row 100 | 101 | next_indices = np.arange(len(unique_clusters)) 102 | next_indices = next_indices[next_indices != cluster2_index] 103 | centrality_matrix = centrality_matrix[np.ix_(next_indices,next_indices)] 104 | np.fill_diagonal(centrality_matrix, 0) 105 | clusters = np.where(clusters == cluster2, cluster1, clusters) 106 | unique_clusters = unique_clusters[unique_clusters != cluster2] 107 | 108 | if cost_function == 2: 109 | count[cluster1] += count[cluster2] 110 | del count[cluster2] 111 | else: 112 | break 113 | return clusters 114 | 115 | 116 | def get_initial_index(distance_matrix,overlap_indices_list): 117 | # determines the initial index for the assignment problem. 
118 | distances = [] 119 | for overlap_indices in overlap_indices_list: 120 | min_distance = 2 121 | for index1,index2 in combinations(overlap_indices,2): # 122 | distance = distance_matrix[index1,index2] 123 | min_distance = distance if distance < min_distance else min_distance 124 | distances.append(min_distance) 125 | max_index = np.argmax(distances) 126 | return max_index 127 | 128 | 129 | def fill_none(lst): 130 | # fill "None" to the missing value in sequential number list 131 | used_nums = [num for num in lst if num is not None] 132 | unused_nums = [num for num in range(len(lst)) if num not in used_nums] 133 | for i in range(len(lst)): 134 | if lst[i] is None: 135 | lst[i] = unused_nums.pop(0) 136 | return lst 137 | 138 | def get_candidates_indices_list(similarity_matrix,subcluster_indices_list,overlap_indices_list,epsilon,**kwargs): 139 | # get candidates of the assignment problem 140 | num_candidates = kwargs.get('num_candidates', 10) 141 | 142 | if len(overlap_indices_list) < num_candidates: 143 | candidates_indices_list = overlap_indices_list 144 | else: 145 | np.fill_diagonal(similarity_matrix, 0) 146 | flatten_subcluster_indices = list(chain.from_iterable(subcluster_indices_list)) 147 | tmp_similarity_matrix = similarity_matrix[flatten_subcluster_indices] 148 | 149 | max_similarities =np.max(tmp_similarity_matrix,axis=0) 150 | neighbor_indices = np.where(max_similarities > (1-epsilon))[0] 151 | sorted_indices = np.argsort(max_similarities[neighbor_indices])[::-1] 152 | neighbor_indices = neighbor_indices[sorted_indices] 153 | 154 | if len(neighbor_indices) > num_candidates: 155 | neighbor_indices = neighbor_indices[:num_candidates] 156 | neighbor_indices = neighbor_indices.tolist() 157 | 158 | candidates_indices_list = [] 159 | for neighbor_index in neighbor_indices: 160 | for overlap_indices in overlap_indices_list: 161 | if neighbor_index not in overlap_indices: continue 162 | candidates_indices_list.append(overlap_indices) 163 | for overlap_index in overlap_indices: 164 | try: 165 | neighbor_indices.remove(overlap_index) 166 | except: 167 | pass 168 | return candidates_indices_list 169 | 170 | def agglomerative_clustering(distance_matrix,**kwargs): 171 | # perform agglomerative hierarchical clustering 172 | epsilon = kwargs.get('epsilon', 0.3) 173 | metric = kwargs.get('metric','cosine') 174 | np.fill_diagonal(distance_matrix, 0) 175 | linked = linkage(squareform(distance_matrix), method='single', metric=metric) 176 | clusters = list(fcluster(linked, epsilon, criterion='distance')) # min(clusters)=1 177 | return clusters 178 | 179 | def bipartite_matching(new_key,centrality_dict,centrality_matrix,overlap_indices,**kwargs): 180 | # bipartite matching between unclustered overlap nodes and clustered overlap nodes 181 | epsilon = kwargs.get('epsilon', 0.3) 182 | 183 | sum_centrality = 0 184 | subcluster_indices = [None]*len(overlap_indices) 185 | th = 1-epsilon 186 | while np.max(centrality_matrix) > th: 187 | max_index = np.argmax(centrality_matrix) 188 | row_index, col_index = np.unravel_index(max_index, centrality_matrix.shape) 189 | centrality = centrality_matrix[row_index, col_index] 190 | sum_centrality += centrality 191 | subcluster_indices[row_index] = col_index 192 | centrality_matrix[row_index,:]=0 193 | centrality_matrix[:,col_index]=0 194 | centrality_dict[new_key] = {"overlap_indices":overlap_indices,"indices":subcluster_indices,"centrality":sum_centrality} 195 | 196 | return centrality_dict 197 | 198 | 199 | def separate_into_subcluster(tmp_clusters, 
overlap_indices_list, distance_matrix,**kwargs): 200 | # overlap nodes are separated into subclusters 201 | epsilon = kwargs.get('epsilon', 0.3) 202 | matching_algo_th = kwargs.get('matching_algo_th', 0) 203 | debug = kwargs.get('debug', False) 204 | 205 | max_overlap = max([len(i) for i in overlap_indices_list]) #the number of overlap in the same frame 206 | initial_index = get_initial_index(distance_matrix,overlap_indices_list) #index of overlap_indices_list 207 | initial_node_indices = overlap_indices_list[initial_index] 208 | del overlap_indices_list[initial_index] 209 | 210 | subcluster_indices_list = [[] for _ in range(max_overlap)] 211 | [subcluster_indices_list[i].append(initial_node_index) for i,initial_node_index in enumerate(initial_node_indices)] 212 | 213 | similarity_matrix = 1-distance_matrix 214 | np.fill_diagonal(similarity_matrix, 0) 215 | 216 | # separte overlap nodes into several groups 217 | while len(overlap_indices_list) != 0: 218 | centrality_dict = {} 219 | max_centrality = 0 220 | 221 | candidates_indices_list = get_candidates_indices_list(similarity_matrix,subcluster_indices_list,overlap_indices_list,epsilon) 222 | for i,overlap_indices in enumerate(candidates_indices_list): 223 | centrality_matrix = np.zeros((len(overlap_indices),len(subcluster_indices_list))) #can not use create_centrality_matrix 224 | for j, overlap_index in enumerate(overlap_indices): 225 | tmp_similarity_matrix = similarity_matrix[overlap_index] 226 | for k, subcluster_indices in enumerate(subcluster_indices_list): 227 | similarities = tmp_similarity_matrix[subcluster_indices] 228 | centrality = np.sum(similarities[similarities > (1 - epsilon)]) 229 | centrality_matrix[j,k] = centrality 230 | 231 | centrality_dict = bipartite_matching(i,centrality_dict,centrality_matrix,overlap_indices,epsilon=epsilon) 232 | 233 | max_centrality = 0 if centrality_dict == {} else np.max([value["centrality"] for value in centrality_dict.values()]) 234 | 235 | if max_centrality == 0: 236 | max_index = get_initial_index(distance_matrix,overlap_indices_list) 237 | max_subcluster_indices = list(range(max_overlap)) 238 | overlap_indices = overlap_indices_list[max_index] 239 | else: 240 | max_index = [key for key,value in zip(centrality_dict,centrality_dict.values()) if value["centrality"]==max_centrality][0] 241 | max_subcluster_indices = list(centrality_dict[max_index]["indices"]) 242 | if None in max_subcluster_indices: 243 | max_subcluster_indices = fill_none(max_subcluster_indices) 244 | overlap_indices = centrality_dict[max_index]["overlap_indices"] 245 | [subcluster_indices_list[max_subcluster_index].append(overlap_index) for max_subcluster_index,overlap_index in zip(max_subcluster_indices,overlap_indices)] 246 | overlap_indices_list.remove(overlap_indices) 247 | 248 | # assign cluster ID 249 | for subcluster_indices in subcluster_indices_list: 250 | if len(subcluster_indices) == 1: 251 | tmp_clusters[subcluster_indices[0]] = np.max(tmp_clusters)+1 252 | else: 253 | sub_clusters = agglomerative_clustering(distance_matrix[np.ix_(subcluster_indices, subcluster_indices)],epsilon=epsilon) 254 | sub_clusters = [sub_cluster+max(tmp_clusters) for sub_cluster in sub_clusters] 255 | for sub_cluster,sub_cluster_index in zip(sub_clusters,subcluster_indices): 256 | tmp_clusters[sub_cluster_index] = sub_cluster 257 | return tmp_clusters 258 | 259 | def overlap_suppression_clustering(distance_matrix,frames,nonoverlap_indices,overlap_indices_list,**kwargs): #overlap_indices_list, 260 | epsilon = kwargs.get('epsilon', 0.3) 
261 | debug = kwargs.get('debug', False) 262 | clusters = [-1]*len(frames) 263 | 264 | # clustering for non-overlapping nodes 265 | if nonoverlap_indices != []: 266 | if len(nonoverlap_indices) > 1: 267 | nonoverlap_clusters = agglomerative_clustering(distance_matrix[np.ix_(nonoverlap_indices,nonoverlap_indices)], epsilon=epsilon) 268 | else: 269 | nonoverlap_clusters = [0] 270 | for k,target_index in enumerate(nonoverlap_indices): 271 | clusters[target_index] = nonoverlap_clusters[k] 272 | 273 | # clustering for overlapping nodes 274 | clusters = separate_into_subcluster(clusters, overlap_indices_list, distance_matrix,epsilon=epsilon,debug=debug) 275 | 276 | similarity_matrix = 1 - distance_matrix 277 | centrality_matrix = create_centrality_matrix(clusters,similarity_matrix,frames,epsilon=epsilon) 278 | 279 | # merging for subcluster 280 | clusters = associate_cluster(clusters,centrality_matrix, epsilon=epsilon) 281 | 282 | return clusters 283 | 284 | def divide_overlap_or_nonoverlap(cluster_frames,cluster_indices): 285 | 286 | frame_indices_dict = {frame:[] for frame in sorted(list(set(cluster_frames)))} 287 | [frame_indices_dict[frame].append(index) for index,frame in zip(cluster_indices,cluster_frames)] 288 | overlap_indices_list = [indices for indices in frame_indices_dict.values() if len(indices) > 1] 289 | flattened_overlap_indices = sum(overlap_indices_list, []) 290 | nonoverlap_indices = [index for index in cluster_indices if index not in flattened_overlap_indices] 291 | 292 | return overlap_indices_list, nonoverlap_indices 293 | 294 | def reclustering_overlap_cluster(distance_matrix,tracking_dict,serials,clusters,**kwargs): 295 | epsilon = kwargs.get('epsilon', 0.3) 296 | debug = kwargs.get('debug', False) 297 | 298 | frames = [tracking_dict[serial]["Frame"] for serial in serials] 299 | 300 | cluster_frame_dict = {cluster:[] for cluster in set(clusters)} #20240418 add set() 301 | [cluster_frame_dict[cluster].append(frame) for cluster,frame in zip(clusters,frames)] 302 | cluster_indices_dict = {cluster:[] for cluster in set(clusters)} #20240418 add set() 303 | [cluster_indices_dict[cluster].append(i) for i,cluster in enumerate(clusters)] 304 | 305 | for cluster in cluster_frame_dict: 306 | cluster_frames = cluster_frame_dict[cluster] 307 | if len(list(set(cluster_frames))) == len(cluster_frames):continue 308 | cluster_indices = cluster_indices_dict[cluster] 309 | 310 | #divide overlap/nonoverlap 311 | overlap_indices_list, nonovelap_indices = divide_overlap_or_nonoverlap(cluster_frames,cluster_indices) 312 | 313 | tmp_clusters = overlap_suppression_clustering(distance_matrix,frames,nonovelap_indices,overlap_indices_list,epsilon=epsilon,debug=debug) 314 | 315 | max_cluster_id = np.max(clusters) 316 | for index,tmp_cluster in enumerate(tmp_clusters): 317 | if clusters[index] != cluster: continue 318 | clusters[index] = max_cluster_id + tmp_cluster + 1 319 | return clusters 320 | 321 | def create_similarity_matrix_scpt(serials, tracking_dict, epsilon): 322 | # create a similarity matrix from features 323 | for n,serial in enumerate(serials): 324 | feature = np.load(tracking_dict[serial]["NpyPath"]) 325 | if n==0: feature_stack = np.empty((0,len(feature.flatten()))) 326 | feature_stack = np.append(feature_stack , feature.reshape(1,-1) , axis=0) 327 | similarity_matrix = cosine_similarity(feature_stack) 328 | similarity_matrix = similarity_matrix.astype(np.float16) 329 | 330 | similarity_matrix = np.where(similarity_matrix < (1-epsilon),0,similarity_matrix) 331 | return 
similarity_matrix 332 | 333 | 334 | def tracking_by_clustering(tracking_dict,serials,**kwargs): 335 | min_samples = kwargs.get('min_samples', 4) 336 | epsilon = kwargs.get('epsilon_scpt', 0.3) 337 | overlap_suppression = kwargs.get('overlap_suppression', True) 338 | debug = kwargs.get('debug', False) 339 | clustering_method = kwargs.get('clustering_method', "agglomerative") 340 | 341 | if len(serials) ==1: 342 | clusters = [0] 343 | else: 344 | similarity_matrix = create_similarity_matrix_scpt(serials,tracking_dict,epsilon) 345 | 346 | np.fill_diagonal(similarity_matrix, 1) 347 | distance_matrix = 1 - similarity_matrix 348 | if clustering_method == "agglomerative": 349 | clusters = agglomerative_clustering(distance_matrix,epsion=epsilon) #min(clusters)=1 350 | 351 | elif clustering_method == "dbscan": 352 | dbscan = DBSCAN(eps=epsilon,min_samples=min_samples,metric="precomputed") 353 | clusters = dbscan.fit_predict(distance_matrix) 354 | coreindices = dbscan.core_sample_indices_ 355 | clusters = [cluster if cluster != -1 else -i for i,cluster in enumerate(clusters)] 356 | unique_clusters = list(set(clusters)) 357 | new_clusterid_dict = {key:i for i,key in enumerate(unique_clusters)} 358 | clusters = [new_clusterid_dict[old_cluster] for old_cluster in clusters] 359 | 360 | if overlap_suppression == True: 361 | clusters = reclustering_overlap_cluster(distance_matrix,tracking_dict,serials,clusters,epsilon=epsilon,debug=debug) 362 | 363 | unique_clusters = list(set(clusters)) 364 | new_clusterid_dict = {key:i for i,key in enumerate(unique_clusters)} 365 | clusters = [new_clusterid_dict[old_cluster] for old_cluster in clusters] 366 | 367 | return clusters 368 | 369 | def associate_cluster_between_period(tracking_dict,clusters,serials,past_serials,**kwargs): 370 | # associate clusters between adjacent time periods 371 | epsilon = kwargs.get('epsilon_scpt', 0.3) 372 | frames = [tracking_dict[serial]["Frame"] for serial in serials] 373 | past_frames = [tracking_dict[serial]["Frame"] for serial in past_serials] 374 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in past_serials] 375 | 376 | unique_offline_ids = list(set(offline_ids)) 377 | unique_clusters = list(set(clusters)) 378 | 379 | all_serials = past_serials + serials 380 | all_clusters = offline_ids + clusters 381 | all_unique_clusters = sorted(unique_offline_ids + unique_clusters) 382 | all_frames = past_frames + frames 383 | 384 | similarity_matrix = create_similarity_matrix_scpt(all_serials, tracking_dict,epsilon) 385 | 386 | centrality_matrix = create_centrality_matrix(all_clusters, similarity_matrix,all_frames,epsilon=epsilon) 387 | del similarity_matrix 388 | np.fill_diagonal(centrality_matrix, 0) 389 | 390 | all_clusters = associate_cluster(all_clusters,centrality_matrix,epsilon=epsilon) 391 | 392 | for serial,cluster in zip(all_serials,all_clusters): 393 | tracking_dict[serial]["OfflineID"] = int(cluster) 394 | return tracking_dict 395 | 396 | def get_overlap_coefficient(rectangle1, rectangle2): 397 | # meaure spatially overlap_coefficient 398 | overlap_width = min(rectangle1[2], rectangle2[2]) - max(rectangle1[0], rectangle2[0]) 399 | overlap_height = min(rectangle1[3], rectangle2[3]) - max(rectangle1[1], rectangle2[1]) 400 | overlap_area = max(overlap_width, 0) * max(overlap_height, 0) 401 | rectangle1_area = (rectangle1[2] - rectangle1[0]) * (rectangle1[3] - rectangle1[1]) 402 | rectangle2_area = (rectangle2[2] - rectangle2[0]) * (rectangle2[3] - rectangle2[1]) 403 | #iou = overlap_area / (rectangle1_area + 
rectangle2_area - overlap_area) 404 | overlap_coefficient = overlap_area / min(rectangle1_area,rectangle2_area) 405 | return overlap_coefficient 406 | 407 | def sequential_non_maximum_suppression(tracking_dict,**kwargs): 408 | #Sequential NMS is perfomed in this function. 409 | #Sequential NMS calculates the overlap coefficient both temporally and spatially. 410 | temporally_snms_th = kwargs.get('temporally_snms_th', 0.6) 411 | spatially_snms_th = kwargs.get('spatially_snms_th', 0.6) 412 | remove_noise_cluster = kwargs.get('remove_noise_cluster', True) 413 | merge_nonoverlap = kwargs.get('merge_nonoverlap', True) 414 | 415 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict.keys()] 416 | unique_offline_ids = sorted(list(set(offline_ids))) 417 | if remove_noise_cluster: 418 | if min(unique_offline_ids) == -1: 419 | unique_offline_ids.remove(-1) 420 | 421 | offline_id_serial_dict = {offline_id:[] for offline_id in unique_offline_ids} 422 | [offline_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict.keys() if tracking_dict[serial]["OfflineID"] != -1] 423 | offline_id_frame_dict = {offline_id:[] for offline_id in unique_offline_ids} 424 | [offline_id_frame_dict[tracking_dict[serial]["OfflineID"]].append(tracking_dict[serial]["Frame"]) for serial in tracking_dict.keys() if tracking_dict[serial]["OfflineID"] != -1] 425 | 426 | for offline_id1, offline_id2 in combinations(unique_offline_ids,2): 427 | 428 | id1_frames = offline_id_frame_dict[offline_id1] 429 | id2_frames = offline_id_frame_dict[offline_id2] 430 | overlap_frames = set(id1_frames).intersection(set(id2_frames)) 431 | 432 | if len(id1_frames) < len(id2_frames): 433 | (offline_id1,offline_id2) = (offline_id2,offline_id1) 434 | (id1_frames,id2_frames) = (id2_frames,id1_frames) 435 | 436 | if max(len(overlap_frames)/len(id1_frames),len(overlap_frames)/len(id2_frames)) 1: 491 | weighted_cumsum = alpha*weighted_cumsum+(1-alpha)*np.array([delta_x[t-1],delta_y[t-1]]) 492 | if frame not in frames: 493 | continue 494 | current_position = interpolaterd_trajectory[t] 495 | past_position = interpolaterd_trajectory[t-1] 496 | pred_current_position = current_position + weighted_cumsum 497 | distance = np.sqrt(np.square(current_position[0] - pred_current_position[0])+np.square(current_position[1] - pred_current_position[1])) 498 | if distance > warp_th: 499 | break 500 | last_frame = frame 501 | if last_frame != max(frames): 502 | split_index = frames.index(last_frame) 503 | return split_index 504 | 505 | def separate_warp_tracklet(tracking_dict,**kwargs): 506 | # separate warp tracklets based on motion feature. 
507 | remove_noise_cluster = kwargs.get('remove_noise_cluster', True) 508 | warp_th = kwargs.get('warp_th', 50) 509 | 510 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict.keys()] 511 | unique_offline_ids = sorted(list(set(offline_ids))) 512 | if remove_noise_cluster: 513 | if min(unique_offline_ids) == -1: 514 | unique_offline_ids.remove(-1) 515 | 516 | offline_id_serial_dict = {offline_id:[] for offline_id in unique_offline_ids} 517 | [offline_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1] 518 | 519 | max_offline_id = max(unique_offline_ids) 520 | 521 | while len(unique_offline_ids) > 0: 522 | offline_id = unique_offline_ids.pop(0) 523 | serials = offline_id_serial_dict[offline_id] 524 | if len(serials) <= 2: 525 | continue 526 | frames = [tracking_dict[serial]["Frame"] for serial in serials] 527 | if len(frames) != len(set(frames)): 528 | print(f"offline_id {offline_id} contains overlapping frames") 529 | continue 530 | frames, serials = zip(*sorted(zip(frames, serials))) #sort by frame 531 | pos_list = [tracking_dict[serial]["Coordinate"] for serial in serials] 532 | trajectory = [((pos["x1"]+pos["x2"])/2,pos["y2"]) for pos in pos_list] 533 | split_index = get_warp_index(frames,trajectory,warp_th=warp_th) 534 | 535 | if split_index != None: 536 | split_serials = serials[split_index:] 537 | max_offline_id += 1 538 | unique_offline_ids.append(max_offline_id) 539 | offline_id_serial_dict[max_offline_id] = split_serials 540 | for serial in split_serials: 541 | tracking_dict[serial]["OfflineID"] = max_offline_id 542 | return tracking_dict 543 | 544 | def exclude_short_tracklet(tracking_dict,**kwargs): 545 | # exclude tracklets that contain only a few serials from tracking_dict 546 | short_tracklet_th = kwargs.get('short_tracklet_th', 5) 547 | 548 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 549 | unique_offline_ids = sorted(list(set(offline_ids))) 550 | if min(unique_offline_ids) == -1: unique_offline_ids.remove(-1) 551 | 552 | offline_id_serial_dict = {offlineID:[] for offlineID in unique_offline_ids} # dict for looking up serials by OfflineID 553 | [offline_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1] 554 | 555 | for offline_id in unique_offline_ids: 556 | serials = offline_id_serial_dict[offline_id] 557 | if len(serials) <= short_tracklet_th: 558 | for serial in serials: 559 | tracking_dict[serial]["OfflineID"] = -1 560 | return tracking_dict 561 | 562 | def exclude_motionless_tracklet(tracking_dict,**kwargs): 563 | # exclude motionless tracklets from tracking_dict 564 | stop_track_th = kwargs.get('stop_track_th', 25) 565 | 566 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 567 | unique_offline_ids = sorted(list(set(offline_ids))) 568 | if min(unique_offline_ids) == -1: unique_offline_ids.remove(-1) 569 | 570 | offline_id_serial_dict = {offlineID:[] for offlineID in unique_offline_ids} # dict for looking up serials by OfflineID 571 | [offline_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1] 572 | 573 | for offline_id in unique_offline_ids: 574 | serials = offline_id_serial_dict[offline_id] 575 | pos_list = [tracking_dict[serial]["Coordinate"] for serial in serials] 576 | x_pos_list = [(pos["x1"]+pos["x2"])/2 for pos in pos_list] 577 | y_pos_list = [pos["y2"] for pos in 
pos_list] 578 | x_min = np.min(x_pos_list) 579 | x_max = np.max(x_pos_list) 580 | y_min = np.min(y_pos_list) 581 | y_max = np.max(y_pos_list) 582 | if (x_max-x_min < stop_track_th) and (y_max-y_min < stop_track_th): 583 | for serial in serials: 584 | tracking_dict[serial]["OfflineID"] = -1 585 | 586 | return tracking_dict 587 | -------------------------------------------------------------------------------- /tracking/src/mcpt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import json 4 | import numpy as np 5 | from datetime import datetime 6 | from collections import Counter 7 | from sklearn.cluster import DBSCAN, AgglomerativeClustering 8 | from sklearn.metrics.pairwise import cosine_similarity 9 | from sklearn.metrics import pairwise_distances 10 | from scipy.interpolate import RegularGridInterpolator 11 | from itertools import combinations 12 | from scipy.stats import mode 13 | from scipy.spatial.distance import pdist, squareform 14 | 15 | from scpt import associate_cluster,agglomerative_clustering 16 | import pose 17 | 18 | """ 19 | Definitions for clustering to muilt-camera tracking. 20 | """ 21 | 22 | def get_max_value_of_dict(dictionary, key): 23 | # get max value of any key from nested dictionary 24 | max_value = float('-inf') 25 | for k, v in dictionary.items(): 26 | if isinstance(v, dict): 27 | max_value = max(max_value, get_max_value_of_dict(v, key)) 28 | elif k == key: 29 | max_value = max(max_value, v) 30 | return max_value 31 | 32 | def create_similarity_matrix_mcpt(representative_nodes,**kwargs): 33 | # create similarity matrix from representative feature 34 | short_track_th = kwargs.get('short_track_th', 0) 35 | representative_selection_method = kwargs.get("representative_selection_method","keypoint") 36 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2) 37 | feature_stack = None 38 | for camera_id in representative_nodes: 39 | tmp_representative_nodes = representative_nodes[camera_id] 40 | for local_id in tmp_representative_nodes: 41 | value = tmp_representative_nodes[local_id] 42 | representative_node = value["representative_node"] 43 | serials = value["all_serials"] 44 | 45 | if len(serials) < short_track_th: 46 | continue 47 | if representative_selection_method == "keypoint": 48 | score = representative_node["score"] 49 | if score > keypoint_condition_th: 50 | continue 51 | 52 | feature = np.load(representative_node["npy_path"]) 53 | if feature_stack is None: 54 | feature_stack = np.empty((0, len(feature.flatten()))) 55 | feature_stack = np.append(feature_stack , feature.reshape(1, -1) , axis=0) 56 | similarity_matrix = cosine_similarity(feature_stack) 57 | similarity_matrix = similarity_matrix.astype(np.float16) 58 | return similarity_matrix 59 | 60 | def measure_intersect_area(rectangle1, rectangle2): 61 | # measure intersect area 62 | intersect_width = min(rectangle1[2], rectangle2[2]) - max(rectangle1[0], rectangle2[0]) 63 | intersect_height = min(rectangle1[3], rectangle2[3]) - max(rectangle1[1], rectangle2[1]) 64 | intersect_area = max(intersect_width, 0) * max(intersect_height, 0) 65 | return intersect_area 66 | 67 | def eval_keypoints(serial,other_serials,keypoints_results,**kwargs): 68 | # evaluate results of pose estimation 69 | """ 70 | condition = 1: All keypoints has high confidence 71 | condition = 2: half of keypoint has high confidence (left or right half of the body) 72 | condition = 3: part of the keypoint has high confidence in left or right half of the body 73 | 
condition = 4: almost keypoints has low confidence 74 | """ 75 | keypoint_th = kwargs.get("keypoint_th",0.7) 76 | 77 | kp = keypoints_results.get_keypoints(serial) 78 | if kp == None: 79 | condition, intersect_ratio, score,area = 4, 1 , 0, 0 80 | else: 81 | x1,y1,x2,y2,bbox_confidence = kp["bbox"] 82 | keypoints = kp["Keypoints"] 83 | area = (x2-x1)*(y2-y1) 84 | x_list, y_list, scores = zip(*keypoints) 85 | 86 | intersect_area = 0 87 | for other_serial in other_serials : 88 | other_kp =keypoints_results.get_keypoints(other_serial) 89 | if other_kp==None: continue 90 | x1_,y1_,x2_,y2_,bbox_confidence = other_kp["bbox"] 91 | tmp_intersect_area = measure_intersect_area([x1,y1,x2,y2],[x1_,y1_,x2_,y2_]) 92 | intersect_area = max(intersect_area,tmp_intersect_area) 93 | intersect_ratio = intersect_area/((x2-x1)*(y2-y1)) 94 | 95 | if np.min(scores) >= keypoint_th: 96 | score = np.mean(scores) 97 | condition = 1 98 | else: 99 | right_scores = [score for i,score in enumerate(scores) if i%2==0] 100 | left_scores = [score for i,score in enumerate(scores) if i%2==1] 101 | nose_score = right_scores.pop(0) 102 | min_right_scores = np.min(right_scores) 103 | min_left_scores = np.min(left_scores) 104 | target_scores = left_scores if min_left_scores > min_right_scores else right_scores 105 | min_score = np.min(target_scores) 106 | score = np.mean(target_scores) 107 | if min_score >= keypoint_th: 108 | condition = 2 109 | else: 110 | count = len([tmp_score for tmp_score in target_scores if tmp_score >= keypoint_th]) 111 | if count/len(target_scores) > 0.7: 112 | condition =3 113 | else: 114 | condition = 4 115 | return condition, intersect_ratio, score, area 116 | 117 | def find_high_confidence_keypoint_node(tracking_dict,serials,keypoints_results,frame_serials_dict,**kwargs): 118 | keypoint_th = kwargs.get("keypoint_th",0.7) 119 | 120 | conditions = [] 121 | intersects = [] 122 | image_scores = [] 123 | areas = [] 124 | 125 | for k,serial in enumerate(serials): 126 | frame = tracking_dict[serial]["Frame"] 127 | other_serials = frame_serials_dict[frame] 128 | other_serials.remove(serial) 129 | 130 | condition,intersect_ratio ,image_score,area = eval_keypoints(serial,other_serials,keypoints_results) 131 | conditions.append(condition) 132 | intersects.append(intersect_ratio) 133 | image_scores.append(image_score) 134 | areas.append(area) 135 | min_condition = np.min(conditions) 136 | index_area = np.array([(i,area) for i,(condition,area) in enumerate(zip(conditions,areas)) if condition == min_condition]) 137 | max_index = np.argmax(index_area[:,1]) 138 | index,max_area = index_area[max_index] 139 | 140 | serial = serials[int(index)] 141 | feature = np.load(tracking_dict[serial]["NpyPath"]) 142 | return serial, feature, int(min_condition) 143 | 144 | def decide_representative_nodes(tracking_results,out_dir,scene_id,**kwargs): 145 | # decide representative nodes from each tracklet 146 | epsilon = kwargs.get('epsilon_mcpt', 0.3) 147 | representative_selection_method = kwargs.get("representative_selection_method","centrality") 148 | short_track_th = kwargs.get("short_track_th",20) 149 | model = kwargs.get("model","mmpose_hrnet") 150 | keypoint_th = kwargs.get("keypoint_th",0.7) 151 | 152 | representative_nodes = {} 153 | for camera_id in tracking_results: 154 | representative_nodes[camera_id] = {} 155 | tracking_dict = tracking_results[camera_id] 156 | if representative_selection_method == "keypoint": 157 | keypoints_results = 
pose.PoseKeypoints(f"Pose/scene_{str(scene_id).zfill(3)}/camera_{str(camera_id).zfill(4)}/camera_{str(camera_id).zfill(4)}_out_keypoint.json") 158 | keypoints_results.assign_serial_from_tracking_dict(tracking_dict=tracking_dict) 159 | max_frame = get_max_value_of_dict(tracking_dict,"Frame") 160 | frame_serials_dict = {n+1:[] for n in range(max_frame)} 161 | [frame_serials_dict[tracking_dict[serial]["Frame"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1] 162 | 163 | # Get each clusters, we need to iterate tracking_dict to extract cluster-wise data 164 | local_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 165 | unique_local_ids = sorted(set(local_ids)) 166 | if -1 in unique_local_ids: 167 | unique_local_ids.remove(-1) 168 | local_id_serials_dict = {local_id:[] for local_id in unique_local_ids} 169 | [local_id_serials_dict[local_id].append(serial) for local_id,serial in zip(local_ids,tracking_dict) if local_id >= 0] 170 | 171 | # Get the representative node of each clusters 172 | for local_id in local_id_serials_dict: 173 | serials = local_id_serials_dict[local_id] 174 | if representative_selection_method == "centrality": 175 | serials, serial, feature = find_highest_centrality_node(tracking_dict, serials, epsilon=epsilon) 176 | if serial != None: 177 | representative_node = {"serial": serial, "npy_path": tracking_dict[serial]["NpyPath"]} 178 | elif representative_selection_method == "keypoint": 179 | serial, feature, score = find_high_confidence_keypoint_node(tracking_dict,serials,keypoints_results,frame_serials_dict,keypoint_th = keypoint_th) 180 | representative_node = {"serial": serial,"score":score, "npy_path": tracking_dict[serial]["NpyPath"]} 181 | else: 182 | print("representative_selection_method is wrong") 183 | sys.exit() 184 | # Save result out to json 185 | if serials !=[]: 186 | 187 | representative_nodes[camera_id][local_id] = {"representative_node": representative_node, "all_serials": serials} 188 | json_path = os.path.join(out_dir, f"representative_nodes_scene{scene_id}.json") 189 | with open(json_path, "w") as f: 190 | json.dump(representative_nodes, f) 191 | 192 | return representative_nodes 193 | 194 | def multi_camera_people_tracking(tracking_results, scene_id, json_dir, out_dir,**kwargs): 195 | # perform mcpt using tracking_results 196 | # tracking_results contains tracking_dict, which contains results of scpt in each camera 197 | print("running multi_camera_people_tracking") 198 | 199 | appearance_based_tracking = kwargs.get("appearance_based_tracking",True) 200 | distance_type = kwargs.get("distance_type","max") 201 | distance_th = kwargs.get("distance_th",5) 202 | epsilon = kwargs.get("epsilon_mcpt",0.4) 203 | representative_selection_method = kwargs.get("representative_selection_method","keypoint") 204 | short_track_th = kwargs.get("short_track_th",0) 205 | keypoint_th = kwargs.get("keypoint_th",0.7) 206 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2) 207 | replace_similarity_by_wcoordinate = kwargs.get("replace_similarity_by_wcoordinate",True) 208 | replace_value = kwargs.get('replace_value', -10) 209 | 210 | print("representative_selection_method:",representative_selection_method) 211 | print("short_track_th:",short_track_th) 212 | print("epsilon:",epsilon) 213 | if representative_selection_method == "keypoint": 214 | print("keypoint_condition_th:",keypoint_condition_th) 215 | 216 | # Representative image extraction 217 | representative_nodes = 
get_representative_nodes_cache(scene_id=scene_id, out_dir=out_dir) 218 | if representative_nodes == None: 219 | representative_nodes = decide_representative_nodes(tracking_results,out_dir,scene_id,epsilon=epsilon,representative_selection_method=representative_selection_method,short_track_th=short_track_th,keypoint_th=keypoint_th) 220 | else: 221 | print(f"Found repsentative_nodes cache file. Got {len(representative_nodes)} camera(s) info.") 222 | print("representative feature is selected") 223 | 224 | similarity_matrix = create_similarity_matrix_mcpt(representative_nodes,short_track_th=short_track_th,representative_selection_method=representative_selection_method,keypoint_condition_th=keypoint_condition_th) 225 | similarity_matrix[similarity_matrix < (1-epsilon)] = 0 226 | clusters = list(range(len(similarity_matrix))) 227 | print("number of tracklet:",len(set(clusters))) 228 | similarity_matrix = replace_similarity(representative_nodes,similarity_matrix,tracking_results,clusters,distance_th=distance_th, 229 | distance_type=distance_type,replace_similarity_by_wcoordinate=replace_similarity_by_wcoordinate, 230 | short_track_th = short_track_th, keypoint_condition_th=keypoint_condition_th, 231 | representative_selection_method=representative_selection_method) 232 | # perform Re-identification using hieralchical clustering with average linkage 233 | clusters = associate_cluster(clusters, similarity_matrix, epsilon=epsilon, cost_function=2, minimize=False) 234 | del similarity_matrix 235 | 236 | print("unique_clusters:",len(set(clusters))) 237 | 238 | camera_dict = create_camera_dict(representative_nodes,short_track_th = short_track_th, 239 | keypoint_condition_th=keypoint_condition_th, representative_selection_method=representative_selection_method) 240 | 241 | for camera_id in camera_dict: 242 | tracking_dict = tracking_results[int(camera_id)] 243 | indices = camera_dict[camera_id]["indices"] 244 | local_ids = camera_dict[camera_id]["unique_local_ids"] 245 | tmp_clusters = [clusters[index] for index in indices] 246 | local_id_cluster_dict = {local_id:cluster for local_id,cluster in zip(local_ids,tmp_clusters)} 247 | 248 | local_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 249 | unique_local_ids = sorted(set(local_ids)) 250 | if -1 in unique_local_ids: 251 | unique_local_ids.remove(-1) 252 | local_id_serials_dict = {local_id:[] for local_id in unique_local_ids} 253 | [local_id_serials_dict[local_id].append(serial) for local_id,serial in zip(local_ids,tracking_dict) if local_id >= 0] 254 | for local_id in unique_local_ids: 255 | for serial in local_id_serials_dict[local_id]: 256 | value = tracking_dict[serial] 257 | if local_id in local_id_cluster_dict: 258 | value["GlobalOfflineID"] = int(local_id_cluster_dict[local_id]) 259 | return tracking_results 260 | 261 | def get_representative_nodes_cache(scene_id, out_dir): 262 | # Get cached representative nodes info if any 263 | representative_node_json = os.path.join(out_dir, f"representative_nodes_scene{scene_id}.json") 264 | if os.path.isfile(representative_node_json): 265 | with open(representative_node_json, "r") as f: 266 | representative_nodes = json.load(f) 267 | return representative_nodes 268 | return None 269 | 270 | def get_unique_global_ids(tracking_results,representative_nodes): 271 | # get unique global ids from tracking_results 272 | global_ids = [] 273 | for camera_id in representative_nodes: 274 | tracking_dict = tracking_results[camera_id] 275 | for local_id in representative_nodes[camera_id]: 276 | 
serial = representative_nodes[camera_id][local_id]["representative_node"]["serial"] 277 | if "GlobalOfflineID" in tracking_dict[serial]: 278 | global_ids.append(tracking_dict[serial]["GlobalOfflineID"]) 279 | unique_global_ids = sorted(list(set(global_ids))) 280 | return unique_global_ids 281 | 282 | def get_serials_each_global_id(tracking_results,representative_nodes,unique_global_ids): 283 | # get serials assigned each global id 284 | global_serial_dict = {} #global_id: {camera_id:(local_id, serial)} 285 | for global_id in unique_global_ids: 286 | tmp_dict = {} 287 | for camera_id in representative_nodes: 288 | tmp_dict[camera_id] = [] 289 | global_serial_dict[global_id] = tmp_dict 290 | for camera_id in representative_nodes: 291 | tracking_dict = tracking_results[camera_id] 292 | for local_id in representative_nodes[camera_id]: 293 | serial = representative_nodes[camera_id][local_id]["representative_node"]["serial"] 294 | if "GlobalOfflineID" in tracking_dict[serial]: 295 | global_id = tracking_dict[serial]["GlobalOfflineID"] 296 | global_serial_dict[global_id][camera_id].append((local_id,serial)) 297 | return global_serial_dict 298 | 299 | def create_camera_dict(representative_nodes,**kwargs): 300 | # 301 | short_track_th = kwargs.get('short_track_th', 0) 302 | representative_selection_method = kwargs.get("representative_selection_method","keypoint") 303 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2) 304 | 305 | camera_dict = {camera_id:{"indices":[],"unique_local_ids":[]} for camera_id in representative_nodes} 306 | max_id = 0 307 | for camera_id in representative_nodes: 308 | tmp_representative_nodes = representative_nodes[camera_id] 309 | local_ids = [] 310 | for local_id in tmp_representative_nodes: 311 | serials = tmp_representative_nodes[local_id]["all_serials"] 312 | if len(serials) < short_track_th: 313 | continue 314 | if representative_selection_method == "keypoint": 315 | score = tmp_representative_nodes[local_id]["representative_node"]["score"] 316 | if score > keypoint_condition_th: 317 | continue 318 | local_ids.append(int(local_id)) 319 | unique_local_ids = sorted(list(set(local_ids))) 320 | camera_dict[camera_id]["indices"] += list(range(max_id,max_id+len(unique_local_ids))) 321 | camera_dict[camera_id]["unique_local_ids"] += unique_local_ids 322 | max_id += len(unique_local_ids) 323 | return camera_dict 324 | 325 | def create_mcpt_feature_stack(tracking_results,target_list): 326 | feature_stack = None 327 | for camera_id, serial in target_list: 328 | feature = np.load(tracking_results[camera_id][serial]["NpyPath"]) 329 | if feature_stack is None: 330 | feature_stack = np.empty((0, len(feature.flatten()))) 331 | feature_stack = np.append(feature_stack , feature.reshape(1, -1), axis=0) 332 | return feature_stack 333 | 334 | 335 | 336 | def assign_global_id(tracking_results,representative_nodes,**kwargs): 337 | # assign unclustered tracklets to global id 338 | 339 | epsilon = kwargs.get('epsilon_mcpt', 0.3) 340 | assign_all_tracklet = kwargs.get('assign_all_tracklet', False) 341 | sim_th = kwargs.get('sim_th', 0.9) 342 | print("sim_th:",sim_th) 343 | print("assign_all_tracklet:",assign_all_tracklet) 344 | model = kwargs.get("model","mmpose_hrnet") 345 | 346 | counter = 0 347 | assigned_tracks = [] 348 | unassigned_tracks = [] 349 | 350 | for camera_id in representative_nodes: 351 | tracking_dict = tracking_results[camera_id] 352 | for local_id in representative_nodes[camera_id]: 353 | serial = 
representative_nodes[camera_id][local_id]["representative_node"]["serial"] 354 | if "GlobalOfflineID" in tracking_dict[serial]: 355 | global_id = tracking_dict[serial]["GlobalOfflineID"] 356 | assigned_tracks.append((global_id,camera_id,local_id,serial)) 357 | else: 358 | unassigned_tracks.append((camera_id,local_id)) 359 | 360 | target_list = [(camera_id,serial) for global_id,camera_id,local_id,serial in assigned_tracks] 361 | feature_stack = create_mcpt_feature_stack(tracking_results,target_list) 362 | feature_stack_T = feature_stack.T 363 | feature_stack_norm = np.linalg.norm(feature_stack, axis=1) 364 | global_ids = [global_id for global_id,camera_id,local_id,serial in assigned_tracks] 365 | 366 | for k,(camera_id,local_id) in enumerate(unassigned_tracks): 367 | npy_path = representative_nodes[camera_id][local_id]["representative_node"]["npy_path"] 368 | feature = np.load(npy_path) 369 | cos_sims = np.dot(feature,feature_stack_T)/ (np.linalg.norm(feature)*feature_stack_norm) 370 | 371 | if assign_all_tracklet == False: 372 | max_sim = np.max(cos_sims) 373 | if max_sim < sim_th: 374 | continue 375 | 376 | similar_indices = list(np.where(cos_sims >= sim_th)[0]) 377 | if len(similar_indices) == 0: 378 | continue 379 | 380 | tmp_global_ids = [global_id for i,global_id in enumerate(global_ids) if i in similar_indices] 381 | global_id = mode(tmp_global_ids).mode 382 | 383 | counter += 1 384 | serials = representative_nodes[camera_id][local_id]["all_serials"] 385 | for serial in serials: 386 | tracking_results[camera_id][serial]["GlobalOfflineID"] = int(global_id) 387 | print(f"{counter} tracklets are reassigned") 388 | return tracking_results 389 | 390 | def global_id_reassignment(tracking_results, representative_nodes,scene_id,**kwargs): 391 | # perform delete_small_global_id() and assign_global_id() for reassigning unclustered tracklets 392 | epsilon = kwargs.get("epsilon_mcpt",0.3) 393 | representative_selection_method = kwargs.get("representative_selection_method","centrality") 394 | delete_gid_th = kwargs.get("delete_gid_th",10000) 395 | assign_all_tracklet = kwargs.get("assign_all_tracklet",True) 396 | sim_th = kwargs.get("sim_th",0.8) 397 | delete_few_camera_cluter = kwargs.get('delete_few_camera_cluter',False) 398 | 399 | unique_global_ids = get_unique_global_ids(tracking_results,representative_nodes) 400 | 401 | global_serial_dict = get_serials_each_global_id(tracking_results,representative_nodes,unique_global_ids) 402 | 403 | tracking_results, unique_global_ids = delete_small_global_id(tracking_results,representative_nodes,global_serial_dict, 404 | delete_gid_th = delete_gid_th,delete_few_camera_cluter=delete_few_camera_cluter) 405 | 406 | tracking_results = assign_global_id(tracking_results,representative_nodes, 407 | delete_gid_th=delete_gid_th, assign_all_tracklet=assign_all_tracklet,sim_th=sim_th) 408 | 409 | return tracking_results 410 | 411 | def translate_world_coordinate(x, y, homography_matrix): 412 | # translate camera coordinate to world coordinate 413 | vector_xyz = np.array([x, y, 1]) # z=1 414 | vector_xyz_3d = np.dot(np.linalg.inv(homography_matrix), vector_xyz.T) 415 | return vector_xyz_3d[0] / vector_xyz_3d[2], vector_xyz_3d[1] / vector_xyz_3d[2] 416 | 417 | 418 | def interpolate_tracklet(tracking_results,representative_nodes,**kwargs): 419 | # interpolate missing detections for each tracklet 420 | max_interpolate_interval = kwargs.get("max_interpolate_interval",150) 421 | frame_sampling_freq = kwargs.get("frame_sampling_freq",1) 422 | for camera_id in 
tracking_results: 423 | tracking_dict = tracking_results[camera_id] 424 | local_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 425 | unique_local_ids = sorted(list(set(local_ids))) 426 | if min(unique_local_ids) == -1: unique_local_ids.remove(-1) 427 | local_id_serial_dict = {local_id:[] for local_id in unique_local_ids} 428 | [local_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1] 429 | local_id_frame_dict = {local_id:[] for local_id in unique_local_ids} 430 | [local_id_frame_dict[tracking_dict[serial]["OfflineID"]].append(tracking_dict[serial]["Frame"]) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1] 431 | 432 | max_serial = int(max(tracking_dict.keys())) 433 | for local_id in unique_local_ids: 434 | frames, serials = zip(*sorted(zip(local_id_frame_dict[local_id], local_id_serial_dict[local_id]))) 435 | missing_frames = [] 436 | for frame,next_frame in zip(frames[:-1],frames[1:]): 437 | diff = next_frame - frame 438 | if diff > max_interpolate_interval: continue 439 | while diff > frame_sampling_freq: 440 | diff -= frame_sampling_freq 441 | missing_frame = next_frame - diff 442 | missing_frames.append(missing_frame) 443 | if missing_frames==0: continue 444 | global_id = tracking_dict[serials[0]]["GlobalOfflineID"] if "GlobalOfflineID" in tracking_dict[serials[0]] else None 445 | 446 | coordinates = [list(tracking_dict[serial]["Coordinate"].values())+list(tracking_dict[serial]["WorldCoordinate"].values()) for serial in serials] 447 | interpolator = RegularGridInterpolator((np.array(frames),), np.array(coordinates), method='linear') 448 | for frame in missing_frames: 449 | x1,y1,x2,y2,w_x,w_y = interpolator([frame])[0] 450 | (x1, y1, x2, y2), (w_x,w_y) = map(int, [x1, y1, x2, y2]),map(float,[w_x,w_y]) 451 | max_serial += 1 452 | if global_id != None: 453 | tracking_dict[str(max_serial).zfill(8)] = {"Frame": frame, "Coordinate": {'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2}, "WorldCoordinate": {'x': w_x, 'y': w_y}, "OfflineID": local_id, "GlobalOfflineID": global_id} 454 | else: 455 | tracking_dict[str(max_serial).zfill(8)] = {"Frame": frame, "Coordinate": {'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2}, "WorldCoordinate": {'x': w_x, 'y': w_y}, "OfflineID": local_id} 456 | return tracking_results 457 | 458 | 459 | def find_highest_centrality_node(tracking_dict, serials, **kwargs): 460 | # find highest centrality node from each tracklet 461 | epsilon = kwargs.get('epsilon_mcpt', 0.3) 462 | stack_max_size = kwargs.get('stack_max_size', 2000) 463 | image_size = kwargs.get('image_size', (1920,1080)) 464 | aspect_th = kwargs.get('aspect_th', 1.6) 465 | 466 | pos_list = [list(tracking_dict[serial]["Coordinate"].values()) for serial in serials] 467 | pos_list = np.array(pos_list) 468 | aspects = (pos_list[:,3]-pos_list[:,1])/(pos_list[:,2]-pos_list[:,0]) 469 | pos_list[:, 2] = image_size[0] - pos_list[:, 2] 470 | pos_list[:, 3] = image_size[1] - pos_list[:, 3] 471 | edge_distances = np.min(pos_list,axis = 1) 472 | new_serials = [] 473 | for i, (serial,aspect, edge_distance) in enumerate(zip(serials,aspects,edge_distances)): 474 | if (aspect >= aspect_th): # (edge_distance <= 1) and 475 | new_serials.append(serial) 476 | if len(new_serials) == 0: 477 | serial,feature = None,None 478 | pass 479 | elif len(new_serials) == 1 or len(new_serials)== 2: 480 | serial = new_serials[0] 481 | feature = np.load(tracking_dict[serial]["NpyPath"]) 482 | else: 483 | freq =1 484 | while 
len(new_serials)//freq > stack_max_size: 485 | freq += 1 486 | for n, serial in enumerate(new_serials): 487 | if n % freq != 0: continue 488 | feature = np.load(tracking_dict[serial]["NpyPath"]) 489 | if n== 0: 490 | feature_stack = np.empty((0,len(feature.flatten()))) 491 | feature_stack = np.append(feature_stack , feature.reshape(1, -1), axis=0) 492 | similarity_matrix = cosine_similarity(feature_stack) 493 | similarity_matrix = np.where(similarity_matrix < 1-epsilon, 0, similarity_matrix) 494 | centralities = np.sum(similarity_matrix,axis=0) 495 | idx_max = np.argmax(centralities) 496 | serial = new_serials[idx_max*freq] 497 | feature = feature_stack[idx_max] 498 | return new_serials, serial, feature 499 | 500 | def minimize_similarity_by_sc_overlap(representative_nodes,matrix,tracking_results,clusters,camera_dict,**kwargs): 501 | # minimize similarity if tracklets are overlapping in SCPT results 502 | matrix_type = kwargs.get('matrix_type', "similarity") 503 | if matrix_type == "similarity": 504 | replace_value = -1 505 | elif matrix_type == "distance": 506 | replace_value = np.max(matrix[matrix distance_th, replace_value, similarity_matrix) 537 | return similarity_matrix 538 | 539 | def maximize_similarity_by_wcoordinate(similarity_matrix,distance_matrix,**kwargs): 540 | # replace multiple elements of the similarity matrix with 1 based on the world coordinate 541 | max_distance_th = kwargs.get('max_distance_th', 0.5) 542 | replace_value = kwargs.get('replace_value', 1) 543 | print("maximize_similarity_by_wcoordinate") 544 | similarity_matrix = np.where(distance_matrix < max_distance_th, replace_value, similarity_matrix) 545 | return similarity_matrix 546 | 547 | def replace_similarity(representative_nodes,similarity_matrix,tracking_results,clusters,**kwargs): 548 | # replace multiple elements of the similarity matrix with another value 549 | distance_th = kwargs.get('distance_th', 10) 550 | check_sc_overlap = kwargs.get('check_sc_overlap', False) 551 | replace_similarity_by_wcoordinate = kwargs.get('replace_similarity_by_wcoordinate', False) 552 | distance_type = kwargs.get('distance_type', "min") 553 | short_track_th = kwargs.get("short_track_th",0) 554 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2) 555 | replace_value = kwargs.get('replace_value', -10) 556 | representative_selection_method = kwargs.get('representative_selection_method', 'keypoint') 557 | 558 | if check_sc_overlap: 559 | camera_dict = create_camera_dict(representative_nodes,short_track_th = short_track_th,keypoint_condition_th=keypoint_condition_th, 560 | representative_selection_method=representative_selection_method) 561 | similarity_matrix = minimize_similarity_by_sc_overlap(representative_nodes,similarity_matrix,tracking_results,clusters,camera_dict, matrix_type = "similarity") 562 | if replace_similarity_by_wcoordinate: 563 | min_distance_matrix,max_distance_matrix,mean_distance_matrix = create_distance_matrix(representative_nodes,tracking_results, distance_type = distance_type,short_track_th =short_track_th, keypoint_condition_th = keypoint_condition_th,representative_selection_method =representative_selection_method) 564 | similarity_matrix = maximize_similarity_by_wcoordinate(similarity_matrix, mean_distance_matrix) 565 | similarity_matrix = replace_negative_value_by_wcoordinate(similarity_matrix, min_distance_matrix, distance_th=distance_th,replace_value=replace_value) 566 | 567 | return similarity_matrix 568 | 569 | def measure_euclidean_distance(id1_pos_list,id2_pos_list): 570 | points1 = 
np.array(id1_pos_list) 571 | points2 = np.array(id2_pos_list) 572 | diff = points1-points2 573 | euclid_distances = np.sqrt(np.sum(diff**2, axis=1)) 574 | return euclid_distances 575 | 576 | def create_distance_matrix(representative_nodes,tracking_results, **kwargs): 577 | # create a Euclidean distance matrix showing the Euclidean distance between each tracklet 578 | 579 | distance_type = kwargs.get('distance_type', "max") #distance_type min or max or mean 580 | image_size = kwargs.get('image_size', (1920,1080)) 581 | short_track_th = kwargs.get('short_track_th', 0) 582 | representative_selection_method = kwargs.get("representative_selection_method","keypoint") 583 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2) 584 | print("distance_type:",distance_type) 585 | camera_dict = create_camera_dict(representative_nodes,short_track_th = short_track_th, 586 | keypoint_condition_th=keypoint_condition_th, representative_selection_method=representative_selection_method) 587 | shape = np.sum([len(camera_dict[camera_id]["indices"]) for camera_id in camera_dict]) 588 | max_distance_matrix = np.full((shape, shape), np.inf, dtype=np.float16) 589 | mean_distance_matrix = np.full((shape, shape), np.inf, dtype=np.float16) 590 | min_distance_matrix = np.full((shape, shape), np.inf, dtype=np.float16) 591 | 592 | index_serials_dict = {index:[] for index in range(len(max_distance_matrix))} 593 | index_frames_dict = {index:[] for index in range(len(max_distance_matrix))} 594 | index_wpos_list_dict = {index:[] for index in range(len(max_distance_matrix))} 595 | 596 | for camera_id in representative_nodes: 597 | tracking_dict = tracking_results[int(camera_id)] 598 | indices = camera_dict[camera_id]["indices"] 599 | unique_local_ids = camera_dict[camera_id]["unique_local_ids"] 600 | local_ids_serials_dict = {local_id:[] for local_id in unique_local_ids} 601 | [local_ids_serials_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] in unique_local_ids] 602 | 603 | for tmp_index in range(len(indices)): 604 | local_id = unique_local_ids[tmp_index] 605 | serials = local_ids_serials_dict[local_id] 606 | frames = [tracking_dict[serial]["Frame"] for serial in serials] 607 | wpos_list = [list(tracking_dict[serial]["WorldCoordinate"].values()) for serial in serials] 608 | index = indices[tmp_index] 609 | index_serials_dict[index] += serials 610 | index_frames_dict[index] += frames 611 | index_wpos_list_dict[index] += wpos_list 612 | 613 | for id1_index in range(len(max_distance_matrix)-1): 614 | id1_frames = index_frames_dict[id1_index] 615 | id1_wpos_list = index_wpos_list_dict[id1_index] 616 | if id1_frames == []: 617 | continue 618 | 619 | for id2_index in range(id1_index+1,len(max_distance_matrix)): 620 | id2_frames = index_frames_dict[id2_index] 621 | if id2_frames == []: 622 | continue 623 | common_frames = set(id1_frames).intersection(set(id2_frames)) 624 | if len(common_frames) < 1: continue 625 | id2_wpos_list = index_wpos_list_dict[id2_index] 626 | id1_lap_indices = [i for i,id1_frame in enumerate(id1_frames) if id1_frame in common_frames] 627 | id2_lap_indices = [i for i,id2_frame in enumerate(id2_frames) if id2_frame in common_frames] 628 | id1_lap_wpos_list = [id1_wpos_list[id1_lap_index] for id1_lap_index in id1_lap_indices] 629 | id2_lap_wpos_list = [id2_wpos_list[id2_lap_index] for id2_lap_index in id2_lap_indices] 630 | 631 | euclid_distances = measure_euclidean_distance(id1_lap_wpos_list,id2_lap_wpos_list) 632 | 
min_distance = np.min(euclid_distances) 633 | mean_distance = np.mean(euclid_distances) 634 | max_distance = np.max(euclid_distances) 635 | min_distance_matrix[id1_index,id2_index] = min_distance 636 | min_distance_matrix[id2_index,id1_index] = min_distance 637 | if len(common_frames) > 120: 638 | mean_distance_matrix[id1_index,id2_index] = mean_distance 639 | mean_distance_matrix[id2_index,id1_index] = mean_distance 640 | max_distance_matrix[id1_index,id2_index] = max_distance 641 | max_distance_matrix[id2_index,id1_index] = max_distance 642 | 643 | return min_distance_matrix,max_distance_matrix,mean_distance_matrix 644 | 645 | def delete_small_global_id(tracking_results,representative_nodes,global_serial_dict,**kwargs): 646 | # delete global ids that contain only a few serials from tracking_results 647 | delete_gid_th = kwargs.get('delete_gid_th',10000) 648 | delete_few_camera_cluter = kwargs.get('delete_few_camera_cluter',False) 649 | print("delete_gid_th:",delete_gid_th) 650 | print("delete_few_camera_cluter:",delete_few_camera_cluter) 651 | delete_global_ids = [] 652 | save_global_ids = [] 653 | 654 | for global_id in global_serial_dict: 655 | serial_counter = 0 656 | camera_ids=[] 657 | for camera_id in global_serial_dict[global_id]: 658 | if global_serial_dict[global_id][camera_id] != []: 659 | camera_ids.append(camera_id) 660 | for local_id,serial in global_serial_dict[global_id][camera_id]: 661 | tmp_all_serials = representative_nodes[camera_id][local_id]["all_serials"] 662 | serial_counter += len(tmp_all_serials) 663 | 664 | if serial_counter < delete_gid_th: 665 | delete_global_ids.append(global_id) 666 | continue 667 | if delete_few_camera_cluter: 668 | if len(set(camera_ids)) < 3: 669 | delete_global_ids.append(global_id) 670 | continue 671 | save_global_ids.append(global_id) 672 | 673 | for camera_id in tracking_results: 674 | tracking_dict = tracking_results[camera_id] 675 | for serial in tracking_dict: 676 | tmp_dict = tracking_dict[serial] 677 | if "GlobalOfflineID" in tmp_dict: 678 | global_id = tmp_dict["GlobalOfflineID"] 679 | if global_id in delete_global_ids: 680 | del tmp_dict["GlobalOfflineID"] 681 | unique_global_ids = sorted(list(set(save_global_ids))) 682 | 683 | return tracking_results, unique_global_ids 684 | 685 | def measure_world_coordinate(scene_id,tracking_results, **kwargs): 686 | # measure world coordinates for each node 687 | mean_world_coordinate_th = kwargs.get("mean_world_coordinate_th",2) 688 | model = kwargs.get("model","mmpose_hrnet") 689 | 690 | for camera_id in tracking_results: 691 | tracking_dict = tracking_results[camera_id] 692 | with open(f"Original/scene_{str(scene_id).zfill(3)}/camera_{str(camera_id).zfill(4)}/calibration.json") as f: 693 | calibration_json = json.load(f) 694 | homography_matrix = np.array(calibration_json['homography matrix']) 695 | for serial in tracking_dict: 696 | value = tracking_dict[serial] 697 | x1,y1,x2,y2 = value["Coordinate"].values() 698 | x,y = (x2+x1)/2,y2 699 | bbox_w_c = translate_world_coordinate(x,y, homography_matrix) 700 | value["WorldCoordinate"] = {"x":bbox_w_c[0],"y":bbox_w_c[1]} 701 | 702 | for camera_id in tracking_results: 703 | tracking_dict = tracking_results[camera_id] 704 | for serial in tracking_dict: 705 | value = tracking_dict[serial] 706 | return tracking_results 707 | 708 | def eval_noise_level(keypoints): 709 | # evaluate noise level in images based on pose estimation 710 | xs,ys,scores = zip(*keypoints) 711 | th = 0.75 712 | indices = [i for i,score in enumerate(scores) if score > 
th] 713 | condition = 0 714 | if len(indices)==2: 715 | if min(indices) <= 4: 716 | condition = 0 717 | else: 718 | condition = 2 719 | if len(indices)==1: 720 | condition = 3 721 | if len(indices)==0: 722 | condition =4 723 | return condition 724 | 725 | def remove_noise_images(scene_id,tracking_results,**kwargs): 726 | # remove noise images based on pose estimation 727 | model = kwargs.get("model","mmpose_hrnet") 728 | 729 | del_serials = {camera_id:[] for camera_id in tracking_results} 730 | 731 | for camera_id in tracking_results: 732 | tracking_dict = tracking_results[camera_id] 733 | for serial in tracking_dict: 734 | value = tracking_dict[serial] 735 | if "GlobalOfflineID" not in value: 736 | del_serials[camera_id].append(serial) 737 | 738 | for camera_id in tracking_results: 739 | tracking_dict = tracking_results[camera_id] 740 | for serial in del_serials[camera_id]: 741 | del tracking_dict[serial] 742 | 743 | for camera_id in tracking_results: 744 | tracking_dict = tracking_results[camera_id] 745 | keypoints_results = pose.PoseKeypoints(f"Pose/scene_{str(scene_id).zfill(3)}/camera_{str(camera_id).zfill(4)}/camera_{str(camera_id).zfill(4)}_out_keypoint.json") 746 | keypoints_results.assign_serial_from_tracking_dict(tracking_dict=tracking_dict) 747 | del_serials = [] 748 | for serial in tracking_dict: 749 | value = tracking_dict[serial] 750 | 751 | kp = keypoints_results.get_keypoints(serial) 752 | if kp == None: 753 | del_serials.append(serial) 754 | continue 755 | keypoints = kp['Keypoints'] 756 | condition = eval_noise_level(keypoints) 757 | coordinate = list(value["Coordinate"].values()) 758 | w,h = coordinate[2]-coordinate[0],coordinate[3]-coordinate[1] 759 | if w/h > 3 or h/w > 5: 760 | del_serials.append(serial) 761 | continue 762 | if condition >= 2: 763 | if condition==2 and min(w,h) < 100: 764 | continue 765 | del_serials.append(serial) 766 | for serial in del_serials: 767 | del tracking_dict[serial] 768 | 769 | local_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 770 | unique_local_ids = sorted(set(local_ids)) 771 | if -1 in unique_local_ids: 772 | unique_local_ids.remove(-1) 773 | local_id_serials_dict = {local_id:[] for local_id in unique_local_ids} 774 | [local_id_serials_dict[local_id].append(serial) for local_id,serial in zip(local_ids,tracking_dict)] 775 | local_id_frames_dict = {local_id:[] for local_id in unique_local_ids} 776 | [local_id_frames_dict[local_id].append(tracking_dict[serial]["Frame"]) for local_id,serial in zip(local_ids,tracking_dict)] 777 | 778 | del_serials = [] 779 | for local_id in local_id_serials_dict: 780 | if local_id == -1: 781 | continue 782 | frames, serials = zip(*sorted(zip(local_id_frames_dict[local_id], local_id_serials_dict[local_id]))) 783 | for i in range(len(frames[:-1])): 784 | if i == 0: 785 | continue 786 | past_frame = frames[i-1] 787 | frame = frames[i] 788 | future_frame = frames[i+1] 789 | if (frame - past_frame >30) and (future_frame - frame > 30): 790 | del_serials.append(serials[i]) 791 | 792 | return tracking_results 793 | 794 | 795 | def delete_distant_persons(tracking_results,**kwargs): 796 | # delete the node that has long distances to other nodes with the same global id 797 | 798 | gid_serials = {} 799 | 800 | for camera_id in tracking_results: 801 | tracking_dict = tracking_results[camera_id] 802 | for serial in tracking_dict: 803 | value = tracking_dict[serial] 804 | gid = value["GlobalOfflineID"] 805 | gid_serials[gid] = [] 806 | for camera_id in tracking_results: 807 | tracking_dict = 
tracking_results[camera_id] 808 | for serial in tracking_dict: 809 | value = tracking_dict[serial] 810 | gid = value["GlobalOfflineID"] 811 | frame = value["Frame"] 812 | gid_serials[gid].append((camera_id,serial,frame)) 813 | delete_list= [] 814 | for gid in gid_serials: 815 | value = gid_serials[gid] 816 | camera_ids,serials,frames = zip(*value) 817 | frames, serials,camera_ids = zip(*sorted(zip(frames, serials, camera_ids))) 818 | 819 | current_frame = frames[0] 820 | current_serial = serials[0] 821 | current_camera_ids = camera_ids[0] 822 | tmp_frames = [] 823 | tmp_serials = [] 824 | tmp_camera_ids = [] 825 | for frame,serial,camera_id in zip(frames,serials,camera_ids): 826 | if frame !=current_frame: 827 | 828 | if len(tmp_frames) >=2: 829 | world_coordinates = [] 830 | for tmp_camera_id,tmp_serial in zip(tmp_camera_ids,tmp_serials): 831 | world_coordinate = tuple(tracking_results[tmp_camera_id][tmp_serial]["WorldCoordinate"].values()) 832 | world_coordinates.append(world_coordinate) 833 | world_coordinates = np.array(world_coordinates) 834 | distance_matrix = squareform(pdist(world_coordinates, 'euclidean')) 835 | if len(distance_matrix)>2: 836 | if np.max(distance_matrix) >7: 837 | sum_row = np.sum(distance_matrix,axis=0) 838 | argmax = np.argmax(sum_row) 839 | delete_list.append((tmp_camera_ids[argmax],tmp_serials[argmax])) 840 | 841 | current_frame = frame 842 | current_serial = serial 843 | current_camera_id = camera_id 844 | tmp_frames = [frame] 845 | tmp_serials = [serial] 846 | tmp_camera_ids = [camera_id] 847 | else: 848 | tmp_frames.append(frame) 849 | tmp_serials.append(serial) 850 | tmp_camera_ids.append(camera_id) 851 | 852 | for camera_id,serial in delete_list: 853 | del tracking_results[camera_id][serial] 854 | return tracking_results 855 | --------------------------------------------------------------------------------
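Note on the world-coordinate step: translate_world_coordinate() in tracking/src/mcpt.py maps an image point through the inverse of the camera homography and normalises by the third homogeneous component, and measure_world_coordinate() feeds it the bottom-centre of each bounding box. The short sketch below reproduces that projection in isolation. The matrix H is a made-up illustrative value; the pipeline reads the real one from Original/scene_XXX/camera_XXXX/calibration.json under the "homography matrix" key.

import numpy as np

def translate_world_coordinate(x, y, homography_matrix):
    # same computation as in tracking/src/mcpt.py: apply the inverse homography to the
    # homogeneous image point, then normalise by the third component
    vector_xyz = np.array([x, y, 1.0])
    vector_xyz_3d = np.dot(np.linalg.inv(homography_matrix), vector_xyz)
    return vector_xyz_3d[0] / vector_xyz_3d[2], vector_xyz_3d[1] / vector_xyz_3d[2]

# illustrative homography only -- the real matrix comes from calibration.json
H = np.array([[0.05, 0.0, -40.0],
              [0.0, 0.05, -20.0],
              [0.0, 0.0, 1.0]])

# the pipeline projects the bottom-centre (foot point) of each bounding box
x1, y1, x2, y2 = 900, 300, 1000, 600
w_x, w_y = translate_world_coordinate((x1 + x2) / 2, y2, H)
print({"x": w_x, "y": w_y})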