├── tracking
│   ├── config
│   │   ├── parameters_per_scene.py
│   │   └── scene_2_camera_id_file.json
│   ├── requirements.txt
│   ├── src
│   │   ├── run.py
│   │   ├── tracking.py
│   │   ├── utils.py
│   │   ├── pose.py
│   │   ├── scpt.py
│   │   └── mcpt.py
│   └── infer.py
├── ranking.jpg
├── overall-pipeline.png
├── scripts
│   ├── tracking.sh
│   ├── extract_frame.sh
│   ├── detection.sh
│   ├── embedding.sh
│   └── pose.sh
├── poser
│   ├── load_tracking_result.py
│   └── top_down_video_demo_with_track_file.py
├── LICENSE
├── tools
│   ├── extract_frame.py
│   └── generate_submission.py
├── embedder
│   └── aic24_extract.py
├── README.md
└── detector
    └── aic24_get_detection.py

/tracking/config/parameters_per_scene.py:
--------------------------------------------------------------------------------
1 | parameters_per_scene = {
2 | }
--------------------------------------------------------------------------------
/ranking.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riips/AIC24_Track1_YACHIYO_RIIPS/HEAD/ranking.jpg
--------------------------------------------------------------------------------
/overall-pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riips/AIC24_Track1_YACHIYO_RIIPS/HEAD/overall-pipeline.png
--------------------------------------------------------------------------------
/scripts/tracking.sh:
--------------------------------------------------------------------------------
1 | SCENE=$*
2 | 
3 | #cd tracking
4 | 
5 | for SCENE in $*
6 | do
7 | echo Processing scene-$SCENE
8 | python tracking/infer.py -s $SCENE
9 | done
--------------------------------------------------------------------------------
/scripts/extract_frame.sh:
--------------------------------------------------------------------------------
1 | conda activate botsort_env
2 | 
3 | for SCENE in $*
4 | do
5 | F_SCENE=$(printf "%03d" "$SCENE")
6 | echo Processing scene-$F_SCENE
7 | python3 tools/extract_frame.py -s scene_$F_SCENE ./
8 | done
--------------------------------------------------------------------------------
/scripts/detection.sh:
--------------------------------------------------------------------------------
1 | cp ./detector/aic24_get_detection.py ./BoT-SORT/tools/
2 | cd ./BoT-SORT
3 | conda activate botsort_env
4 | 
5 | for SCENE in $*
6 | do
7 | F_SCENE=$(printf "%03d" "$SCENE")
8 | echo Processing scene-$F_SCENE
9 | python3 tools/aic24_get_detection.py -s scene_$F_SCENE ../
10 | done
--------------------------------------------------------------------------------
/scripts/embedding.sh:
--------------------------------------------------------------------------------
1 | cp ./embedder/aic24_extract.py ./deep-person-reid/torchreid/
2 | cd ./deep-person-reid
3 | conda activate torchreid
4 | 
5 | for SCENE in $*
6 | do
7 | F_SCENE=$(printf "%03d" "$SCENE")
8 | echo Processing scene-$F_SCENE
9 | python3 torchreid/aic24_extract.py -s scene_$F_SCENE ../
10 | done
--------------------------------------------------------------------------------
/tracking/requirements.txt:
--------------------------------------------------------------------------------
1 | contourpy==1.2.1
2 | cycler==0.12.1
3 | fonttools==4.51.0
4 | joblib==1.4.0
5 | kiwisolver==1.4.5
6 | matplotlib==3.8.4
7 | numpy==1.26.4
8 | opencv-python-headless==4.9.0.80
9 | packaging==24.0
10 | pillow==10.3.0
11 | pyparsing==3.1.2
12 | python-dateutil==2.9.0.post0
13 | scikit-learn==1.4.2
14 | scipy==1.13.0
15 | six==1.16.0
16 | threadpoolctl==3.4.0
17 | tqdm==4.66.2
18 | 
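A note on /tracking/config/parameters_per_scene.py above: it ships as an empty dict, and its expected schema is only visible from tracking/infer.py (get_parameters_to_scene returns parameters_per_scene[scene]["tracking_parameters"], falling back to the built-in defaults when a scene has no entry). A populated entry might look like the sketch below; the scene ID and values are illustrative, not tuned settings.
```
# Hypothetical per-scene override; the schema follows how tracking/infer.py reads this file.
parameters_per_scene = {
    41: {
        "tracking_parameters": {
            "epsilon_scpt": 0.10,
            "time_period": 3,
            "epsilon_mcpt": 0.37,
            "short_track_th": 120,
            "distance_type": "min",
            "sim_th": 0.85,
        }
    },
}
```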
-------------------------------------------------------------------------------- /scripts/pose.sh: -------------------------------------------------------------------------------- 1 | cp ./poser/load_tracking_result.py ./mmpose/demo/ 2 | cp ./poser/top_down_video_demo_with_track_file.py ./mmpose/demo/ 3 | cd ./mmpose 4 | conda activate openmmlab 5 | 6 | for SCENE in $* 7 | do 8 | F_SCENE=$(printf "%03d" "$SCENE") 9 | echo Procssing scene-$F_SCENE 10 | find "../Detection/scene_$F_SCENE" -maxdepth 1 -type f -name "*.txt" | while read -r file; 11 | do 12 | CAMERA=$(basename "$file") 13 | number=$(echo "$CAMERA" | sed 's/camera_\([0-9]\+\).txt/\1/') 14 | python3 demo/top_down_video_demo_with_track_file.py ../Detection/scene_${F_SCENE}/${CAMERA} ./configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth --video-path ../Original/scene_${F_SCENE}/camera_${number}/video.mp4 --out-file ../Pose/scene_${F_SCENE}/camera_${number}/camera_${number}_out_keypoint.json 15 | done 16 | done 17 | -------------------------------------------------------------------------------- /poser/load_tracking_result.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | 5 | def load_tracking(file_name): 6 | tracking_file = open(file_name) 7 | result = {} 8 | for line in tracking_file: 9 | line = line.rstrip().split(',') 10 | frame_id = int(line[1]) 11 | track_id = int(line[2]) 12 | bbox = [float(line[3]), float(line[4]), float(line[5]), float(line[6]), 1.0] 13 | if frame_id not in result.keys(): 14 | result[frame_id] = [] 15 | result[frame_id].append({'bbox': np.array(bbox)}) 16 | return result 17 | 18 | def load_tracking_id(file_name): 19 | tracking_file = open(file_name) 20 | result = {} 21 | for line in tracking_file: 22 | line = line.rstrip().split(',') 23 | frame_id = int(line[1]) 24 | track_id = int(line[2]) 25 | if frame_id not in result.keys(): 26 | result[frame_id] = [] 27 | result[frame_id].append({'track_id': track_id}) 28 | return result 29 | 30 | if __name__ == '__main__': 31 | print("run load_tracking") 32 | #load_tracking('') 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 RIIPS 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tools/extract_frame.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | import PIL.Image as Image 5 | import cv2 6 | from multiprocessing import Pool 7 | from sys import stdout 8 | import argparse 9 | import os.path as osp 10 | 11 | def make_parser(): 12 | parser = argparse.ArgumentParser("reid") 13 | parser.add_argument("root_path", type=str, default=None) 14 | parser.add_argument("-s", "--scene", type=str, default=None) 15 | return parser 16 | 17 | args = make_parser().parse_args() 18 | data_root = osp.join(args.root_path, "Original") 19 | scene = args.scene 20 | 21 | fprint, endl = stdout.write, "\n" 22 | 23 | IMAGE_FORMAT = ".jpg" 24 | 25 | 26 | def video2image(parameter_set): 27 | scenario, camera, camera_dir = parameter_set 28 | fprint(f"[Processing] {scenario} {camera}{endl}") 29 | imgs_dir = f"{camera_dir}/Frame" 30 | if not os.path.exists(imgs_dir): 31 | os.makedirs(imgs_dir) 32 | print("camera_dir:" + camera_dir) 33 | cap = cv2.VideoCapture(f"{camera_dir}/video.mp4") 34 | current_frame = 1 35 | ret, frame = cap.read() 36 | while ret: 37 | frame_file_name = f"{str(current_frame).zfill(6)}{IMAGE_FORMAT}" 38 | cv2.imwrite(f"{imgs_dir}/{frame_file_name}", frame) 39 | ret, frame = cap.read() 40 | current_frame += 1 41 | fprint(f"[Done] {scenario} {camera}{endl}") 42 | 43 | 44 | def main(): 45 | parameter_sets = [] 46 | scenario_dir = osp.join(data_root, scene) 47 | cameras = os.listdir(scenario_dir) 48 | for each_camera in cameras: 49 | cam = each_camera 50 | if "map" in each_camera: 51 | continue 52 | camera_dir = f"{scenario_dir}/{each_camera}" 53 | parameter_sets.append( 54 | [scene, each_camera, camera_dir] 55 | ) 56 | 57 | pool = Pool(processes=len(parameter_sets)) 58 | pool.map(video2image, parameter_sets) 59 | pool.close() 60 | 61 | 62 | if __name__ == "__main__": 63 | main() 64 | 65 | -------------------------------------------------------------------------------- /tools/generate_submission.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import os 4 | import numpy as np 5 | 6 | def read_json_file(file_path): 7 | with open(file_path, 'r') as file: 8 | data = json.load(file) 9 | return data 10 | 11 | def convert_coordinates_2world(x, y): 12 | vector_xyz = np.array([x, y, z]) 13 | vector_xyz_3d = np.dot(np.linalg.inv(homography_matrix), vector_xyz.T) 14 | vector_xyz_3d = vector_xyz_3d / vector_xyz_3d[2] 15 | return vector_xyz_3d[0], vector_xyz_3d[1] 16 | 17 | def load_calibration(calib_path): 18 | data = read_json_file(calib_path) 19 | global camera_projection_matrix 20 | global homography_matrix 21 | camera_projection_matrix = np.array(data["camera projection matrix"]) 22 | homography_matrix = np.array(data["homography matrix"]) 23 | 24 | def generate_submission(json_path, data_root="", save_path=""): 25 | json_path = os.path.join(data_root, json_path) 26 | submission_path = os.path.join(data_root, save_path ) 27 | if not os.path.exists(submission_path): 28 | os.makedirs(submission_path) 29 | submission_path = os.path.join(submission_path, 'track1.txt') 30 
| json_data = read_json_file(json_path) 31 | ret_data = [] 32 | for cam in json_data: 33 | print(f"processing camera : {cam.zfill(3)}") 34 | for seq in json_data[cam]: 35 | item = json_data[cam][seq] 36 | if "GlobalOfflineID" in item: 37 | ret_line = [cam, \ 38 | item["GlobalOfflineID"], \ 39 | (item["Frame"] - 1), \ 40 | item["Coordinate"]["x1"], \ 41 | item["Coordinate"]["y1"], \ 42 | (item["Coordinate"]["x2"] - item["Coordinate"]["x1"]), \ 43 | (item["Coordinate"]["y2"] - item["Coordinate"]["y1"]), \ 44 | "{:.6f}".format(item["WorldCoordinate"]["x"]), \ 45 | "{:.6f}".format(item["WorldCoordinate"]["y"])] 46 | ret_data.append(ret_line) 47 | ret_data = sorted(ret_data, key=lambda x: (int(x[0]), int(x[2]), int(x[1]))) 48 | np.savetxt(submission_path, ret_data, delimiter=' ', fmt="%s") 49 | 50 | 51 | if __name__ == "__main__": 52 | print("create track1.txt") 53 | scenes = os.listdir("./Tracking/") 54 | for sc in scenes: 55 | print(f"processing scene : {sc}") 56 | generate_submission(json_path=os.path.join(f"Tracking", sc,"fixed_whole_tracking_results.json"), save_path=os.path.join(f"Submission", sc)) 57 | 58 | print("merge track1.txt") 59 | with open(os.path.join("Submission", "track1.txt"), "w") as merged_file: 60 | for file_path in scenes: 61 | with open(os.path.join("Submission", f"{file_path}/track1.txt"), "r") as file: 62 | merged_file.write(file.read()) -------------------------------------------------------------------------------- /embedder/aic24_extract.py: -------------------------------------------------------------------------------- 1 | ''' 2 | extract ReID features from testing data. 3 | ''' 4 | import os 5 | import argparse 6 | import os.path as osp 7 | import numpy as np 8 | import torch 9 | import time 10 | import torchvision.transforms as T 11 | from PIL import Image 12 | import sys 13 | from utils import FeatureExtractor 14 | import torchreid 15 | import json 16 | 17 | def make_parser(): 18 | parser = argparse.ArgumentParser("reid") 19 | parser.add_argument("root_path", type=str, default=None) 20 | parser.add_argument("-s", "--scene", type=str, default=None) 21 | return parser 22 | 23 | if __name__ == "__main__": 24 | 25 | args = make_parser().parse_args() 26 | data_root = args.root_path 27 | scene = args.scene 28 | 29 | sys.path.append(data_root+'/deep-person-reid') 30 | 31 | img_dir = os.path.join(data_root,'Original') 32 | det_dir = os.path.join(data_root,'Detection') 33 | out_dir = os.path.join(data_root,'EmbedFeature') 34 | 35 | models = { 36 | 'osnet_x1_0':data_root+'/deep-person-reid/checkpoints/osnet_ms_m_c.pth.tar' 37 | } 38 | 39 | 40 | model_names = ['osnet_x1_0'] 41 | 42 | 43 | val_transforms = T.Compose([ 44 | T.Resize([256, 128]), 45 | T.ToTensor(), 46 | T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 47 | ]) 48 | 49 | for model_idx,name in enumerate(models): 50 | 51 | model_p = models[name] 52 | model_name = model_names[model_idx] 53 | 54 | print('Using model {}'.format(name)) 55 | 56 | extractor = FeatureExtractor( 57 | model_name=model_name, 58 | model_path=model_p, 59 | device='cuda' 60 | ) 61 | 62 | for file in os.listdir(os.path.join(det_dir,scene)): 63 | base, ext = os.path.splitext(file) 64 | if ext == '.txt': 65 | print('processing file {}{}'.format(base,ext)) 66 | det_path = os.path.join(det_dir,scene,'{}.txt'.format(base)) 67 | json_path = os.path.join(det_dir,scene,'{}.json'.format(base)) 68 | dets = np.genfromtxt(det_path,dtype=str,delimiter=',') 69 | with open(json_path) as f: 70 | jf = json.load(f) 71 | cur_frame = 0 72 | 
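# Note: each detection row is (cam, frame, _, x1, y1, x2, y2, conf). The loop below embeds every
# crop with the osnet extractor and saves it to
# EmbedFeature/<scene>/<camera>/feature_<frame>_<index>_<x1>_<x2>_<y1>_<y2>_<conf without the dot>.npy,
# writing that scene/camera-relative path back into the detection JSON as "NpyPath".
# tracking/src/utils.py (add_object_from_image_path) later re-parses the bbox from this file name.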
u_num = 0 73 | emb = np.array([None]*len(dets)) 74 | start = time.time() 75 | print('processing scene {} cam {} with {} detections'.format(scene,base,len(dets))) 76 | for idx,(cam,frame,_,x1,y1,x2,y2,conf) in enumerate(dets): 77 | u_num += 1 78 | x1,y1,x2,y2 = map(float,[x1,y1,x2,y2]) 79 | if idx%1000 == 0: 80 | if idx !=0: 81 | end = time.time() 82 | print('processing time :',end-start) 83 | start = time.time() 84 | print('process {}/{}'.format(idx,len(dets))) 85 | if cur_frame != int(frame): 86 | cur_frame = int(frame) 87 | if not os.path.isdir(osp.join(out_dir,scene,cam)): 88 | os.makedirs(osp.join(out_dir,scene,cam)) 89 | save_fn = os.path.join(out_dir,scene,cam,'feature_{}_{}_{}_{}_{}_{}_{}.npy'.format(cur_frame,u_num,str(int(x1)),str(int(x2)),str(int(y1)),str(int(y2)),str(conf).replace(".",""))) 90 | jf[str(idx).zfill(8)]['NpyPath'] = os.path.join(scene,cam,'feature_{}_{}_{}_{}_{}_{}_{}.npy'.format(cur_frame,u_num,str(int(x1)),str(int(x2)),str(int(y1)),str(int(y2)),str(conf).replace(".",""))) 91 | img_path = os.path.join(img_dir,scene,cam,'Frame',frame.zfill(6)+'.jpg') 92 | img = Image.open(img_path) 93 | 94 | img_crop = img.crop((x1,y1,x2,y2)) 95 | img_crop = val_transforms(img_crop.convert('RGB')).unsqueeze(0) 96 | feature = extractor(img_crop).cpu().detach().numpy()[0] 97 | 98 | np.save(save_fn,feature) 99 | end = time.time() 100 | print('processing time :',end-start) 101 | start = time.time() 102 | print('process {}/{}'.format(idx+1,len(dets))) 103 | with open(json_path, 'w') as f: 104 | json.dump(jf, f, ensure_ascii=False) 105 | -------------------------------------------------------------------------------- /tracking/src/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | from datetime import datetime 5 | 6 | from tracking import Tracker 7 | from utils import DetectedObjects 8 | 9 | def run_scpt(feature_data_root, out_dir="outdir", tracking_params={}): 10 | # Load and generate "detected object list" 11 | tracking_results = {} 12 | if not os.path.isdir(feature_data_root): 13 | raise Exception(f"No such directory: {feature_data_root}") 14 | if os.path.basename(feature_data_root).startswith("camera_"): 15 | camera_ids = [os.path.basename(feature_data_root)] 16 | feature_data_root = os.path.dirname(feature_data_root) 17 | is_multi = False 18 | else: 19 | camera_ids = [cam_id for cam_id in os.listdir(feature_data_root) if cam_id[:7] == "camera_"] 20 | is_multi = True 21 | 22 | # loading detections 23 | for camera_id in camera_ids: 24 | data_dir = os.path.join(feature_data_root, camera_id) 25 | camera_id = int(camera_id[7:]) 26 | detected_objects = load_detections(data_dir) 27 | tracking_results[camera_id] = detected_objects.to_trackingdict() 28 | del detected_objects 29 | 30 | # Run SCT on all detections of all cameras 31 | for camera_id in tracking_results: 32 | tracking_dict = tracking_results[camera_id] 33 | start_time = datetime.now() 34 | tracker = Tracker(tracking_params) 35 | tracking_results[camera_id] = tracker.scpt(tracking_dict) # tracking returns tracking_dict 36 | end_time = datetime.now() 37 | print(f"Camera{camera_id} elapsed time: {end_time - start_time}") 38 | 39 | # Dump the result 40 | out_json = os.path.join(out_dir, f'camera{camera_id:03d}_tracking_results.json') 41 | os.makedirs(os.path.dirname(out_json), exist_ok=True) 42 | with open(out_json, mode='w') as f: 43 | json.dump(tracking_results[camera_id], f) 44 | 45 | def run_mcpt(scene_id, json_dir,out_dir="outdir", 
tracking_params={}): 46 | start_time = datetime.now() 47 | tracker = Tracker(tracking_params) 48 | whole_tracking_result = tracker.mcpt(scene_id, json_dir,out_dir) 49 | 50 | # Dump the result 51 | out_file = os.path.join(out_dir, 'whole_tracking_results.json') 52 | with open(out_file, mode='w') as f: 53 | json.dump(whole_tracking_result, f) 54 | end_time = datetime.now() 55 | print(f"Elapsed_time: {end_time - start_time}") 56 | 57 | 58 | def correct_scpt_result(scene_id, json_dir, out_dir=None, tracking_params={}): 59 | if not os.path.isdir(json_dir): 60 | raise Exception(f"The directory '{json_dir}' does not exist.") 61 | if out_dir == None: 62 | out_dir = json_dir 63 | 64 | json_files = [f for f in os.listdir(json_dir) if os.path.splitext(f)[1].lower() == ".json" and f.startswith("camera")] 65 | json_files = sorted(json_files) 66 | for json_file in json_files: 67 | camera_id = int(json_file.split("_")[0][6:]) 68 | with open(os.path.join(json_dir, json_file)) as f: 69 | tracking_dict = json.load(f) 70 | tracker = Tracker(tracking_params) 71 | tracking_dict = tracker.correcting_scpt_result(tracking_dict) 72 | out_file = os.path.join(out_dir, "fixed_"+os.path.basename(json_file)) 73 | with open(out_file, mode='w') as f: 74 | json.dump(tracking_dict, f) 75 | 76 | def correct_mcpt_result(scene_id,json_dir,out_dir,tracking_params={}): 77 | with open(os.path.join(json_dir, 'whole_tracking_results.json')) as f: 78 | tracking_results = json.load(f) 79 | with open(os.path.join(json_dir, f"representative_nodes_scene{str(scene_id)}.json")) as f: 80 | representative_nodes = json.load(f) 81 | tracker = Tracker(tracking_params) 82 | tracking_resuluts = tracker.correcting_mcpt_result(scene_id,tracking_results,representative_nodes) 83 | out_file = os.path.join(out_dir, "fixed_whole_tracking_results.json") 84 | with open(out_file, mode='w') as f: 85 | json.dump(tracking_resuluts, f) 86 | 87 | 88 | def load_detections(data_root, debug=False): 89 | print(f"Loading detections from {data_root}.") 90 | detected_objects = DetectedObjects() 91 | detected_objects.load_from_directory(feature_root=data_root) 92 | print(f"Found {len(detected_objects.objects)} frames, and {detected_objects.num_objects} objects.") 93 | if debug: 94 | frames = sorted(detected_objects.objects) 95 | min_num_obj = 9999999 96 | max_num_obj = 0 97 | for frame in frames: 98 | obj = detected_objects[frame] 99 | num = len(obj) 100 | min_num_obj = min(min_num_obj, num) 101 | max_num_obj = max(max_num_obj, num) 102 | print(f"### MIN num detections: {min_num_obj}, MAX num detections: {max_num_obj} ###\n") 103 | 104 | return detected_objects 105 | 106 | def get_args(): 107 | parser = argparse.ArgumentParser(description='Offline Tracker sample app.') 108 | parser.add_argument('-d', '--data', default='EmbedFeature/scene_001', type=str) 109 | parser.add_argument('-o', '--outdir', default='output', type=str) 110 | 111 | return parser.parse_args() 112 | 113 | if __name__ == "__main__": 114 | args = get_args() 115 | 116 | run(feature_data_root=args.data, out_dir=args.outdir, tracking_params={}) 117 | -------------------------------------------------------------------------------- /tracking/infer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | from datetime import datetime 5 | from multiprocessing import Pool 6 | import subprocess 7 | import glob 8 | import tarfile 9 | import argparse 10 | 11 | sys.path.append("tracking") 12 | sys.path.append("tracking/src") 13 | import 
run 14 | 15 | """ 16 | This file contains functions to execute offline tracking. 17 | """ 18 | 19 | # Single camera people tracking 20 | def scpt(tracking_params={}): 21 | # distributed SCPT processing by simply using multiprocessing pool. 22 | global scene_id 23 | global camera_ids 24 | global exp_root 25 | global tracking_parameters 26 | tracking_parameters = tracking_params 27 | 28 | num_processes = 5 # Could be more than 5, but it depends on machine instance 29 | p = Pool(num_processes) 30 | result = p.map(single_tracking, camera_ids) 31 | 32 | run.correct_scpt_result(scene_id=scene_id, json_dir=exp_root, out_dir=exp_root, 33 | tracking_params=tracking_params) 34 | 35 | def single_tracking(cam_id): 36 | global scene_id 37 | global embed_root 38 | global exp_root 39 | global tracking_parameters 40 | 41 | print(f"Started a background process to camera_{cam_id}\n") 42 | run.run_scpt(feature_data_root=f'{embed_root}/scene_{scene_id:03d}/camera_{cam_id:04d}', out_dir=exp_root, 43 | tracking_params=tracking_parameters) 44 | return 45 | 46 | def get_camera_ids(scene_id, json_f="tracking/config/scene_2_camera_id_file.json"): 47 | with open(json_f) as f: 48 | scene2camera = json.load(f) 49 | camera_ids = [] 50 | for scene_camera in scene2camera: 51 | if scene_camera["scene_name"] == f"scene_{scene_id:03d}": 52 | camera_ids = scene_camera["camera_ids"] 53 | break 54 | return camera_ids 55 | 56 | 57 | # Multi camera tracking, aka ReID 58 | def mcpt(tracking_params={}): 59 | global scene_id 60 | global exp_root 61 | global tracking_parameters 62 | tracking_parameters = tracking_params 63 | 64 | run.run_mcpt(scene_id=scene_id, json_dir=exp_root, out_dir=exp_root, tracking_params=tracking_parameters) 65 | run.correct_mcpt_result(scene_id=scene_id, json_dir=exp_root, out_dir=exp_root, tracking_params=tracking_parameters) 66 | 67 | def run_tracking(scene, embed, output, debug=False, tracking_params={}): 68 | """ 69 | Main routine 70 | """ 71 | global scene_id 72 | global embed_root 73 | global exp_root 74 | global output_root 75 | global camera_ids 76 | global exec_scpt 77 | global exec_mcpt 78 | 79 | if debug: 80 | print(f"### tracking parameters: {tracking_params}", flush=True) 81 | 82 | scene_id = scene 83 | embed_root = embed 84 | camera_ids = get_camera_ids(scene_id) 85 | print(f"Target scene ID: {scene_id}, camera IDs: {camera_ids}") 86 | 87 | # Configure output directory 88 | exp_root = os.path.join(output, f"scene_{scene_id:03d}") 89 | output_root = exp_root 90 | 91 | # Execute SCPT (Single Camera People Tracking) 92 | if exec_scpt: 93 | scpt_started = datetime.now() 94 | print(f"Start SCPT: {scpt_started}", flush=True) 95 | scpt(tracking_params=tracking_params) 96 | print(f"SCPT finished. Elapsed: {datetime.now()-scpt_started}", flush=True) 97 | 98 | # Execute MCPT (Multi Camera People Tracking) aka ReID 99 | if exec_mcpt: 100 | mcpt_started = datetime.now() 101 | print(f"Start MCPT: {mcpt_started}") 102 | mcpt(tracking_params=tracking_params) 103 | print(f"MCPT finished. Elapsed: {datetime.now()-mcpt_started}", flush=True) 104 | 105 | 106 | def get_parameters_to_scene(scene_id, param_file): 107 | if not os.path.isfile(param_file): 108 | print(f"'parameters_per_scene file does not exist. 
{param_file}") 109 | return {} 110 | 111 | sys.path.append("tracking/config") 112 | import parameters_per_scene as pps 113 | 114 | scene = int(scene_id) 115 | if scene in pps.parameters_per_scene: 116 | return pps.parameters_per_scene[scene] 117 | else: 118 | return {} 119 | 120 | def get_args(): 121 | parser = argparse.ArgumentParser(description='Offline Tracker Inferencing app.') 122 | parser.add_argument('-s', '--scene', type=int, required=True) 123 | parser.add_argument('-o', '--output', default="Tracking", type=str) 124 | parser.add_argument('-all', '--exec_all', action='store_true') 125 | parser.add_argument('-scpt', '--exec_scpt', action='store_true') 126 | parser.add_argument('-mcpt', '--exec_mcpt', action='store_true') 127 | 128 | return parser.parse_args() 129 | 130 | if __name__ == "__main__": 131 | global exec_scpt 132 | global exec_mcpt 133 | 134 | args = get_args() 135 | 136 | if args.exec_all or (not (args.exec_scpt | args.exec_mcpt)): 137 | exec_scpt = exec_mcpt = True 138 | else: 139 | exec_scpt = exec_mcpt = False 140 | if args.exec_scpt: 141 | exec_scpt = True 142 | if args.exec_mcpt: 143 | exec_mcpt = True 144 | 145 | # Default tracking parameter 146 | default_tracking_parameters = { 147 | "epsilon_scpt": 0.10, "time_period":3,"epsilon_mcpt": 0.37, "short_track_th":120, 148 | "keypoint_condition_th":1, "replace_similarity_by_wcoordinate":True, "distance_type":"min", 149 | "distance_th":10, "sim_th":0.85, "delete_gid_th":5000 150 | } 151 | 152 | scene = args.scene 153 | param_file = "tracking/config/parameters_per_scene.py" 154 | parameters = get_parameters_to_scene(scene, param_file) 155 | if len(parameters) > 0: 156 | tracking_parameters = parameters["tracking_parameters"] 157 | else: 158 | # Empty parameters to the scene, so use the default parameters. 159 | tracking_parameters = default_tracking_parameters 160 | embed_path = f"EmbedFeature" 161 | 162 | # Run offline tracking 163 | run_tracking(scene=scene, embed=embed_path, output=args.output, tracking_params=tracking_parameters) 164 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CVPRW2024: Overlap Suppression Clustering for Offline Multi-Camera People Tracking 2 | 3 | The highest HOTA submission in the 8th NVIDIA AI City Challenge (2024) Track 1: Multi-Camera People Tracking. This submission placed 2nd in the competition due to its offline tracking algorithm. 4 | [[Paper]](https://openaccess.thecvf.com/content/CVPR2024W/AICity/papers/Yoshida_Overlap_Suppression_Clustering__for_Offline_Multi-Camera_People_Tracking_CVPRW_2024_paper.pdf) 5 | 6 | ## Dataset Availability 7 | 8 | The official dataset can be downloaded from the AI City Challenge website (https://www.aicitychallenge.org/2024-data-and-evaluation/). You need to fill out the dataset request form to obtain the password to download them. 9 | 10 | Referring to the DATASET LICENSE AGREEMENT from the dataset author(s), we are not allowed to share the dataset. 11 | ``` 12 | 2.c. ... you may not copy, sell, rent, sublicense, transfer or distribute the DATASET, or share with others. 13 | ``` 14 | 15 | 16 | ## Ranking 17 | 18 | 19 | 20 | ## Overall Pipeline 21 | 22 | 23 | 24 | ## Environment Requirements 25 | 26 | The implementation of our work is built upon [BoT-SORT](https://github.com/NirAharon/BoT-SORT), [OpenMMLab](https://github.com/open-mmlab), and [torchreid](https://github.com/KaiyangZhou/deep-person-reid). 
27 | 
28 | Three different environments are required for the reproduction process. Please install these three environments according to the following repos:
29 | 
30 | 1. [Install BoT-SORT for people detection](https://github.com/NirAharon/BoT-SORT#installation)
31 | 2. [Install torchreid for feature extraction](https://github.com/KaiyangZhou/deep-person-reid#installation)
32 | 3. [Install mmpose for pose estimation](https://mmpose.readthedocs.io/en/latest/installation.html) (*Please note that you need a version in the 0.x series for this to work.)
33 | 
34 | We provide the installation commands for mmpose v0.29.0 below.
35 | Please note that these commands may change due to updates or modifications in mmpose.
36 | ```
37 | #step 1
38 | conda create --name openmmlab python=3.8 -y
39 | conda activate openmmlab
40 | 
41 | #step 2
42 | conda install pytorch torchvision -c pytorch
43 | 
44 | #step 3
45 | pip install -U openmim
46 | mim install mmengine
47 | mim install "mmcv==1.7.0"
48 | 
49 | mim install "mmdet==2.28.2"
50 | 
51 | #Build mmpose from source
52 | git clone https://github.com/open-mmlab/mmpose.git -b v0.29.0 --depth 1
53 | cd mmpose
54 | pip install -r requirements.txt
55 | pip install -v -e .
56 | ```
57 | If you receive an mmcv AssertionError, please reinstall mmcv.
58 | ```
59 | mim uninstall mmcv
60 | mim install "mmcv==1.7.0"
61 | ```
62 | Once you have installed all of the above on the same machine, you'll see the root folder organized as follows:
63 | ```
64 | root
65 | │  README.md
66 | │  ranking.jpg
67 | │  overall-pipeline.png
68 | │
69 | ├─assets
70 | ├─detector
71 | ├─embedder
72 | ├─poser
73 | ├─scripts
74 | ├─tools
75 | ├─tracking
76 | │
77 | ├─BoT-SORT
78 | ├─deep-person-reid
79 | └─mmpose
80 | ```
81 | 
82 | ## Training
83 | This project executes:
84 | 1) Person detection
85 | 2) Feature extraction of each person
86 | 3) Pose estimation of each person
87 | 
88 | However, we utilize pre-trained models for all of these, so there is nothing to train.
89 | 
90 | ## Running Tracking
91 | 
92 | ### Preparation
93 | #### 0. Place your video files.
94 | 
95 | Place your video files under the directory that corresponds to their scene/camera IDs, such as Original/scene_XXX/camera_XXXX/video.mp4.
96 | 
97 | For example, to place the video file for camera 361 of scene 41, run commands like the ones below. Please don't forget to place the video files of all cameras of the scene you want to process.
98 | ```
99 | mkdir -p Original/scene_041/camera_0361
100 | cp <path_to_your_video_file> Original/scene_041/camera_0361/video.mp4
101 | ```
102 | 
103 | #### 1. Frame Extraction
104 | 
105 | Run the command below to extract frame images.
106 | ```
107 | sh scripts/extract_frame.sh 41
108 | ```
109 | 
110 | #### 2. Person Detection
111 | 
112 | Run the steps below for person detection.
113 | - Install BoT-SORT as instructed in the [Environment Requirements](#environment-requirements) section above.
114 | - Prepare models. Download the pretrained YOLOX_x model from [ByteTrack (Google Drive)](https://drive.google.com/file/d/1P4mY0Yyd3PPTybgZkjMYhFri88nTmJX5/view), and store it in the BoT-SORT directory.
115 | - Run person detection by executing the command below.
116 | ```
117 | sh scripts/detection.sh 41
118 | ```
119 | 
120 | #### 3. Feature extraction
121 | 
122 | Run the steps below to extract ReID features.
123 | - Install deep-person-reid as instructed in the [Environment Requirements](#environment-requirements) section above.
124 | - Prepare models. Download the pretrained deep-person-reid model from [torchreid](https://kaiyangzhou.github.io/deep-person-reid/MODEL_ZOO).
Running the script mentioned below will download this pretrained model automatically and store it accordingly.
125 | - Run feature extraction by executing the command below.
126 | ```
127 | sh scripts/embedding.sh 41
128 | ```
129 | 
130 | #### 4. Pose estimation
131 | 
132 | Run the steps below for pose estimation.
133 | - Install mmpose as instructed in the [Environment Requirements](#environment-requirements) section above.
134 | - Run pose estimation by executing the command below.
135 | ```
136 | sh scripts/pose.sh 41
137 | ```
138 | 
139 | ### Single Camera People Tracking and Multi Camera People Tracking
140 | 
141 | #### 5. Both Single Camera People Tracking and Multi Camera People Tracking
142 | 
143 | Run the commands below to run both Single Camera People Tracking and Multi Camera People Tracking at once.
144 | ```
145 | python3 -m venv .venv
146 | source .venv/bin/activate
147 | pip install -r tracking/requirements.txt
148 | sh scripts/tracking.sh 41
149 | ```
150 | 
151 | #### 6. Combine tracking results of each scene for submission.
152 | 
153 | Run the command below to combine the results of all scenes. This will generate track1.txt under the "Submission" directory.
154 | ```
155 | python3 tools/generate_submission.py
156 | ```
157 | 
--------------------------------------------------------------------------------
/poser/top_down_video_demo_with_track_file.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import os
3 | import warnings
4 | from argparse import ArgumentParser
5 | 
6 | import cv2
7 | import mmcv
8 | import json
9 | import numpy as np
10 | 
11 | from mmpose.apis import (collect_multi_frames, inference_top_down_pose_model,
12 |                          init_pose_model, process_mmdet_results,
13 |                          vis_pose_result)
14 | from mmpose.datasets import DatasetInfo
15 | 
16 | try:
17 |     from mmdet.apis import inference_detector, init_detector
18 |     has_mmdet = True
19 | except (ImportError, ModuleNotFoundError):
20 |     has_mmdet = False
21 | 
22 | from load_tracking_result import load_tracking
23 | 
24 | 
25 | def main():
26 |     """Visualize the demo video (support both single-frame and multi-frame).
27 | 
28 |     Using mmdet to detect the human.
29 |     """
30 |     parser = ArgumentParser()
31 |     parser.add_argument('track_result', help='Track result file')
32 |     parser.add_argument('pose_config', help='Config file for pose')
33 |     parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
34 |     parser.add_argument('--video-path', type=str, help='Video path')
35 |     parser.add_argument(
36 |         '--show',
37 |         action='store_true',
38 |         default=False,
39 |         help='whether to show visualizations.')
40 |     parser.add_argument(
41 |         '--out-video-root',
42 |         default='',
43 |         help='Root of the output video file.
' 44 | 'Default not saving the visualization video.') 45 | parser.add_argument( 46 | '--device', default='cuda:0', help='Device used for inference') 47 | parser.add_argument( 48 | '--det-cat-id', 49 | type=int, 50 | default=1, 51 | help='Category id for bounding box detection model') 52 | parser.add_argument( 53 | '--bbox-thr', 54 | type=float, 55 | default=0.3, 56 | help='Bounding box score threshold') 57 | parser.add_argument( 58 | '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold') 59 | parser.add_argument( 60 | '--radius', 61 | type=int, 62 | default=4, 63 | help='Keypoint radius for visualization') 64 | parser.add_argument( 65 | '--thickness', 66 | type=int, 67 | default=1, 68 | help='Link thickness for visualization') 69 | parser.add_argument( 70 | '--use-multi-frames', 71 | action='store_true', 72 | default=False, 73 | help='whether to use multi frames for inference in the pose' 74 | 'estimation stage. Default: False.') 75 | parser.add_argument( 76 | '--online', 77 | action='store_true', 78 | default=False, 79 | help='inference mode. If set to True, can not use future frame' 80 | 'information when using multi frames for inference in the pose' 81 | 'estimation stage. Default: False.') 82 | parser.add_argument( 83 | '--out-file', 84 | type=str 85 | ) 86 | 87 | 88 | assert has_mmdet, 'Please install mmdet to run the demo.' 89 | 90 | args = parser.parse_args() 91 | 92 | # assert args.show or (args.out_video_root != '') 93 | 94 | print('Initializing model...') 95 | # # build the detection model from a config file and a checkpoint file 96 | # det_model = init_detector( 97 | # args.det_config, args.det_checkpoint, device=args.device.lower()) 98 | track_results = load_tracking(args.track_result) 99 | 100 | # build the pose model from a config file and a checkpoint file 101 | pose_model = init_pose_model( 102 | args.pose_config, args.pose_checkpoint, device=args.device.lower()) 103 | 104 | dataset = pose_model.cfg.data['test']['type'] 105 | # get datasetinfo 106 | dataset_info = pose_model.cfg.data['test'].get('dataset_info', None) 107 | if dataset_info is None: 108 | warnings.warn( 109 | 'Please set `dataset_info` in the config.' 110 | 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.', 111 | DeprecationWarning) 112 | else: 113 | dataset_info = DatasetInfo(dataset_info) 114 | 115 | # read video 116 | video = mmcv.VideoReader(args.video_path) 117 | assert video.opened, f'Faild to load video file {args.video_path}' 118 | 119 | if args.out_video_root == '': 120 | save_out_video = False 121 | else: 122 | os.makedirs(args.out_video_root, exist_ok=True) 123 | save_out_video = True 124 | 125 | if save_out_video: 126 | fps = video.fps 127 | size = (video.width, video.height) 128 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 129 | videoWriter = cv2.VideoWriter( 130 | os.path.join(args.out_video_root, 131 | f'vis_{os.path.basename(args.video_path)}'), fourcc, 132 | fps, size) 133 | 134 | # frame index offsets for inference, used in multi-frame inference setting 135 | if args.use_multi_frames: 136 | assert 'frame_indices_test' in pose_model.cfg.data.test.data_cfg 137 | indices = pose_model.cfg.data.test.data_cfg['frame_indices_test'] 138 | 139 | # whether to return heatmap, optional 140 | return_heatmap = False 141 | 142 | # return the output of some desired layers, 143 | # e.g. 
use ('backbone', ) to return backbone feature 144 | output_layer_names = None 145 | 146 | save_results = {} 147 | 148 | print('Running inference...') 149 | for frame_id, cur_frame in enumerate(mmcv.track_iter_progress(video)): 150 | # get the detection results of current frame 151 | # the resulting box is (x1, y1, x2, y2) 152 | # mmdet_results = inference_detector(det_model, cur_frame) 153 | 154 | # # keep the person class bounding boxes. 155 | # person_results = process_mmdet_results(mmdet_results, args.det_cat_id) 156 | if frame_id not in track_results.keys(): 157 | continue 158 | person_results = track_results[frame_id] 159 | 160 | if args.use_multi_frames: 161 | frames = collect_multi_frames(video, frame_id, indices, 162 | args.online) 163 | 164 | # test a single image, with a list of bboxes. 165 | pose_results, returned_outputs = inference_top_down_pose_model( 166 | pose_model, 167 | frames if args.use_multi_frames else cur_frame, 168 | person_results, 169 | bbox_thr=args.bbox_thr, 170 | format='xyxy', 171 | dataset=dataset, 172 | dataset_info=dataset_info, 173 | return_heatmap=return_heatmap, 174 | outputs=output_layer_names) 175 | 176 | save_results[frame_id] = pose_results 177 | # show the results 178 | vis_frame = vis_pose_result( 179 | pose_model, 180 | cur_frame, 181 | pose_results, 182 | dataset=dataset, 183 | dataset_info=dataset_info, 184 | kpt_score_thr=args.kpt_thr, 185 | radius=args.radius, 186 | thickness=args.thickness, 187 | show=False) 188 | 189 | if args.show: 190 | cv2.imshow('Frame', vis_frame) 191 | 192 | if save_out_video: 193 | videoWriter.write(vis_frame) 194 | 195 | if args.show and cv2.waitKey(1) & 0xFF == ord('q'): 196 | break 197 | 198 | class NumpyEncoder(json.JSONEncoder): 199 | """ Special json encoder for numpy types """ 200 | def default(self, obj): 201 | if isinstance(obj, np.integer): 202 | return int(obj) 203 | elif isinstance(obj, np.floating): 204 | return float(obj) 205 | elif isinstance(obj, np.ndarray): 206 | return obj.tolist() 207 | return json.JSONEncoder.default(self, obj) 208 | os.makedirs(os.path.dirname(args.out_file), exist_ok=True) 209 | json.dump(save_results, open(args.out_file, 'w'), cls=NumpyEncoder) 210 | 211 | if save_out_video: 212 | videoWriter.release() 213 | if args.show: 214 | cv2.destroyAllWindows() 215 | 216 | 217 | if __name__ == '__main__': 218 | main() 219 | -------------------------------------------------------------------------------- /tracking/src/tracking.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import tqdm 4 | from sklearn.cluster import DBSCAN 5 | from scipy.spatial.distance import cdist 6 | from sklearn.metrics.pairwise import cosine_similarity 7 | 8 | from utils import DetectedObjects 9 | from scpt import * 10 | from mcpt import * 11 | 12 | class Tracker(): 13 | """ 14 | This class represents YOTM, aka Yoshida Offline Tracking Method. 
15 | """ 16 | def __init__(self, params={}): 17 | self.camera_ids = [] 18 | self.tracking_dicts = {} 19 | self._init_parameters() 20 | self.update_parameters(**params) 21 | self.frame_period = self.parameters["time_period"] * self.parameters["fps"] 22 | 23 | def _init_parameters(self): 24 | 25 | #self.parameters[""]: = 26 | self.parameters = {} 27 | self.parameters["image_size"] = (1920,1080) 28 | 29 | # sct parameters 30 | self.parameters["time_period"]:int = 3 31 | self.parameters["fps"]:int = 30 32 | self.parameters["epsilon_scpt"]:float = 0.1 33 | self.parameters["min_samples"]:int = 4 34 | self.parameters["remove_noise_cluster"]:bool = True 35 | self.parameters["overlap_suppression"]:bool = True 36 | self.parameters["num_candidates"]:int = 10 37 | self.parameters["clustering_method"]:str = "agglomerative" #agglomerative or dbsacn 38 | self.parameters["debug"]:bool = False 39 | 40 | #fix_sct parameters 41 | self.parameters["sequential_nms"]:bool = True 42 | self.parameters["temporally_snms_th"]:float = 0.6 43 | self.parameters["spatially_snms_th"]:float = 0.6 44 | self.parameters["merge_nonoverlap"]:bool = True 45 | 46 | self.parameters["separate_warp"]:bool = True 47 | self.parameters["warp_th"]:int = 40 48 | self.parameters["alpha"]:float = 0.5 49 | 50 | self.parameters["exclude_short_track"]:bool = False 51 | self.parameters["short_tracklet_th"]:int = 120 52 | 53 | self.parameters["exclude_motionless_track"]:bool = False 54 | self.parameters["stop_track_th"]:int = 25 55 | 56 | # mct parameters 57 | self.parameters["epsilon_mcpt"]:float = 0.4 58 | self.parameters["keypoint_th"]:float = 0.8 59 | self.parameters["keypoint_condition_th"]:float = 1 60 | self.parameters["distance_th"]:int = 5 61 | 62 | self.parameters["check_sc_overlap"]:bool = False 63 | self.parameters["distance_type"]:str = "max" #max or mean or min 64 | self.parameters["replace_similarity_by_wcoordinate"]:bool = False 65 | self.parameters["replace_value"]: float = -10 66 | self.parameters["representative_selection_method"]:str = "keypoint" #keypoint or centrality 67 | self.parameters["aspect_th"]:float =0.5 68 | 69 | # fix mct parameters 70 | self.parameters["reassign_global_id"]:bool = True 71 | self.parameters["short_track_th"]:int = 120 72 | self.parameters["delete_gid_th"]:int = 6000 73 | self.parameters["assign_all_tracklet"]:bool = False 74 | self.parameters["sim_th"]:float = 0.75 75 | self.parameters["delete_few_camera_cluster"]:bool = False 76 | 77 | self.parameters["measure_wcoordinate"]:bool = False 78 | 79 | self.parameters["remove_noise_image"]:bool = True 80 | 81 | self.parameters["delete_distant_person"]:bool = True 82 | 83 | self.parameters["interpolate_track"]:bool = True 84 | self.parameters["max_interpolate_interval"]:int = 15 85 | 86 | 87 | def update_parameter(self, parameter, value): 88 | if not parameter in self.parameters: 89 | print(f"Unknown parameter: {parameter}.") 90 | sys.exit() 91 | return 92 | self.parameters[parameter] = value 93 | 94 | def update_parameters(self, **params): 95 | for key in params: 96 | self.update_parameter(key, params[key]) 97 | 98 | def scpt(self, tracking_dict): 99 | """ 100 | This performs object tracking with single camera dataset. 101 | Most of code below are just copied from '20240214_OfflineTracking-Debug.ipynb' and tweaked few. 
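        Outline: frames are grouped into time sections of time_period * fps frames; the
        detections of each section are clustered on their appearance features (agglomerative
        clustering or DBSCAN, controlled by clustering_method and epsilon_scpt), and the
        clusters of consecutive sections are associated with each other, so that every
        detection serial ends up with an "OfflineID" tracklet label.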
102 | """ 103 | 104 | frame_period = self.parameters["time_period"] * self.parameters["fps"] 105 | epsilon = self.parameters["epsilon_scpt"] 106 | 107 | max_offlineid = -1 108 | last_frame = get_max_value_of_dict(tracking_dict, "Frame") 109 | time_section_serial_dict = {timesection:[] for timesection in range(last_frame//frame_period+1) } 110 | 111 | for serial in tracking_dict.keys(): 112 | frame = tracking_dict[serial]["Frame"] 113 | time_section = frame // frame_period 114 | time_section_serial_dict[time_section].append(serial) 115 | 116 | for time_section in range(last_frame//frame_period+1): 117 | serials = time_section_serial_dict[time_section] 118 | if len(serials) == 0: continue 119 | clusters = tracking_by_clustering(tracking_dict,serials, **self.parameters) 120 | 121 | clusters = [cluster+max_offlineid+1 if cluster != -1 else -i for i,cluster in enumerate(clusters)] 122 | max_offlineid = max(clusters) if max(clusters) > 0 else max_offlineid 123 | 124 | if time_section == 0: 125 | for serial,cluster in zip(serials,clusters): 126 | tracking_dict[serial]["OfflineID"] = int(cluster) 127 | elif time_section > 0: 128 | past_serials = time_section_serial_dict[time_section-1] 129 | tracking_dict = associate_cluster_between_period(tracking_dict, clusters, serials, past_serials, **self.parameters) 130 | 131 | # We have tracking results in TrackingDict, yet will gather results for debugging. Could be deleted. 132 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 133 | new_offline_ids_dict = {key:i for i,key in enumerate(set(offline_ids)) if key != -1} 134 | new_offline_ids_dict[-1] = -1 135 | 136 | for serial in tracking_dict: 137 | offline_id = tracking_dict[serial]["OfflineID"] 138 | tracking_dict[serial]["OfflineID"] = new_offline_ids_dict[offline_id] 139 | 140 | return tracking_dict 141 | 142 | def correcting_scpt_result(self,tracking_dict,**kwargs): 143 | 144 | sequential_nms = self.parameters["sequential_nms"] 145 | separate_warp = self.parameters["separate_warp"] 146 | exclude_short_track = self.parameters["exclude_short_track"] 147 | exclude_motionless_track = self.parameters["exclude_motionless_track"] 148 | print("sequential_nms:",sequential_nms) 149 | print("separate_warp:",separate_warp) 150 | print("exclude_short_track:",exclude_short_track) 151 | print("exclude_motionless_track:",exclude_motionless_track) 152 | 153 | if sequential_nms: 154 | tracking_dict = sequential_non_maximum_suppression(tracking_dict, **self.parameters) 155 | if separate_warp: 156 | tracking_dict = separate_warp_tracklet(tracking_dict, **self.parameters) 157 | if exclude_short_track: 158 | tracking_dict = exclude_short_tracklet(tracking_dict, **self.parameters) 159 | if exclude_motionless_track: 160 | tracking_dict = exclude_motionless_tracklet(tracking_dict, **self.parameters) 161 | return tracking_dict 162 | 163 | def mcpt(self,scene_id, json_dir,out_dir): 164 | epsilon = self.parameters["epsilon_mcpt"] 165 | 166 | if not os.path.isdir(json_dir): 167 | raise Exception(f"The directory '{json_dir}' does not exist.") 168 | if out_dir == None: 169 | out_dir = json_dir 170 | tracking_results = {} 171 | json_files = [f for f in os.listdir(json_dir) if os.path.splitext(f)[1].lower() == ".json" and f.startswith("fixed_camera")] 172 | json_files = sorted(json_files) 173 | for json_file in json_files: 174 | camera_id = int(json_file.split("_")[1][6:]) 175 | with open(os.path.join(json_dir, json_file)) as f: 176 | tracking_dict = json.load(f) 177 | print(f"{json_file} 
len(serials):{len(tracking_dict)}") 178 | tracking_results[camera_id] = tracking_dict 179 | tracking_results = multi_camera_people_tracking(tracking_results, scene_id=scene_id, json_dir=json_dir, out_dir=out_dir, **self.parameters) 180 | 181 | return tracking_results 182 | 183 | def correcting_mcpt_result(self,scene_id,tracking_results,represntative_nodes,**kwargs): 184 | reassign_global_id = self.parameters["reassign_global_id"] 185 | measure_wcoordinate = self.parameters["measure_wcoordinate"] 186 | interpolate_track = self.parameters["interpolate_track"] 187 | remove_noise_image = self.parameters["remove_noise_image"] 188 | delete_distant_person = self.parameters["delete_distant_person"] 189 | print("reassign_global_id:",reassign_global_id) 190 | print("measure_wcoordinate:",measure_wcoordinate) 191 | print("interpolate_track:",interpolate_track) 192 | print("delete_distant_person:",delete_distant_person) 193 | 194 | if reassign_global_id: 195 | tracking_results = global_id_reassignment(tracking_results,represntative_nodes,scene_id,**self.parameters) 196 | if measure_wcoordinate: 197 | tracking_results = measure_world_coordinate(scene_id,tracking_results,**self.parameters) 198 | if remove_noise_image: 199 | tracking_results = remove_noise_images(scene_id,tracking_results,**self.parameters) 200 | if delete_distant_person: 201 | tracking_results = delete_distant_persons(tracking_results,**self.parameters) 202 | if interpolate_track: 203 | tracking_results = interpolate_tracklet(tracking_results,represntative_nodes,**self.parameters) 204 | 205 | return tracking_results 206 | -------------------------------------------------------------------------------- /tracking/src/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import glob 5 | 6 | 7 | class DetectedObjects: 8 | """ 9 | Represents whole detected objects to track. 10 | Object dict is built by frame_id as a key and its entity contains a list of all Detected objects of the frame. 11 | """ 12 | def __init__(self): 13 | self.num_objects = 0 14 | self.objects = {} 15 | self._objects_registered = {} 16 | #self.scene_id = scene_id 17 | #self.camera_id = -1 18 | self.camera_projection_matrix = None 19 | self.homography_matrix = None 20 | 21 | def __str__(self): 22 | return f"DetectedObjects: scene_id:{self.scene_id}, camera_id:{self.camera_id}, num_objects:{self.num_objects}" 23 | 24 | def load_from_directory(self, feature_root, calibration_path="Calibration"): 25 | if not os.path.isdir(feature_root): 26 | raise Exception(f'There is no directory to read from. {feature_root}') 27 | npys = sorted(glob.glob(os.path.join(feature_root, "**/*.npy"), recursive=True)) 28 | scene_id = None 29 | camera_id = None 30 | path_list = feature_root.split("/") 31 | for dir in path_list: 32 | if dir.startswith("scene_"): 33 | scene_id = int(dir.replace("scene_","")) 34 | if dir.startswith("camera_"): 35 | camera_id = int(dir.replace("camera_","")) 36 | if scene_id is not None and camera_id is not None: 37 | calibration_path = f"Original/scene_{scene_id:03d}/camera_{camera_id:04d}/calibration.json" 38 | self.load_calibration(calibration_path) 39 | else: 40 | print(f'\033[33mwarning\033[0m : failed to get scene_id and camera_id from feature path.') 41 | print(f'\033[33mwarning\033[0m : world coordinate calculations are ignored.') 42 | 43 | 44 | # Below is to parse camera id from the path, we're probably not going to use it though. 
45 | #camera_id = None 46 | #dirs = npys[0].split("/") 47 | #if len(dirs) < 2: 48 | # print(f"Cannot prop camera id from input path. {feature_path}") 49 | #else: 50 | # camera_id = dirs[-1] 51 | # if "Camera" in camera_id: 52 | # self.camera_id = int(camera_id[len("Camera"):]) 53 | 54 | for f in npys: 55 | self.add_object_from_image_path(f) 56 | 57 | def add_object(self, frame_id, coordinate, world_coordinate, confidence, feature_path, image_path=None): 58 | if isinstance(frame_id, str): 59 | frame_id = int(frame_id) 60 | 61 | # Check if coordinate is reasonable 62 | if coordinate.x1 >= coordinate.x2 or coordinate.y1 >= coordinate.y2: 63 | print(f"Unnatural coordinate found in frame {frame_id}: {coordinate}") 64 | return 65 | 66 | detected_obj = DetectedObject(object_id=self.num_objects, frame_id=frame_id, coordinate=coordinate, worldcoordinate=world_coordinate, 67 | confidence=confidence, feature_path=feature_path) 68 | key = f"{coordinate.x1}_{coordinate.y1}_{coordinate.x2}_{coordinate.y2}" 69 | if frame_id in self.objects: 70 | if not key in self._objects_registered[frame_id]: 71 | objects_per_frame = self.objects[frame_id].append(detected_obj) 72 | self._objects_registered[frame_id].append(key) 73 | else: 74 | print(f"Duplicate coord found in frame {frame_id}: {coordinate}") 75 | return 76 | else: 77 | objects_per_frame = self.objects[frame_id] = [detected_obj] 78 | self._objects_registered[frame_id] = [key] 79 | self.num_objects += 1 80 | 81 | def add_object_from_image_path(self, feature_path, image_path=None, calibration_path="Calibration"): 82 | file_path = os.path.basename(feature_path) 83 | if file_path.startswith("feature_"): 84 | _, frame_id, serial_no, x1, x2, y1, y2, conf = os.path.splitext(file_path)[0].split("_") 85 | conf = conf if len(conf) == 1 else conf[0]+"."+conf[1:] 86 | else: 87 | serial_no, frame_id, x1, x2, y1, y2 = os.path.splitext(file_path)[0].split("_") 88 | x1, x2, y1, y2 = int(x1.replace("x","")), int(x2), int(y1.replace("y","")), int(y2) 89 | conf = 0.98765 # Dummy 90 | World_coordinate = None 91 | if self.homography_matrix is not None: 92 | w_x, w_y = self.convert_coordinates_2world((int(float(x1)) + int(float(x2))) / 2, int(float(y2))) 93 | World_coordinate = WorldCoordinate(w_x, w_y) 94 | 95 | self.add_object(frame_id=int(frame_id), coordinate=Coordinate(x1, y1, x2, y2), world_coordinate=World_coordinate, 96 | confidence=float(conf), feature_path=feature_path, image_path=image_path) 97 | 98 | def get_objects_of_frames(self, start_frame, end_frame): 99 | if start_frame > self.num_frames() or end_frame > self.num_frames(): 100 | return None 101 | object_dict = {} 102 | for frame_id in range(start_frame, end_frame): 103 | if frame_id in self.objects: 104 | object_dict[frame_id] = self[frame_id] 105 | #else: 106 | # print(f"There is no such frame in the DetectedObjects, will be ignored. frame_id: {frame_id}") 107 | return object_dict 108 | 109 | def get_object_ids_of_frames(self, start_frame, end_frame): 110 | """ 111 | Returns a list of detected object IDs that appeared within the specified frame window. 
112 | """ 113 | if start_frame > self.num_frames() or end_frame > self.num_frames(): 114 | return None 115 | object_ids = [] 116 | for frame_id in range(start_frame, end_frame): 117 | if frame_id in self.objects: 118 | for det in self[frame_id]: 119 | object_ids.append(det.object_id) 120 | return sorted(object_ids) 121 | 122 | def __getitem__(self, frame_id): 123 | if frame_id in self.objects: 124 | return self.objects[frame_id] 125 | else: 126 | return None 127 | 128 | def num_frames(self): 129 | """ 130 | Returns number of frames that currently holding. 131 | """ 132 | return len(self.objects) 133 | 134 | def last_frame_id(self): 135 | """ 136 | Returns the last frame id. 137 | """ 138 | return max(self.objects.keys()) 139 | 140 | def to_trackingdict(self): 141 | """ 142 | Compatibility function to convert detections in TrackingDict format. 143 | """ 144 | track_dict = {} 145 | for frame_id in self.objects: 146 | for detected_object in self.objects[frame_id]: 147 | serial_no = detected_object.object_id 148 | coordinate = json.loads(detected_object.coordinate.__str__()) 149 | if detected_object.worldcoordinate.__str__() != "None": 150 | world_coordinate = json.loads(detected_object.worldcoordinate.__str__()) 151 | else: 152 | world_coordinate = None 153 | new_object = { "Frame": frame_id, "NpyPath": detected_object.feature_path, 154 | "Coordinate": coordinate, "WorldCoordinate": world_coordinate, "OfflineID": -1 } #"ClusterID": None, 155 | track_dict[serial_no] = new_object 156 | return track_dict 157 | 158 | def load_calibration(self, calib_path): 159 | if os.path.isfile(calib_path): 160 | with open(calib_path, 'r') as file: 161 | data = json.load(file) 162 | self.camera_projection_matrix = np.array(data["camera projection matrix"]) 163 | self.homography_matrix = np.array(data["homography matrix"]) 164 | else: 165 | print(f'\033[33mwarning\033[0m : not found Calibration File.') 166 | print(f'\033[33mwarning\033[0m : world coordinate calculations are ignored.') 167 | 168 | def convert_coordinates_2world(self, x, y): 169 | vector_xyz = np.array([x, y, 1]) # z=1 170 | vector_xyz_3d = np.dot(np.linalg.inv(self.homography_matrix), vector_xyz.T) 171 | return vector_xyz_3d[0] / vector_xyz_3d[2], vector_xyz_3d[1] / vector_xyz_3d[2] 172 | 173 | class DetectedObject: 174 | """ 175 | Represents individual detected object to track. 
176 | """ 177 | def __init__(self, object_id, frame_id, coordinate, confidence, worldcoordinate, feature_path, image_path=None): 178 | self.object_id = f"{object_id:08d}" # AKA serial number 179 | self.frame_id = frame_id 180 | self.feature_path = feature_path 181 | self.confidence = confidence 182 | self.image_path = image_path 183 | if isinstance(coordinate, Coordinate): 184 | self.coordinate = coordinate 185 | elif isinstance(coordinate, (list, tuple)) and len(coordinate) == 4: 186 | self.coordinate = Coordinate(*coordinate) 187 | else: 188 | raise Exception(f"Unknown coordinate format: {coordinate}") 189 | 190 | if isinstance(worldcoordinate, WorldCoordinate): 191 | self.worldcoordinate = worldcoordinate 192 | elif isinstance(worldcoordinate, (list, tuple)) and len(worldcoordinate) == 4: 193 | self.worldcoordinate = WorldCoordinate(*worldcoordinate) 194 | else: 195 | self.worldcoordinate = None 196 | 197 | class Coordinate: 198 | def __init__(self, x1, y1, x2, y2): 199 | self.x1 = int(float(x1)) 200 | self.y1 = int(float(y1)) 201 | self.x2 = int(float(x2)) 202 | self.y2 = int(float(y2)) 203 | 204 | def __str__(self): 205 | return(f'{{"x1":{self.x1}, "y1":{self.y1}, "x2":{self.x2}, "y2":{self.y2}}}') 206 | 207 | class WorldCoordinate: 208 | def __init__(self, x, y): 209 | self.x = float(x) 210 | self.y = float(y) 211 | 212 | def __str__(self): 213 | return(f'{{"x":{self.x}, "y":{self.y}}}') 214 | 215 | class TrackingCluster: 216 | def __init__(self, camera_id, offline_id): 217 | self.camera_id = camera_id 218 | self.offline_id = 0 219 | self.global_offline_id = -1 220 | self.clusters = {} 221 | self.serials = [] 222 | 223 | def add(self, serial): 224 | if serial in self.serials: 225 | raise Exception("DUP!") 226 | self.serials.append(serial) 227 | 228 | 229 | class TrackingClusters: 230 | def __init__(self, camera_id): 231 | self.camera_id = camera_id 232 | self.clusters = [] 233 | self.offline_ids = [] 234 | 235 | def add(self, cluster: TrackingCluster): 236 | cl_id = cluster.offline_id 237 | if cl_id in self.offline_ids: 238 | raise Exception("DUP!") 239 | else: 240 | self.clusters.append(cluster) 241 | 242 | def get(self, cluster_id): 243 | if not cluster_id in self.offline_ids: 244 | raise Exception("No cluster_id registered. {cluster_id}") 245 | else: 246 | return self.clusters[offline_ids.index(cluster_id)] 247 | 248 | class feature_vector_shed: 249 | def __init__(self): 250 | self.features = {} 251 | 252 | def add_vector(self, camera_id, serial_no, npy_path): 253 | key = camera_id + "_" + serial_no 254 | if key in self.features: 255 | print(f"Feature vector of camera ID '{camera_id}' and serial no '{serial_no}' is already exist. ") 256 | return 257 | 258 | if not os.path.isfile(npy_path): 259 | print(f"The feature vector file '{npy_path}' does not exist. 
") 260 | return 261 | feature = np.load(npy_path) 262 | self.features[key] = feature 263 | 264 | def get(self, camera_id, serial_no): 265 | key = camera_id + "_" + serial_no 266 | return self.features[key] 267 | -------------------------------------------------------------------------------- /tracking/src/pose.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import cv2 5 | 6 | class PoseKeypoints: 7 | def __init__(self, keypoint_json): 8 | self.kp_indice_foot = [15, 16] # ankles 9 | self.kp_indice_torso = [5, 6, 11, 12, 13, 14] # shoulders, hips, knees 10 | self.kp_indice_torso_legs = [5, 6, 11, 12, 13, 14, 15, 16] # shoulders, hips, knees, ankles 11 | 12 | self._parse_keypoint_json(keypoint_json) 13 | self.serial_dict = {} 14 | 15 | def _parse_keypoint_json(self, file_path): 16 | if os.path.isfile(file_path): 17 | with open(file_path, 'r') as file: 18 | data = json.load(file) 19 | self.keypoints = data 20 | else: 21 | raise Exception(f"Keypoint json file '{file_path}' does not exist.") 22 | 23 | def filter(self, keypoints=None, score_thr=0.3, target_parts="torso_legs", max_frames=0): 24 | filtered = {} 25 | if keypoints == None: 26 | keypoints = self.keypoints 27 | for i, frame in enumerate(keypoints): 28 | if max_frames != 0 and i >= max_frames: 29 | break 30 | detections = keypoints[frame] 31 | target_indices = self.kp_indice_torso if target_parts == "torso" else self.kp_indice_torso_legs 32 | for det in detections: 33 | kps = det["keypoints"] 34 | confidences = [k for i2, k in enumerate(kps) if i2 in target_indices and k[2] >= score_thr] 35 | if len(confidences) < (len(target_indices)): 36 | continue 37 | 38 | pose_entity = [det["bbox"], ] 39 | if int(frame) in filtered: 40 | filtered[frame].append(det) 41 | else: 42 | filtered[frame] = [det] 43 | print(f"Num of filtered results: {len(filtered)}") 44 | return filtered 45 | 46 | def summary(self): # Just show top_n data 47 | if len(self.keypoints) <= 0: 48 | print(f"Empty keypoints") 49 | return 50 | print(f"Number of frames: {len(self.keypoints)}") 51 | 52 | def get_keypoints(self, serial:str): 53 | """ 54 | This must be called after assign_serial_from_tracking_dict() was called, 55 | as it builds a dictionary with "serial" number as keys. 56 | 57 | serial: zero-filled 8-digit string 58 | """ 59 | if isinstance(serial, int): 60 | serial = f"{serial:08d}" 61 | elif isinstance(serial, str) and len(serial) != 8: 62 | serial = f"{int(serial):08d}" 63 | if len(self.serial_dict) <= 0: 64 | raise Exception(f"Serial based dictionary is not built yet.") 65 | if serial in self.serial_dict: 66 | return self.serial_dict[serial] 67 | else: 68 | return None 69 | 70 | def _build_serial_dict(self, keypoints=None): 71 | """ 72 | This must be called after assign_serial_from_tracking_dict() was called, 73 | as it builds a dictionary with "serial" number as keys. 
74 | """ 75 | if len(keypoints) == 0: 76 | return None 77 | serial_dict = {} 78 | if keypoints == None: 79 | keypoints = self.keypoints 80 | for i, frame in enumerate(keypoints): 81 | detections = keypoints[frame] 82 | for det in detections: 83 | if "serial" in det: 84 | serial = det["serial"] 85 | if serial in serial_dict: 86 | print(f"DUP in serial numbers!!") 87 | else: 88 | serial_dict[serial] = {"bbox": det["bbox"], "Keypoints": det["keypoints"]} 89 | self.serial_dict = serial_dict 90 | 91 | return self.serial_dict 92 | 93 | def assign_serial_from_tracking_dict(self, tracking_dict, keypoints=None): 94 | """ 95 | tracking_dict: dictionary of tracking_dict or path to tracking_dict json file. 96 | """ 97 | if keypoints == None: 98 | keypoints = self.keypoints 99 | if isinstance(tracking_dict, str): 100 | if os.path.isfile(tracking_dict): 101 | with open(tracking_dict) as f: 102 | tracking_dict = json.load(f) 103 | tracking_coord = {} 104 | for serial in tracking_dict: 105 | td_coord = tracking_dict[serial]["Coordinate"] 106 | td_frame = tracking_dict[serial]["Frame"] 107 | key = f"{td_frame}_{td_coord['x1']}_{td_coord['y1']}_{td_coord['x2']}_{td_coord['y2']}" 108 | if key in tracking_coord: 109 | continue #raise Exception(f"DUP! {key}") 110 | tracking_coord[key] = serial 111 | for frame in keypoints: 112 | detections = keypoints[frame] 113 | 114 | for det in detections: 115 | bbox = det["bbox"] 116 | key = f"{int(frame)}_{int(bbox[0])}_{int(bbox[1])}_{int(bbox[2])}_{int(bbox[3])}" 117 | if key in tracking_coord: 118 | det["serial"] = tracking_coord[key] 119 | else: 120 | #print(f"No tracking found for bbox: {key}, {bbox}") 121 | pass 122 | 123 | # Build dict with serial as key 124 | return self._build_serial_dict(keypoints=keypoints) 125 | 126 | def show_footpoints(self, keypoints=None, frame_img_root="Frames", output_mp4=None, score_thr=0.3, target_parts="torso_legs", max_frames=0): # Generate mp4 127 | # Creating mp4 128 | if output_mp4 == None: 129 | output_mp4 = f"foot_points.mp4" 130 | fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') 131 | video_wtr = cv2.VideoWriter(output_mp4, fourcc=fourcc, fps=30.0, frameSize=(1280, 960)) 132 | if not video_wtr.isOpened(): 133 | print(f"Cannot open video writer.") 134 | return 135 | 136 | filtered = self.filter(keypoints=keypoints, score_thr=score_thr, target_parts=target_parts, max_frames=max_frames) 137 | for frame in filtered: 138 | # Read frame image file 139 | frame_img_path = os.path.join(frame_img_root, f"{int(frame):06d}.jpg") 140 | frame_img = cv2.imread(frame_img_path) 141 | detections = self.keypoints[frame] 142 | target_indices = self.kp_indice_torso if target_parts == "torso" else self.kp_indice_torso_legs 143 | for det in detections: 144 | keypoints = det["keypoints"] 145 | left_ankle, right_ankle = keypoints[self.kp_indice_foot[0]], keypoints[self.kp_indice_foot[1]] 146 | if float(left_ankle[2]) >= score_thr: 147 | color = (0, 255, 0) 148 | else: 149 | #print(f"Low confidence on KP[15]: {float(fp1[2])}") 150 | color = (0, 0, 255) 151 | cv2.circle(frame_img, (int(left_ankle[0]), int(left_ankle[1])), 5, color, 3) 152 | 153 | if float(right_ankle[2]) >= score_thr: 154 | color = (0, 255, 0) 155 | else: 156 | #print(f"Low confidence on KP[16]: {float(fp2[2])}") 157 | color = (0, 0, 255) 158 | cv2.circle(frame_img, (int(right_ankle[0]), int(right_ankle[1])), 5, color, 3) 159 | frame_img = cv2.resize(frame_img, (1280, 960)) 160 | video_wtr.write(frame_img) 161 | video_wtr.release() 162 | print(f"Saved video file: {output_mp4}\n") 163 
| 164 | def show_footpoints_custom(self, frame_img_root="Frames", output_mp4=None, score_thr=0.3, target_parts="torso_legs"): # Generate mp4 165 | # Creating mp4 166 | if output_mp4 == None: 167 | output_mp4 = f"foot_points.mp4" 168 | fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') 169 | video_wtr = cv2.VideoWriter(output_mp4, fourcc=fourcc, fps=30.0, frameSize=(1280, 960)) 170 | if not video_wtr.isOpened(): 171 | print(f"Cannot open video writer.") 172 | return 173 | 174 | for i, frame in enumerate(self.keypoints): 175 | if i >= 300: # only 10-sec, just for debug 176 | break 177 | # Read frame image file 178 | frame_img_path = os.path.join(frame_img_root, f"{int(frame):06d}.jpg") 179 | frame_img = cv2.imread(frame_img_path) 180 | detections = self.keypoints[frame] 181 | target_indices = self.kp_indice_torso if target_parts == "torso" else self.kp_indice_torso_legs 182 | for det in detections: 183 | keypoints = det["keypoints"] 184 | confidences = [k for i2, k in enumerate(keypoints) if i2 in target_indices and k[2] >= score_thr] 185 | if len(confidences) < (len(target_indices)): 186 | # Show bbox in red if doesn't meet the criteria 187 | bbox = det["bbox"] 188 | cv2.rectangle(frame_img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), thickness=2) 189 | 190 | left_ankle, right_ankle = keypoints[self.kp_indice_foot[0]], keypoints[self.kp_indice_foot[1]] 191 | if float(left_ankle[2]) >= score_thr: 192 | color = (0, 255, 0) 193 | else: 194 | #print(f"Low confidence on KP[15]: {float(fp1[2])}") 195 | color = (0, 0, 255) 196 | cv2.circle(frame_img, (int(left_ankle[0]), int(left_ankle[1])), 5, color, 3) 197 | 198 | if float(right_ankle[2]) >= score_thr: 199 | color = (0, 255, 0) 200 | else: 201 | #print(f"Low confidence on KP[16]: {float(fp2[2])}") 202 | color = (0, 0, 255) 203 | cv2.circle(frame_img, (int(right_ankle[0]), int(right_ankle[1])), 5, color, 3) 204 | frame_img = cv2.resize(frame_img, (1280, 960)) 205 | video_wtr.write(frame_img) 206 | video_wtr.release() 207 | print(f"Saved video file: {output_mp4}\n") 208 | 209 | def draw_keypoints(self, frame_img, frame_id, out_file="kp_img.jpg"): 210 | def draw_line(img, s1, s2, bbox): 211 | color = (255, 0, 0) # Blue 212 | cv2.line(img, (int(s1[0]), int(s1[1])), 213 | (int(s2[0]), int(s2[1])), color, thickness=2) 214 | 215 | def draw_dot(img, src, bbox): 216 | color = (0, 255, 0) # Green 217 | cv2.circle(img, (int(src[0]), int(src[1])), 5, color, 2) 218 | 219 | frame_id = str(frame_id) 220 | if not frame_id in self.keypoints: 221 | print(f"There's no record asssiate with frame {frame_id} in the keypoint data.") 222 | return 223 | 224 | # Read frame image file 225 | if os.path.isfile(frame_img): 226 | img = cv2.imread(frame_img) 227 | else: 228 | print(f"There's no such image file {frame_img}.") 229 | return 230 | 231 | detections = self.keypoints[str(frame_id)] 232 | for det in detections: 233 | keypoints = det["keypoints"] 234 | bbox = det["bbox"] 235 | 236 | # draw lines 237 | # 0 to 1, 2 238 | draw_line(img, keypoints[0], keypoints[1], bbox) 239 | draw_line(img, keypoints[0], keypoints[2], bbox) 240 | # 1 to 2, 3 241 | draw_line(img, keypoints[1], keypoints[2], bbox) 242 | draw_line(img, keypoints[1], keypoints[3], bbox) 243 | # 2 to 4 244 | draw_line(img, keypoints[2], keypoints[4], bbox) 245 | # 3 to 5 246 | draw_line(img, keypoints[3], keypoints[5], bbox) 247 | # 4 to 6 248 | draw_line(img, keypoints[4], keypoints[6], bbox) 249 | # 5 to 6, 7, 11 250 | draw_line(img, keypoints[5], keypoints[6], bbox) 251 | 
draw_line(img, keypoints[5], keypoints[7], bbox) 252 | draw_line(img, keypoints[5], keypoints[11], bbox) 253 | # 6 to 8, 12 254 | draw_line(img, keypoints[6], keypoints[8], bbox) 255 | draw_line(img, keypoints[6], keypoints[12], bbox) 256 | # 7 to 9 257 | draw_line(img, keypoints[7], keypoints[9], bbox) 258 | # 8 to 10 259 | draw_line(img, keypoints[8], keypoints[10], bbox) 260 | # 11 to 12, 13 261 | draw_line(img, keypoints[11], keypoints[12], bbox) 262 | draw_line(img, keypoints[11], keypoints[13], bbox) 263 | # 12 to 14 264 | draw_line(img, keypoints[12], keypoints[14], bbox) 265 | # 13 to 15 266 | draw_line(img, keypoints[13], keypoints[15], bbox) 267 | # 14 to 16 268 | draw_line(img, keypoints[14], keypoints[16], bbox) 269 | 270 | # Draw dots 271 | for kp in keypoints: 272 | draw_dot(img, (int(kp[0]), int(kp[1])), bbox) 273 | 274 | cv2.imwrite(out_file, img) 275 | print(f"Saved keypoint file: {out_file}") 276 | -------------------------------------------------------------------------------- /tracking/config/scene_2_camera_id_file.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "scene_name": "scene_001", 4 | "camera_ids": [ 5 | 1, 6 | 2, 7 | 3, 8 | 4, 9 | 5, 10 | 6, 11 | 7, 12 | 8, 13 | 9, 14 | 10 15 | ] 16 | }, 17 | { 18 | "scene_name": "scene_041", 19 | "camera_ids": [ 20 | 361, 21 | 362, 22 | 363, 23 | 364, 24 | 365, 25 | 366, 26 | 367, 27 | 368, 28 | 369, 29 | 370 30 | ] 31 | }, 32 | { 33 | "scene_name": "scene_042", 34 | "camera_ids": [ 35 | 371, 36 | 372, 37 | 373, 38 | 374, 39 | 375, 40 | 376, 41 | 377, 42 | 378, 43 | 379 44 | ] 45 | }, 46 | { 47 | "scene_name": "scene_043", 48 | "camera_ids": [ 49 | 380, 50 | 381, 51 | 382, 52 | 383, 53 | 384, 54 | 385, 55 | 386, 56 | 387, 57 | 388, 58 | 389 59 | ] 60 | }, 61 | { 62 | "scene_name": "scene_044", 63 | "camera_ids": [ 64 | 390, 65 | 391, 66 | 392, 67 | 393, 68 | 394, 69 | 395, 70 | 396 71 | ] 72 | }, 73 | { 74 | "scene_name": "scene_045", 75 | "camera_ids": [ 76 | 397, 77 | 398, 78 | 399, 79 | 400, 80 | 401, 81 | 402, 82 | 403, 83 | 404 84 | ] 85 | }, 86 | { 87 | "scene_name": "scene_046", 88 | "camera_ids": [ 89 | 405, 90 | 406, 91 | 407, 92 | 408, 93 | 409, 94 | 410, 95 | 411 96 | ] 97 | }, 98 | { 99 | "scene_name": "scene_047", 100 | "camera_ids": [ 101 | 412, 102 | 413, 103 | 414, 104 | 415, 105 | 416, 106 | 417, 107 | 418, 108 | 419, 109 | 420, 110 | 421 111 | ] 112 | }, 113 | { 114 | "scene_name": "scene_048", 115 | "camera_ids": [ 116 | 422, 117 | 423, 118 | 424, 119 | 425, 120 | 426, 121 | 427, 122 | 428, 123 | 429 124 | ] 125 | }, 126 | { 127 | "scene_name": "scene_049", 128 | "camera_ids": [ 129 | 430, 130 | 431, 131 | 432, 132 | 433, 133 | 434, 134 | 435, 135 | 436, 136 | 437, 137 | 438, 138 | 439 139 | ] 140 | }, 141 | { 142 | "scene_name": "scene_050", 143 | "camera_ids": [ 144 | 440, 145 | 441, 146 | 442, 147 | 443, 148 | 444, 149 | 445, 150 | 446, 151 | 447 152 | ] 153 | }, 154 | { 155 | "scene_name": "scene_051", 156 | "camera_ids": [ 157 | 448, 158 | 449, 159 | 450, 160 | 451, 161 | 452, 162 | 453, 163 | 454 164 | ] 165 | }, 166 | { 167 | "scene_name": "scene_052", 168 | "camera_ids": [ 169 | 455, 170 | 456, 171 | 457, 172 | 458, 173 | 459, 174 | 460, 175 | 461, 176 | 462 177 | ] 178 | }, 179 | { 180 | "scene_name": "scene_053", 181 | "camera_ids": [ 182 | 463, 183 | 464, 184 | 465, 185 | 466, 186 | 467, 187 | 468, 188 | 469, 189 | 470, 190 | 471 191 | ] 192 | }, 193 | { 194 | "scene_name": "scene_054", 195 | "camera_ids": [ 196 | 472, 197 | 473, 198 | 474, 
199 | 475, 200 | 476, 201 | 477, 202 | 478, 203 | 479 204 | ] 205 | }, 206 | { 207 | "scene_name": "scene_055", 208 | "camera_ids": [ 209 | 480, 210 | 481, 211 | 482, 212 | 483, 213 | 484, 214 | 485, 215 | 486 216 | ] 217 | }, 218 | { 219 | "scene_name": "scene_056", 220 | "camera_ids": [ 221 | 487, 222 | 488, 223 | 489, 224 | 490, 225 | 491, 226 | 492, 227 | 493, 228 | 494, 229 | 495 230 | ] 231 | }, 232 | { 233 | "scene_name": "scene_057", 234 | "camera_ids": [ 235 | 496, 236 | 497, 237 | 498, 238 | 499, 239 | 500, 240 | 501, 241 | 502, 242 | 503, 243 | 504, 244 | 505 245 | ] 246 | }, 247 | { 248 | "scene_name": "scene_058", 249 | "camera_ids": [ 250 | 506, 251 | 507, 252 | 508, 253 | 509, 254 | 510, 255 | 511, 256 | 512, 257 | 513, 258 | 514 259 | ] 260 | }, 261 | { 262 | "scene_name": "scene_059", 263 | "camera_ids": [ 264 | 515, 265 | 516, 266 | 517, 267 | 518, 268 | 519, 269 | 520, 270 | 521, 271 | 522, 272 | 523, 273 | 524 274 | ] 275 | }, 276 | { 277 | "scene_name": "scene_060", 278 | "camera_ids": [ 279 | 525, 280 | 526, 281 | 527, 282 | 528, 283 | 529, 284 | 530, 285 | 531, 286 | 532, 287 | 533, 288 | 534 289 | ] 290 | }, 291 | { 292 | "scene_name": "scene_061", 293 | "camera_ids": [ 294 | 535, 295 | 536, 296 | 537, 297 | 538, 298 | 539, 299 | 540, 300 | 541, 301 | 542, 302 | 543, 303 | 544 304 | ] 305 | }, 306 | { 307 | "scene_name": "scene_062", 308 | "camera_ids": [ 309 | 545, 310 | 546, 311 | 547, 312 | 548, 313 | 549, 314 | 550, 315 | 551, 316 | 552, 317 | 553, 318 | 554 319 | ] 320 | }, 321 | { 322 | "scene_name": "scene_063", 323 | "camera_ids": [ 324 | 555, 325 | 556, 326 | 557, 327 | 558, 328 | 559, 329 | 560, 330 | 561, 331 | 562, 332 | 563, 333 | 564 334 | ] 335 | }, 336 | { 337 | "scene_name": "scene_064", 338 | "camera_ids": [ 339 | 565, 340 | 566, 341 | 567, 342 | 568, 343 | 569, 344 | 570, 345 | 571, 346 | 572, 347 | 573, 348 | 574 349 | ] 350 | }, 351 | { 352 | "scene_name": "scene_065", 353 | "camera_ids": [ 354 | 575, 355 | 576, 356 | 577, 357 | 578, 358 | 579, 359 | 580, 360 | 581, 361 | 582, 362 | 583, 363 | 584 364 | ] 365 | }, 366 | { 367 | "scene_name": "scene_066", 368 | "camera_ids": [ 369 | 585, 370 | 586, 371 | 587, 372 | 588, 373 | 589, 374 | 590, 375 | 591, 376 | 592, 377 | 593, 378 | 594 379 | ] 380 | }, 381 | { 382 | "scene_name": "scene_067", 383 | "camera_ids": [ 384 | 595, 385 | 596, 386 | 597, 387 | 598, 388 | 599, 389 | 600, 390 | 601, 391 | 602, 392 | 603, 393 | 604 394 | ] 395 | }, 396 | { 397 | "scene_name": "scene_068", 398 | "camera_ids": [ 399 | 605, 400 | 606, 401 | 607, 402 | 608, 403 | 609, 404 | 610, 405 | 611, 406 | 612, 407 | 613, 408 | 614 409 | ] 410 | }, 411 | { 412 | "scene_name": "scene_069", 413 | "camera_ids": [ 414 | 615, 415 | 616, 416 | 617, 417 | 618, 418 | 619, 419 | 620, 420 | 621, 421 | 622, 422 | 623, 423 | 624 424 | ] 425 | }, 426 | { 427 | "scene_name": "scene_070", 428 | "camera_ids": [ 429 | 625, 430 | 626, 431 | 627, 432 | 628, 433 | 629, 434 | 630, 435 | 631, 436 | 632, 437 | 633, 438 | 634 439 | ] 440 | }, 441 | { 442 | "scene_name": "scene_071", 443 | "camera_ids": [ 444 | 635, 445 | 636, 446 | 637, 447 | 638, 448 | 639, 449 | 640, 450 | 641, 451 | 642, 452 | 643, 453 | 644, 454 | 645, 455 | 646, 456 | 647, 457 | 648, 458 | 650 459 | ] 460 | }, 461 | { 462 | "scene_name": "scene_072", 463 | "camera_ids": [ 464 | 651, 465 | 652, 466 | 653, 467 | 654, 468 | 655, 469 | 656, 470 | 657, 471 | 658, 472 | 659, 473 | 660, 474 | 661, 475 | 662, 476 | 663, 477 | 664, 478 | 665, 479 | 666 480 | ] 481 | }, 482 | { 483 | 
"scene_name": "scene_073", 484 | "camera_ids": [ 485 | 667, 486 | 668, 487 | 669, 488 | 670, 489 | 671, 490 | 672, 491 | 673, 492 | 674, 493 | 675, 494 | 676, 495 | 677, 496 | 678, 497 | 679, 498 | 680, 499 | 681, 500 | 682 501 | ] 502 | }, 503 | { 504 | "scene_name": "scene_074", 505 | "camera_ids": [ 506 | 683, 507 | 684, 508 | 685, 509 | 686, 510 | 687, 511 | 688, 512 | 689, 513 | 690, 514 | 691, 515 | 692, 516 | 693, 517 | 694, 518 | 695, 519 | 696, 520 | 697, 521 | 698 522 | ] 523 | }, 524 | { 525 | "scene_name": "scene_075", 526 | "camera_ids": [ 527 | 699, 528 | 700, 529 | 701, 530 | 702, 531 | 703, 532 | 704, 533 | 705, 534 | 706, 535 | 707, 536 | 708, 537 | 709, 538 | 710, 539 | 711, 540 | 712, 541 | 713, 542 | 714 543 | ] 544 | }, 545 | { 546 | "scene_name": "scene_076", 547 | "camera_ids": [ 548 | 715, 549 | 716, 550 | 717, 551 | 718, 552 | 719, 553 | 720, 554 | 721, 555 | 722, 556 | 723, 557 | 724, 558 | 725, 559 | 726, 560 | 727, 561 | 728, 562 | 729, 563 | 730 564 | ] 565 | }, 566 | { 567 | "scene_name": "scene_077", 568 | "camera_ids": [ 569 | 731, 570 | 732, 571 | 733, 572 | 734, 573 | 735, 574 | 736, 575 | 737, 576 | 738, 577 | 739, 578 | 740, 579 | 741, 580 | 742, 581 | 743, 582 | 744, 583 | 745, 584 | 746 585 | ] 586 | }, 587 | { 588 | "scene_name": "scene_078", 589 | "camera_ids": [ 590 | 747, 591 | 748, 592 | 749, 593 | 750, 594 | 751, 595 | 752, 596 | 753, 597 | 754, 598 | 755, 599 | 756, 600 | 757, 601 | 758, 602 | 759, 603 | 760, 604 | 761, 605 | 762 606 | ] 607 | }, 608 | { 609 | "scene_name": "scene_079", 610 | "camera_ids": [ 611 | 763, 612 | 764, 613 | 765, 614 | 766, 615 | 767, 616 | 768, 617 | 769, 618 | 770, 619 | 771, 620 | 772, 621 | 773, 622 | 774, 623 | 775, 624 | 776, 625 | 777, 626 | 778 627 | ] 628 | }, 629 | { 630 | "scene_name": "scene_080", 631 | "camera_ids": [ 632 | 779, 633 | 780, 634 | 781, 635 | 782, 636 | 783, 637 | 784, 638 | 785, 639 | 786, 640 | 787, 641 | 788, 642 | 789, 643 | 790, 644 | 791, 645 | 792, 646 | 793, 647 | 794 648 | ] 649 | }, 650 | { 651 | "scene_name": "scene_081", 652 | "camera_ids": [ 653 | 795, 654 | 796, 655 | 797, 656 | 798, 657 | 799, 658 | 800, 659 | 801, 660 | 802, 661 | 803, 662 | 804, 663 | 805, 664 | 806, 665 | 807, 666 | 808, 667 | 809, 668 | 810 669 | ] 670 | }, 671 | { 672 | "scene_name": "scene_082", 673 | "camera_ids": [ 674 | 811, 675 | 812, 676 | 813, 677 | 814, 678 | 815, 679 | 816, 680 | 817, 681 | 818, 682 | 819, 683 | 820, 684 | 821, 685 | 822, 686 | 823, 687 | 824, 688 | 825, 689 | 826 690 | ] 691 | }, 692 | { 693 | "scene_name": "scene_083", 694 | "camera_ids": [ 695 | 827, 696 | 828, 697 | 829, 698 | 830, 699 | 831, 700 | 832, 701 | 833, 702 | 834, 703 | 835, 704 | 836, 705 | 837, 706 | 838, 707 | 839, 708 | 840, 709 | 841, 710 | 842 711 | ] 712 | }, 713 | { 714 | "scene_name": "scene_084", 715 | "camera_ids": [ 716 | 843, 717 | 844, 718 | 845, 719 | 846, 720 | 847, 721 | 848, 722 | 849, 723 | 850, 724 | 851, 725 | 852, 726 | 853, 727 | 854, 728 | 855, 729 | 856, 730 | 857, 731 | 858 732 | ] 733 | }, 734 | { 735 | "scene_name": "scene_085", 736 | "camera_ids": [ 737 | 859, 738 | 860, 739 | 861, 740 | 862, 741 | 863, 742 | 864, 743 | 865, 744 | 866, 745 | 867, 746 | 868, 747 | 869, 748 | 870, 749 | 871, 750 | 872, 751 | 873, 752 | 874 753 | ] 754 | }, 755 | { 756 | "scene_name": "scene_086", 757 | "camera_ids": [ 758 | 875, 759 | 876, 760 | 877, 761 | 878, 762 | 879, 763 | 880, 764 | 881, 765 | 882, 766 | 883, 767 | 884, 768 | 885, 769 | 886, 770 | 887, 771 | 888, 772 | 889, 773 | 890 774 | ] 775 | 
}, 776 | { 777 | "scene_name": "scene_087", 778 | "camera_ids": [ 779 | 891, 780 | 892, 781 | 893, 782 | 894, 783 | 895, 784 | 896, 785 | 897, 786 | 898, 787 | 899, 788 | 900, 789 | 901, 790 | 902, 791 | 903, 792 | 904, 793 | 905, 794 | 906 795 | ] 796 | }, 797 | { 798 | "scene_name": "scene_088", 799 | "camera_ids": [ 800 | 907, 801 | 908, 802 | 909, 803 | 910, 804 | 911, 805 | 912, 806 | 913, 807 | 914, 808 | 915, 809 | 916, 810 | 917, 811 | 918, 812 | 919, 813 | 920, 814 | 921, 815 | 922 816 | ] 817 | }, 818 | { 819 | "scene_name": "scene_089", 820 | "camera_ids": [ 821 | 923, 822 | 924, 823 | 925, 824 | 926, 825 | 927, 826 | 928, 827 | 929, 828 | 930, 829 | 931, 830 | 932, 831 | 933, 832 | 934, 833 | 935, 834 | 936, 835 | 937 836 | ] 837 | }, 838 | { 839 | "scene_name": "scene_090", 840 | "camera_ids": [ 841 | 938, 842 | 939, 843 | 940, 844 | 941, 845 | 942, 846 | 943, 847 | 944, 848 | 945, 849 | 946, 850 | 947, 851 | 948, 852 | 949, 853 | 950, 854 | 951, 855 | 952, 856 | 953 857 | ] 858 | } 859 | ] -------------------------------------------------------------------------------- /detector/aic24_get_detection.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import os 4 | import os.path as osp 5 | import time 6 | import cv2 7 | import torch 8 | import numpy as np 9 | import json 10 | 11 | from loguru import logger 12 | 13 | sys.path.append('.') 14 | 15 | from yolox.data.data_augment import preproc 16 | from yolox.exp import get_exp 17 | from yolox.utils import fuse_model, get_model_info, postprocess 18 | from yolox.utils.visualize import plot_tracking 19 | from tracker.bot_sort import BoTSORT 20 | from tracker.tracking_utils.timer import Timer 21 | 22 | 23 | IMAGE_EXT = [".jpg"] 24 | def make_parser(): 25 | parser = argparse.ArgumentParser("BoT-SORT Demo!") 26 | parser.add_argument("root_path", type=str, default=None) 27 | parser.add_argument("-s","--scene", default=None, type=str) 28 | #parser.add_argument("demo", default="image", help="demo type, eg. 
image, video and webcam") 29 | parser.add_argument("-expn", "--experiment-name", type=str, default=None) 30 | parser.add_argument("-n", "--name", type=str, default=None, help="model name") 31 | parser.add_argument("--path", default="", help="path to images or video") 32 | parser.add_argument("--camid", type=int, default=0, help="webcam demo camera id") 33 | parser.add_argument("--save_result", action="store_true",help="whether to save the inference result of image/video") 34 | parser.add_argument("-f", "--exp_file", default="yolox/exps/example/mot/yolox_x_mix_det.py", type=str, help="pls input your expriment description file") 35 | parser.add_argument("-c", "--ckpt", default="bytetrack_x_mot17.pth.tar", type=str, help="ckpt for eval") 36 | parser.add_argument("--device", default="gpu", type=str, help="device to run our model, can either be cpu or gpu") 37 | parser.add_argument("--conf", default=None, type=float, help="test conf") 38 | parser.add_argument("--nms", default=None, type=float, help="test nms threshold") 39 | parser.add_argument("--tsize", default=None, type=int, help="test img size") 40 | parser.add_argument("--fps", default=30, type=int, help="frame rate (fps)") 41 | parser.add_argument("--fp16", dest="fp16", default=False, action="store_true",help="Adopting mix precision evaluating.") 42 | parser.add_argument("--fuse", dest="fuse", default=False, action="store_true", help="Fuse conv and bn for testing.") 43 | parser.add_argument("--trt", dest="trt", default=False, action="store_true", help="Using TensorRT model for testing.") 44 | 45 | # tracking args 46 | parser.add_argument("--track_high_thresh", type=float, default=0.6, help="tracking confidence threshold") 47 | parser.add_argument("--track_low_thresh", default=0.1, type=float, help="lowest detection threshold") 48 | parser.add_argument("--new_track_thresh", default=0.7, type=float, help="new track thresh") 49 | parser.add_argument("--track_buffer", type=int, default=30, help="the frames for keep lost tracks") 50 | parser.add_argument("--match_thresh", type=float, default=0.8, help="matching threshold for tracking") 51 | parser.add_argument("--aspect_ratio_thresh", type=float, default=1.6, help="threshold for filtering out boxes of which aspect ratio are above the given value.") 52 | parser.add_argument('--min_box_area', type=float, default=10, help='filter out tiny boxes') 53 | parser.add_argument("--fuse-score", dest="fuse_score", default=False, action="store_true", help="fuse score and iou for association") 54 | 55 | # CMC 56 | parser.add_argument("--cmc-method", default="orb", type=str, help="cmc method: files (Vidstab GMC) | orb | ecc") 57 | 58 | # ReID 59 | parser.add_argument("--with-reid", dest="with_reid", default=False, action="store_true", help="test mot20.") 60 | parser.add_argument("--fast-reid-config", dest="fast_reid_config", default=r"fast_reid/configs/MOT17/sbs_S50.yml", type=str, help="reid config file path") 61 | parser.add_argument("--fast-reid-weights", dest="fast_reid_weights", default=r"pretrained/mot17_sbs_S50.pth", type=str,help="reid config file path") 62 | parser.add_argument('--proximity_thresh', type=float, default=0.5, help='threshold for rejecting low overlap reid matches') 63 | parser.add_argument('--appearance_thresh', type=float, default=0.25, help='threshold for rejecting low appearance similarity reid matches') 64 | return parser 65 | 66 | 67 | def get_image_list(path): 68 | image_names = [] 69 | for maindir, subdir, file_name_list in os.walk(path): 70 | for filename in file_name_list: 
71 | apath = osp.join(maindir, filename) 72 | ext = osp.splitext(apath)[1] 73 | if ext in IMAGE_EXT: 74 | image_names.append(apath) 75 | return image_names 76 | 77 | 78 | def write_results(filename, results): 79 | save_format = '{frame},{id},{x1},{y1},{w},{h},{s},-1,-1,-1\n' 80 | with open(filename, 'w') as f: 81 | for frame_id, tlwhs, track_ids, scores in results: 82 | for tlwh, track_id, score in zip(tlwhs, track_ids, scores): 83 | if track_id < 0: 84 | continue 85 | x1, y1, w, h = tlwh 86 | line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1), s=round(score, 2)) 87 | f.write(line) 88 | logger.info('save results to {}'.format(filename)) 89 | 90 | 91 | class Predictor(object): 92 | def __init__( 93 | self, 94 | model, 95 | exp, 96 | trt_file=None, 97 | decoder=None, 98 | device=torch.device("cpu"), 99 | fp16=False 100 | ): 101 | self.model = model 102 | self.decoder = decoder 103 | self.num_classes = exp.num_classes 104 | self.confthre = exp.test_conf 105 | self.nmsthre = exp.nmsthre 106 | self.test_size = exp.test_size 107 | self.device = device 108 | self.fp16 = fp16 109 | if trt_file is not None: 110 | from torch2trt import TRTModule 111 | 112 | model_trt = TRTModule() 113 | model_trt.load_state_dict(torch.load(trt_file)) 114 | 115 | x = torch.ones((1, 3, exp.test_size[0], exp.test_size[1]), device=device) 116 | self.model(x) 117 | self.model = model_trt 118 | self.rgb_means = (0.485, 0.456, 0.406) 119 | self.std = (0.229, 0.224, 0.225) 120 | 121 | def inference(self, img, timer): 122 | img_info = {"id": 0} 123 | if isinstance(img, str): 124 | img_info["file_name"] = osp.basename(img) 125 | img = cv2.imread(img) 126 | else: 127 | img_info["file_name"] = None 128 | 129 | height, width = img.shape[:2] 130 | img_info["height"] = height 131 | img_info["width"] = width 132 | img_info["raw_img"] = img 133 | 134 | img, ratio = preproc(img, self.test_size, self.rgb_means, self.std) 135 | img_info["ratio"] = ratio 136 | img = torch.from_numpy(img).unsqueeze(0).float().to(self.device) 137 | if self.fp16: 138 | img = img.half() # to FP16 139 | 140 | with torch.no_grad(): 141 | timer.tic() 142 | outputs = self.model(img) 143 | if self.decoder is not None: 144 | outputs = self.decoder(outputs, dtype=outputs.type()) 145 | outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) 146 | return outputs, img_info 147 | 148 | 149 | def image_demo(predictor, vis_folder, current_time, args): 150 | 151 | root_path = args.root_path 152 | scene = args.scene 153 | input = osp.join(root_path, "Original", scene) 154 | cameras = [] 155 | for f in os.listdir(input): 156 | if os.path.isdir(os.path.join(input, f)): 157 | cameras.append(f) 158 | cameras = sorted(cameras) 159 | scale = min(800/1080,1440/1920) 160 | for cam in cameras: 161 | imgs = sorted(os.listdir(osp.join(input, cam, 'Frame'))) 162 | timer = Timer() 163 | output = osp.join(root_path,'Detection', '{}.txt'.format(osp.join(scene, cam))) 164 | outjson = osp.join(root_path,'Detection', '{}.json'.format(osp.join(scene, cam))) 165 | if not os.path.isdir(osp.join(root_path,'Detection',scene)): 166 | os.makedirs(osp.join(root_path,'Detection',scene)) 167 | u_num = 0 168 | ret_json = {} 169 | results = [] 170 | for frame_id, img_path in enumerate(imgs, 1): 171 | img_path = osp.join(input, cam, 'Frame',img_path) 172 | 173 | # Detect objects 174 | outputs, img_info = predictor.inference(img_path, timer) 175 | 176 | detections = [] 177 | if outputs[0] is not None: 178 | outputs 
= outputs[0].cpu().numpy() 179 | detections = outputs[:, :7] 180 | detections[:, :4] /= scale 181 | detections = detections[detections[:,4]>0.1] 182 | timer.toc() 183 | else: 184 | timer.toc() 185 | 186 | for det in detections: 187 | x1,y1,x2,y2,score,_,_ = det 188 | x1 = max(0,x1) 189 | y1 = max(0,y1) 190 | x2 = min(1920,x2) 191 | y2 = min(1080,y2) 192 | results.append([cam,frame_id,1,int(x1),int(y1),int(x2),int(y2),score]) 193 | det_json = {} 194 | det_json['Frame'] = frame_id 195 | det_json['ImgPath'] = img_path.replace(root_path + '/','') 196 | det_json['NpyPath'] = '' 197 | Coordinate = {'x1':int(x1), 'y1':int(y1), 'x2': int(x2), 'y2': int(y2)} 198 | det_json['Coordinate'] = Coordinate 199 | det_json['ClusterID'] = None 200 | det_json['OfflineID'] = None 201 | ret_json[str(u_num).zfill(8)] = det_json 202 | u_num += 1 203 | 204 | if frame_id % 1000 == 0: 205 | logger.info('Processing cam {} frame {} ({:.2f} fps)'.format(cam, frame_id, 1. / max(1e-5, timer.average_time))) 206 | 207 | with open(output,'a') as f: 208 | for cam,frame_id,cls,x1,y1,x2,y2,score in results: 209 | f.write('{},{},{},{},{},{},{},{}\n'.format(cam,frame_id,cls,x1,y1,x2,y2,score)) 210 | with open(outjson, 'a') as f: 211 | json.dump(ret_json, f, ensure_ascii=False) 212 | 213 | def imageflow_demo(predictor, vis_folder, current_time, args): 214 | cap = cv2.VideoCapture(args.path if args.demo == "video" else args.camid) 215 | width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float 216 | height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float 217 | fps = cap.get(cv2.CAP_PROP_FPS) 218 | timestamp = time.strftime("%Y_%m_%d_%H_%M_%S", current_time) 219 | save_folder = osp.join(vis_folder, timestamp) 220 | os.makedirs(save_folder, exist_ok=True) 221 | if args.demo == "video": 222 | save_path = osp.join(save_folder, args.path.split("/")[-1]) 223 | else: 224 | save_path = osp.join(save_folder, "camera.mp4") 225 | logger.info(f"video save_path is {save_path}") 226 | vid_writer = cv2.VideoWriter( 227 | save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) 228 | ) 229 | tracker = BoTSORT(args, frame_rate=args.fps) 230 | timer = Timer() 231 | frame_id = 0 232 | results = [] 233 | while True: 234 | if frame_id % 20 == 0: 235 | logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1. / max(1e-5, timer.average_time))) 236 | ret_val, frame = cap.read() 237 | if ret_val: 238 | # Detect objects 239 | outputs, img_info = predictor.inference(frame, timer) 240 | scale = min(exp.test_size[0] / float(img_info['height'], ), exp.test_size[1] / float(img_info['width'])) 241 | 242 | if outputs[0] is not None: 243 | outputs = outputs[0].cpu().numpy() 244 | detections = outputs[:, :7] 245 | detections[:, :4] /= scale 246 | 247 | # Run tracker 248 | online_targets = tracker.update(detections, img_info["raw_img"]) 249 | 250 | online_tlwhs = [] 251 | online_ids = [] 252 | online_scores = [] 253 | for t in online_targets: 254 | tlwh = t.tlwh 255 | tid = t.track_id 256 | vertical = tlwh[2] / tlwh[3] > args.aspect_ratio_thresh 257 | if tlwh[2] * tlwh[3] > args.min_box_area and not vertical: 258 | online_tlwhs.append(tlwh) 259 | online_ids.append(tid) 260 | online_scores.append(t.score) 261 | results.append( 262 | f"{frame_id},{tid},{tlwh[0]:.2f},{tlwh[1]:.2f},{tlwh[2]:.2f},{tlwh[3]:.2f},{t.score:.2f},-1,-1,-1\n" 263 | ) 264 | timer.toc() 265 | online_im = plot_tracking( 266 | img_info['raw_img'], online_tlwhs, online_ids, frame_id=frame_id + 1, fps=1. 
/ timer.average_time 267 | ) 268 | else: 269 | timer.toc() 270 | online_im = img_info['raw_img'] 271 | if args.save_result: 272 | vid_writer.write(online_im) 273 | ch = cv2.waitKey(1) 274 | if ch == 27 or ch == ord("q") or ch == ord("Q"): 275 | break 276 | else: 277 | break 278 | frame_id += 1 279 | 280 | if args.save_result: 281 | res_file = osp.join(vis_folder, f"{timestamp}.txt") 282 | with open(res_file, 'w') as f: 283 | f.writelines(results) 284 | logger.info(f"save results to {res_file}") 285 | 286 | 287 | def main(exp, args): 288 | if not args.experiment_name: 289 | args.experiment_name = exp.exp_name 290 | 291 | output_dir = osp.join(exp.output_dir, args.experiment_name) 292 | os.makedirs(output_dir, exist_ok=True) 293 | 294 | if args.save_result: 295 | vis_folder = osp.join(output_dir, "track_vis") 296 | os.makedirs(vis_folder, exist_ok=True) 297 | 298 | if args.trt: 299 | args.device = "gpu" 300 | args.device = torch.device("cuda" if args.device == "gpu" else "cpu") 301 | 302 | logger.info("Args: {}".format(args)) 303 | 304 | if args.conf is not None: 305 | exp.test_conf = args.conf 306 | if args.nms is not None: 307 | exp.nmsthre = args.nms 308 | if args.tsize is not None: 309 | exp.test_size = (args.tsize, args.tsize) 310 | 311 | model = exp.get_model().to(args.device) 312 | logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size))) 313 | model.eval() 314 | 315 | if not args.trt: 316 | if args.ckpt is None: 317 | ckpt_file = osp.join(output_dir, "best_ckpt.pth.tar") 318 | else: 319 | ckpt_file = args.ckpt 320 | logger.info("loading checkpoint") 321 | ckpt = torch.load(ckpt_file, map_location="cpu") 322 | # load the model state dict 323 | model.load_state_dict(ckpt["model"]) 324 | logger.info("loaded checkpoint done.") 325 | 326 | if args.fuse: 327 | logger.info("\tFusing model...") 328 | model = fuse_model(model) 329 | 330 | if args.fp16: 331 | model = model.half() # to FP16 332 | 333 | if args.trt: 334 | assert not args.fuse, "TensorRT model is not support model fusing!" 335 | trt_file = osp.join(output_dir, "model_trt.pth") 336 | assert osp.exists( 337 | trt_file 338 | ), "TensorRT model is not found!\n Run python3 tools/trt.py first!" 
339 | model.head.decode_in_inference = False 340 | decoder = model.head.decode_outputs 341 | logger.info("Using TensorRT to inference") 342 | else: 343 | trt_file = None 344 | decoder = None 345 | 346 | predictor = Predictor(model, exp, trt_file, decoder, args.device, args.fp16) 347 | current_time = time.localtime() 348 | 349 | image_demo(predictor, None, current_time, args) 350 | 351 | 352 | if __name__ == "__main__": 353 | args = make_parser().parse_args() 354 | exp = get_exp(args.exp_file, args.name) 355 | 356 | args.ablation = False 357 | args.mot20 = not args.fuse_score 358 | 359 | main(exp, args) 360 | -------------------------------------------------------------------------------- /tracking/src/scpt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import sys 4 | from sklearn.cluster import DBSCAN 5 | from sklearn.metrics.pairwise import cosine_similarity 6 | from itertools import combinations, permutations, product, chain 7 | from scipy.cluster.hierarchy import dendrogram, linkage, fcluster 8 | from scipy.spatial.distance import squareform 9 | from scipy.interpolate import RegularGridInterpolator 10 | from collections import Counter 11 | 12 | 13 | def create_centrality_matrix(clusters, similarity_matrix,frames,**kwargs): 14 | # translate the similarity matrix between each node into the centrality matrix between each cluster 15 | remove_noise_cluster = kwargs.get('remove_noise_cluster', True) 16 | epsilon = kwargs.get('epsilon', 0.3) 17 | 18 | unique_clusters = sorted(list(set(clusters))) 19 | if remove_noise_cluster: 20 | if -1 in unique_clusters: 21 | unique_clusters.remove(-1) 22 | 23 | centrality_matrix = np.ones((len(unique_clusters),len(unique_clusters)))*-1 24 | np.fill_diagonal(centrality_matrix, 0) 25 | 26 | cluster_frames_dict = {cluster:[] for cluster in unique_clusters} 27 | if remove_noise_cluster: 28 | [cluster_frames_dict[cluster].append(frame) for frame,cluster in zip(frames,clusters) if cluster != -1] 29 | else: 30 | [cluster_frames_dict[cluster].append(frame) for frame,cluster in zip(frames,clusters)] 31 | 32 | for i in range(len(unique_clusters)): 33 | cluster1 = unique_clusters[i] 34 | cluster1_frames = cluster_frames_dict[cluster1] 35 | cluster1_indices = [k for k,cluster in enumerate(clusters) if cluster ==cluster1] #indices of similarity_matrix 36 | for j in range(i+1,len(unique_clusters)): 37 | cluster2 = unique_clusters[j] 38 | cluster2_frames = cluster_frames_dict[cluster2] 39 | common_frames = set(cluster1_frames).intersection(set(cluster2_frames)) 40 | if len(common_frames) > 0: continue 41 | cluster2_indices = [k for k,cluster in enumerate(clusters) if cluster ==cluster2] 42 | similarities = similarity_matrix[np.ix_(cluster1_indices, cluster2_indices)] 43 | centrality = np.sum(similarities[similarities > (1 - epsilon)]) 44 | centrality_matrix[i,j] = centrality 45 | centrality_matrix[j,i] = centrality 46 | return centrality_matrix 47 | 48 | def associate_cluster(clusters,centrality_matrix,**kwargs): 49 | # perform hierarchical clustering that targets clusters. 
50 | epsilon = kwargs.get('epsilon', 0.3) 51 | remove_noise_cluster = kwargs.get('remove_noise_cluster', True) 52 | cost_function = kwargs.get('cost_function', 1) 53 | minimize = kwargs.get("minimize",True) 54 | """ 55 | cost_function:1 ⇒ single linkage like 56 | cost_function:2 ⇒ average linkage like 57 | """ 58 | np.fill_diagonal(centrality_matrix, 0) 59 | clusters = np.array(clusters) 60 | unique_clusters = np.sort(np.unique(clusters)) 61 | if remove_noise_cluster: 62 | if -1 in unique_clusters: 63 | unique_clusters = unique_clusters[unique_clusters != -1] 64 | 65 | if cost_function == 1: 66 | pass 67 | elif cost_function == 2: 68 | count = Counter(clusters) 69 | if remove_noise_cluster: 70 | if -1 in count.keys(): 71 | del count[-1] 72 | centrality = np.max(centrality_matrix) 73 | 74 | th = 1 - epsilon 75 | while centrality > th: 76 | if cost_function == 1: 77 | max_index = np.argmax(centrality_matrix) 78 | elif cost_function == 2: 79 | len_element_matrix = np.outer(list(count.values()),list(count.values())) 80 | averaged_centrality_matrix = np.multiply(centrality_matrix,1/len_element_matrix) 81 | np.fill_diagonal(averaged_centrality_matrix, 0) 82 | max_index = np.argmax(averaged_centrality_matrix) 83 | 84 | cluster1_index, cluster2_index = np.unravel_index(max_index, centrality_matrix.shape) 85 | cluster1 = unique_clusters[cluster1_index] 86 | cluster2 = unique_clusters[cluster2_index] 87 | if cost_function == 1 or cost_function == 3: 88 | centrality = centrality_matrix[cluster1_index, cluster2_index] 89 | elif cost_function == 2: 90 | centrality = averaged_centrality_matrix[cluster1_index, cluster2_index] 91 | 92 | if centrality > th: 93 | target_row = centrality_matrix[[cluster1_index,cluster2_index],:] 94 | sum_row = np.sum(target_row,axis=0) 95 | if minimize: 96 | mask = np.min(target_row, axis=0) 97 | sum_row = np.where(mask < 0, -1, sum_row) 98 | centrality_matrix[:, cluster1_index] = sum_row 99 | centrality_matrix[cluster1_index,:] = sum_row 100 | 101 | next_indices = np.arange(len(unique_clusters)) 102 | next_indices = next_indices[next_indices != cluster2_index] 103 | centrality_matrix = centrality_matrix[np.ix_(next_indices,next_indices)] 104 | np.fill_diagonal(centrality_matrix, 0) 105 | clusters = np.where(clusters == cluster2, cluster1, clusters) 106 | unique_clusters = unique_clusters[unique_clusters != cluster2] 107 | 108 | if cost_function == 2: 109 | count[cluster1] += count[cluster2] 110 | del count[cluster2] 111 | else: 112 | break 113 | return clusters 114 | 115 | 116 | def get_initial_index(distance_matrix,overlap_indices_list): 117 | # determines the initial index for the assignment problem. 
118 | distances = [] 119 | for overlap_indices in overlap_indices_list: 120 | min_distance = 2 121 | for index1,index2 in combinations(overlap_indices,2): # 122 | distance = distance_matrix[index1,index2] 123 | min_distance = distance if distance < min_distance else min_distance 124 | distances.append(min_distance) 125 | max_index = np.argmax(distances) 126 | return max_index 127 | 128 | 129 | def fill_none(lst): 130 | # fill "None" to the missing value in sequential number list 131 | used_nums = [num for num in lst if num is not None] 132 | unused_nums = [num for num in range(len(lst)) if num not in used_nums] 133 | for i in range(len(lst)): 134 | if lst[i] is None: 135 | lst[i] = unused_nums.pop(0) 136 | return lst 137 | 138 | def get_candidates_indices_list(similarity_matrix,subcluster_indices_list,overlap_indices_list,epsilon,**kwargs): 139 | # get candidates of the assignment problem 140 | num_candidates = kwargs.get('num_candidates', 10) 141 | 142 | if len(overlap_indices_list) < num_candidates: 143 | candidates_indices_list = overlap_indices_list 144 | else: 145 | np.fill_diagonal(similarity_matrix, 0) 146 | flatten_subcluster_indices = list(chain.from_iterable(subcluster_indices_list)) 147 | tmp_similarity_matrix = similarity_matrix[flatten_subcluster_indices] 148 | 149 | max_similarities =np.max(tmp_similarity_matrix,axis=0) 150 | neighbor_indices = np.where(max_similarities > (1-epsilon))[0] 151 | sorted_indices = np.argsort(max_similarities[neighbor_indices])[::-1] 152 | neighbor_indices = neighbor_indices[sorted_indices] 153 | 154 | if len(neighbor_indices) > num_candidates: 155 | neighbor_indices = neighbor_indices[:num_candidates] 156 | neighbor_indices = neighbor_indices.tolist() 157 | 158 | candidates_indices_list = [] 159 | for neighbor_index in neighbor_indices: 160 | for overlap_indices in overlap_indices_list: 161 | if neighbor_index not in overlap_indices: continue 162 | candidates_indices_list.append(overlap_indices) 163 | for overlap_index in overlap_indices: 164 | try: 165 | neighbor_indices.remove(overlap_index) 166 | except: 167 | pass 168 | return candidates_indices_list 169 | 170 | def agglomerative_clustering(distance_matrix,**kwargs): 171 | # perform agglomerative hierarchical clustering 172 | epsilon = kwargs.get('epsilon', 0.3) 173 | metric = kwargs.get('metric','cosine') 174 | np.fill_diagonal(distance_matrix, 0) 175 | linked = linkage(squareform(distance_matrix), method='single', metric=metric) 176 | clusters = list(fcluster(linked, epsilon, criterion='distance')) # min(clusters)=1 177 | return clusters 178 | 179 | def bipartite_matching(new_key,centrality_dict,centrality_matrix,overlap_indices,**kwargs): 180 | # bipartite matching between unclustered overlap nodes and clustered overlap nodes 181 | epsilon = kwargs.get('epsilon', 0.3) 182 | 183 | sum_centrality = 0 184 | subcluster_indices = [None]*len(overlap_indices) 185 | th = 1-epsilon 186 | while np.max(centrality_matrix) > th: 187 | max_index = np.argmax(centrality_matrix) 188 | row_index, col_index = np.unravel_index(max_index, centrality_matrix.shape) 189 | centrality = centrality_matrix[row_index, col_index] 190 | sum_centrality += centrality 191 | subcluster_indices[row_index] = col_index 192 | centrality_matrix[row_index,:]=0 193 | centrality_matrix[:,col_index]=0 194 | centrality_dict[new_key] = {"overlap_indices":overlap_indices,"indices":subcluster_indices,"centrality":sum_centrality} 195 | 196 | return centrality_dict 197 | 198 | 199 | def separate_into_subcluster(tmp_clusters, 
overlap_indices_list, distance_matrix,**kwargs): 200 | # overlap nodes are separated into subclusters 201 | epsilon = kwargs.get('epsilon', 0.3) 202 | matching_algo_th = kwargs.get('matching_algo_th', 0) 203 | debug = kwargs.get('debug', False) 204 | 205 | max_overlap = max([len(i) for i in overlap_indices_list]) #the number of overlap in the same frame 206 | initial_index = get_initial_index(distance_matrix,overlap_indices_list) #index of overlap_indices_list 207 | initial_node_indices = overlap_indices_list[initial_index] 208 | del overlap_indices_list[initial_index] 209 | 210 | subcluster_indices_list = [[] for _ in range(max_overlap)] 211 | [subcluster_indices_list[i].append(initial_node_index) for i,initial_node_index in enumerate(initial_node_indices)] 212 | 213 | similarity_matrix = 1-distance_matrix 214 | np.fill_diagonal(similarity_matrix, 0) 215 | 216 | # separte overlap nodes into several groups 217 | while len(overlap_indices_list) != 0: 218 | centrality_dict = {} 219 | max_centrality = 0 220 | 221 | candidates_indices_list = get_candidates_indices_list(similarity_matrix,subcluster_indices_list,overlap_indices_list,epsilon) 222 | for i,overlap_indices in enumerate(candidates_indices_list): 223 | centrality_matrix = np.zeros((len(overlap_indices),len(subcluster_indices_list))) #can not use create_centrality_matrix 224 | for j, overlap_index in enumerate(overlap_indices): 225 | tmp_similarity_matrix = similarity_matrix[overlap_index] 226 | for k, subcluster_indices in enumerate(subcluster_indices_list): 227 | similarities = tmp_similarity_matrix[subcluster_indices] 228 | centrality = np.sum(similarities[similarities > (1 - epsilon)]) 229 | centrality_matrix[j,k] = centrality 230 | 231 | centrality_dict = bipartite_matching(i,centrality_dict,centrality_matrix,overlap_indices,epsilon=epsilon) 232 | 233 | max_centrality = 0 if centrality_dict == {} else np.max([value["centrality"] for value in centrality_dict.values()]) 234 | 235 | if max_centrality == 0: 236 | max_index = get_initial_index(distance_matrix,overlap_indices_list) 237 | max_subcluster_indices = list(range(max_overlap)) 238 | overlap_indices = overlap_indices_list[max_index] 239 | else: 240 | max_index = [key for key,value in zip(centrality_dict,centrality_dict.values()) if value["centrality"]==max_centrality][0] 241 | max_subcluster_indices = list(centrality_dict[max_index]["indices"]) 242 | if None in max_subcluster_indices: 243 | max_subcluster_indices = fill_none(max_subcluster_indices) 244 | overlap_indices = centrality_dict[max_index]["overlap_indices"] 245 | [subcluster_indices_list[max_subcluster_index].append(overlap_index) for max_subcluster_index,overlap_index in zip(max_subcluster_indices,overlap_indices)] 246 | overlap_indices_list.remove(overlap_indices) 247 | 248 | # assign cluster ID 249 | for subcluster_indices in subcluster_indices_list: 250 | if len(subcluster_indices) == 1: 251 | tmp_clusters[subcluster_indices[0]] = np.max(tmp_clusters)+1 252 | else: 253 | sub_clusters = agglomerative_clustering(distance_matrix[np.ix_(subcluster_indices, subcluster_indices)],epsilon=epsilon) 254 | sub_clusters = [sub_cluster+max(tmp_clusters) for sub_cluster in sub_clusters] 255 | for sub_cluster,sub_cluster_index in zip(sub_clusters,subcluster_indices): 256 | tmp_clusters[sub_cluster_index] = sub_cluster 257 | return tmp_clusters 258 | 259 | def overlap_suppression_clustering(distance_matrix,frames,nonoverlap_indices,overlap_indices_list,**kwargs): #overlap_indices_list, 260 | epsilon = kwargs.get('epsilon', 0.3) 
261 | debug = kwargs.get('debug', False) 262 | clusters = [-1]*len(frames) 263 | 264 | # clustering for non-overlapping nodes 265 | if nonoverlap_indices != []: 266 | if len(nonoverlap_indices) > 1: 267 | nonoverlap_clusters = agglomerative_clustering(distance_matrix[np.ix_(nonoverlap_indices,nonoverlap_indices)], epsilon=epsilon) 268 | else: 269 | nonoverlap_clusters = [0] 270 | for k,target_index in enumerate(nonoverlap_indices): 271 | clusters[target_index] = nonoverlap_clusters[k] 272 | 273 | # clustering for overlapping nodes 274 | clusters = separate_into_subcluster(clusters, overlap_indices_list, distance_matrix,epsilon=epsilon,debug=debug) 275 | 276 | similarity_matrix = 1 - distance_matrix 277 | centrality_matrix = create_centrality_matrix(clusters,similarity_matrix,frames,epsilon=epsilon) 278 | 279 | # merging for subcluster 280 | clusters = associate_cluster(clusters,centrality_matrix, epsilon=epsilon) 281 | 282 | return clusters 283 | 284 | def divide_overlap_or_nonoverlap(cluster_frames,cluster_indices): 285 | 286 | frame_indices_dict = {frame:[] for frame in sorted(list(set(cluster_frames)))} 287 | [frame_indices_dict[frame].append(index) for index,frame in zip(cluster_indices,cluster_frames)] 288 | overlap_indices_list = [indices for indices in frame_indices_dict.values() if len(indices) > 1] 289 | flattened_overlap_indices = sum(overlap_indices_list, []) 290 | nonoverlap_indices = [index for index in cluster_indices if index not in flattened_overlap_indices] 291 | 292 | return overlap_indices_list, nonoverlap_indices 293 | 294 | def reclustering_overlap_cluster(distance_matrix,tracking_dict,serials,clusters,**kwargs): 295 | epsilon = kwargs.get('epsilon', 0.3) 296 | debug = kwargs.get('debug', False) 297 | 298 | frames = [tracking_dict[serial]["Frame"] for serial in serials] 299 | 300 | cluster_frame_dict = {cluster:[] for cluster in set(clusters)} #20240418 add set() 301 | [cluster_frame_dict[cluster].append(frame) for cluster,frame in zip(clusters,frames)] 302 | cluster_indices_dict = {cluster:[] for cluster in set(clusters)} #20240418 add set() 303 | [cluster_indices_dict[cluster].append(i) for i,cluster in enumerate(clusters)] 304 | 305 | for cluster in cluster_frame_dict: 306 | cluster_frames = cluster_frame_dict[cluster] 307 | if len(list(set(cluster_frames))) == len(cluster_frames):continue 308 | cluster_indices = cluster_indices_dict[cluster] 309 | 310 | #divide overlap/nonoverlap 311 | overlap_indices_list, nonovelap_indices = divide_overlap_or_nonoverlap(cluster_frames,cluster_indices) 312 | 313 | tmp_clusters = overlap_suppression_clustering(distance_matrix,frames,nonovelap_indices,overlap_indices_list,epsilon=epsilon,debug=debug) 314 | 315 | max_cluster_id = np.max(clusters) 316 | for index,tmp_cluster in enumerate(tmp_clusters): 317 | if clusters[index] != cluster: continue 318 | clusters[index] = max_cluster_id + tmp_cluster + 1 319 | return clusters 320 | 321 | def create_similarity_matrix_scpt(serials, tracking_dict, epsilon): 322 | # create a similarity matrix from features 323 | for n,serial in enumerate(serials): 324 | feature = np.load(tracking_dict[serial]["NpyPath"]) 325 | if n==0: feature_stack = np.empty((0,len(feature.flatten()))) 326 | feature_stack = np.append(feature_stack , feature.reshape(1,-1) , axis=0) 327 | similarity_matrix = cosine_similarity(feature_stack) 328 | similarity_matrix = similarity_matrix.astype(np.float16) 329 | 330 | similarity_matrix = np.where(similarity_matrix < (1-epsilon),0,similarity_matrix) 331 | return 
similarity_matrix 332 | 333 | 334 | def tracking_by_clustering(tracking_dict,serials,**kwargs): 335 | min_samples = kwargs.get('min_samples', 4) 336 | epsilon = kwargs.get('epsilon_scpt', 0.3) 337 | overlap_suppression = kwargs.get('overlap_suppression', True) 338 | debug = kwargs.get('debug', False) 339 | clustering_method = kwargs.get('clustering_method', "agglomerative") 340 | 341 | if len(serials) ==1: 342 | clusters = [0] 343 | else: 344 | similarity_matrix = create_similarity_matrix_scpt(serials,tracking_dict,epsilon) 345 | 346 | np.fill_diagonal(similarity_matrix, 1) 347 | distance_matrix = 1 - similarity_matrix 348 | if clustering_method == "agglomerative": 349 | clusters = agglomerative_clustering(distance_matrix,epsion=epsilon) #min(clusters)=1 350 | 351 | elif clustering_method == "dbscan": 352 | dbscan = DBSCAN(eps=epsilon,min_samples=min_samples,metric="precomputed") 353 | clusters = dbscan.fit_predict(distance_matrix) 354 | coreindices = dbscan.core_sample_indices_ 355 | clusters = [cluster if cluster != -1 else -i for i,cluster in enumerate(clusters)] 356 | unique_clusters = list(set(clusters)) 357 | new_clusterid_dict = {key:i for i,key in enumerate(unique_clusters)} 358 | clusters = [new_clusterid_dict[old_cluster] for old_cluster in clusters] 359 | 360 | if overlap_suppression == True: 361 | clusters = reclustering_overlap_cluster(distance_matrix,tracking_dict,serials,clusters,epsilon=epsilon,debug=debug) 362 | 363 | unique_clusters = list(set(clusters)) 364 | new_clusterid_dict = {key:i for i,key in enumerate(unique_clusters)} 365 | clusters = [new_clusterid_dict[old_cluster] for old_cluster in clusters] 366 | 367 | return clusters 368 | 369 | def associate_cluster_between_period(tracking_dict,clusters,serials,past_serials,**kwargs): 370 | # associate clusters between adjacent time periods 371 | epsilon = kwargs.get('epsilon_scpt', 0.3) 372 | frames = [tracking_dict[serial]["Frame"] for serial in serials] 373 | past_frames = [tracking_dict[serial]["Frame"] for serial in past_serials] 374 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in past_serials] 375 | 376 | unique_offline_ids = list(set(offline_ids)) 377 | unique_clusters = list(set(clusters)) 378 | 379 | all_serials = past_serials + serials 380 | all_clusters = offline_ids + clusters 381 | all_unique_clusters = sorted(unique_offline_ids + unique_clusters) 382 | all_frames = past_frames + frames 383 | 384 | similarity_matrix = create_similarity_matrix_scpt(all_serials, tracking_dict,epsilon) 385 | 386 | centrality_matrix = create_centrality_matrix(all_clusters, similarity_matrix,all_frames,epsilon=epsilon) 387 | del similarity_matrix 388 | np.fill_diagonal(centrality_matrix, 0) 389 | 390 | all_clusters = associate_cluster(all_clusters,centrality_matrix,epsilon=epsilon) 391 | 392 | for serial,cluster in zip(all_serials,all_clusters): 393 | tracking_dict[serial]["OfflineID"] = int(cluster) 394 | return tracking_dict 395 | 396 | def get_overlap_coefficient(rectangle1, rectangle2): 397 | # meaure spatially overlap_coefficient 398 | overlap_width = min(rectangle1[2], rectangle2[2]) - max(rectangle1[0], rectangle2[0]) 399 | overlap_height = min(rectangle1[3], rectangle2[3]) - max(rectangle1[1], rectangle2[1]) 400 | overlap_area = max(overlap_width, 0) * max(overlap_height, 0) 401 | rectangle1_area = (rectangle1[2] - rectangle1[0]) * (rectangle1[3] - rectangle1[1]) 402 | rectangle2_area = (rectangle2[2] - rectangle2[0]) * (rectangle2[3] - rectangle2[1]) 403 | #iou = overlap_area / (rectangle1_area + 
rectangle2_area - overlap_area) 404 | overlap_coefficient = overlap_area / min(rectangle1_area,rectangle2_area) 405 | return overlap_coefficient 406 | 407 | def sequential_non_maximum_suppression(tracking_dict,**kwargs): 408 | #Sequential NMS is perfomed in this function. 409 | #Sequential NMS calculates the overlap coefficient both temporally and spatially. 410 | temporally_snms_th = kwargs.get('temporally_snms_th', 0.6) 411 | spatially_snms_th = kwargs.get('spatially_snms_th', 0.6) 412 | remove_noise_cluster = kwargs.get('remove_noise_cluster', True) 413 | merge_nonoverlap = kwargs.get('merge_nonoverlap', True) 414 | 415 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict.keys()] 416 | unique_offline_ids = sorted(list(set(offline_ids))) 417 | if remove_noise_cluster: 418 | if min(unique_offline_ids) == -1: 419 | unique_offline_ids.remove(-1) 420 | 421 | offline_id_serial_dict = {offline_id:[] for offline_id in unique_offline_ids} 422 | [offline_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict.keys() if tracking_dict[serial]["OfflineID"] != -1] 423 | offline_id_frame_dict = {offline_id:[] for offline_id in unique_offline_ids} 424 | [offline_id_frame_dict[tracking_dict[serial]["OfflineID"]].append(tracking_dict[serial]["Frame"]) for serial in tracking_dict.keys() if tracking_dict[serial]["OfflineID"] != -1] 425 | 426 | for offline_id1, offline_id2 in combinations(unique_offline_ids,2): 427 | 428 | id1_frames = offline_id_frame_dict[offline_id1] 429 | id2_frames = offline_id_frame_dict[offline_id2] 430 | overlap_frames = set(id1_frames).intersection(set(id2_frames)) 431 | 432 | if len(id1_frames) < len(id2_frames): 433 | (offline_id1,offline_id2) = (offline_id2,offline_id1) 434 | (id1_frames,id2_frames) = (id2_frames,id1_frames) 435 | 436 | if max(len(overlap_frames)/len(id1_frames),len(overlap_frames)/len(id2_frames)) 1: 491 | weighted_cumsum = alpha*weighted_cumsum+(1-alpha)*np.array([delta_x[t-1],delta_y[t-1]]) 492 | if frame not in frames: 493 | continue 494 | current_position = interpolaterd_trajectory[t] 495 | past_position = interpolaterd_trajectory[t-1] 496 | pred_current_position = current_position + weighted_cumsum 497 | distance = np.sqrt(np.square(current_position[0] - pred_current_position[0])+np.square(current_position[1] - pred_current_position[1])) 498 | if distance > warp_th: 499 | break 500 | last_frame = frame 501 | if last_frame != max(frames): 502 | split_index = frames.index(last_frame) 503 | return split_index 504 | 505 | def separate_warp_tracklet(tracking_dict,**kwargs): 506 | # separate warp tracklets based on motion feature. 
507 | remove_noise_cluster = kwargs.get('remove_noise_cluster', True) 508 | warp_th = kwargs.get('warp_th', 50) 509 | 510 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict.keys()] 511 | unique_offline_ids = sorted(list(set(offline_ids))) 512 | if remove_noise_cluster: 513 | if min(unique_offline_ids) == -1: 514 | unique_offline_ids.remove(-1) 515 | 516 | offline_id_serial_dict = {offline_id:[] for offline_id in unique_offline_ids} 517 | [offline_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1] 518 | 519 | max_offline_id = max(unique_offline_ids) 520 | 521 | while len(unique_offline_ids) > 0: 522 | offline_id = unique_offline_ids.pop(0) 523 | serials = offline_id_serial_dict[offline_id] 524 | if len(serials) <= 2: 525 | continue 526 | frames = [tracking_dict[serial]["Frame"] for serial in serials] 527 | if len(frames) != len(set(frames)): 528 | print(f"offline_id {offline_id} contains overlapping frames") 529 | continue 530 | frames, serials = zip(*sorted(zip(frames, serials))) #sort by frame 531 | pos_list = [tracking_dict[serial]["Coordinate"] for serial in serials] 532 | trajectory = [((pos["x1"]+pos["x2"])/2,pos["y2"]) for pos in pos_list] 533 | split_index = get_warp_index(frames,trajectory,warp_th=warp_th) 534 | 535 | if split_index != None: 536 | split_serials = serials[split_index:] 537 | max_offline_id += 1 538 | unique_offline_ids.append(max_offline_id) 539 | offline_id_serial_dict[max_offline_id] = split_serials 540 | for serial in split_serials: 541 | tracking_dict[serial]["OfflineID"] = max_offline_id 542 | return tracking_dict 543 | 544 | def exclude_short_tracklet(tracking_dict,**kwargs): 545 | # exclude tracklets that contain only a few serials from tracking_dict 546 | short_tracklet_th = kwargs.get('short_tracklet_th', 5) 547 | 548 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 549 | unique_offline_ids = sorted(list(set(offline_ids))) 550 | if min(unique_offline_ids) == -1: unique_offline_ids.remove(-1) 551 | 552 | offline_id_serial_dict = {offlineID:[] for offlineID in unique_offline_ids} # dict for looking up serials by OfflineID 553 | [offline_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1] 554 | 555 | for offline_id in unique_offline_ids: 556 | serials = offline_id_serial_dict[offline_id] 557 | if len(serials) <= short_tracklet_th: 558 | for serial in serials: 559 | tracking_dict[serial]["OfflineID"] = -1 560 | return tracking_dict 561 | 562 | def exclude_motionless_tracklet(tracking_dict,**kwargs): 563 | # exclude motionless tracklets from tracking_dict 564 | stop_track_th = kwargs.get('stop_track_th', 25) 565 | 566 | offline_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 567 | unique_offline_ids = sorted(list(set(offline_ids))) 568 | if min(unique_offline_ids) == -1: unique_offline_ids.remove(-1) 569 | 570 | offline_id_serial_dict = {offlineID:[] for offlineID in unique_offline_ids} # dict for looking up serials by OfflineID 571 | [offline_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1] 572 | 573 | for offline_id in unique_offline_ids: 574 | serials = offline_id_serial_dict[offline_id] 575 | pos_list = [tracking_dict[serial]["Coordinate"] for serial in serials] 576 | x_pos_list = [(pos["x1"]+pos["x2"])/2 for pos in pos_list] 577 | y_pos_list = [pos["y2"] for pos in 
pos_list] 578 | x_min = np.min(x_pos_list) 579 | x_max = np.max(x_pos_list) 580 | y_min = np.min(y_pos_list) 581 | y_max = np.max(y_pos_list) 582 | if (x_max-x_min < stop_track_th) and (y_max-y_min < stop_track_th): 583 | for serial in serials: 584 | tracking_dict[serial]["OfflineID"] = -1 585 | 586 | return tracking_dict 587 | -------------------------------------------------------------------------------- /tracking/src/mcpt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import json 4 | import numpy as np 5 | from datetime import datetime 6 | from collections import Counter 7 | from sklearn.cluster import DBSCAN, AgglomerativeClustering 8 | from sklearn.metrics.pairwise import cosine_similarity 9 | from sklearn.metrics import pairwise_distances 10 | from scipy.interpolate import RegularGridInterpolator 11 | from itertools import combinations 12 | from scipy.stats import mode 13 | from scipy.spatial.distance import pdist, squareform 14 | 15 | from scpt import associate_cluster,agglomerative_clustering 16 | import pose 17 | 18 | """ 19 | Definitions for clustering to muilt-camera tracking. 20 | """ 21 | 22 | def get_max_value_of_dict(dictionary, key): 23 | # get max value of any key from nested dictionary 24 | max_value = float('-inf') 25 | for k, v in dictionary.items(): 26 | if isinstance(v, dict): 27 | max_value = max(max_value, get_max_value_of_dict(v, key)) 28 | elif k == key: 29 | max_value = max(max_value, v) 30 | return max_value 31 | 32 | def create_similarity_matrix_mcpt(representative_nodes,**kwargs): 33 | # create similarity matrix from representative feature 34 | short_track_th = kwargs.get('short_track_th', 0) 35 | representative_selection_method = kwargs.get("representative_selection_method","keypoint") 36 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2) 37 | feature_stack = None 38 | for camera_id in representative_nodes: 39 | tmp_representative_nodes = representative_nodes[camera_id] 40 | for local_id in tmp_representative_nodes: 41 | value = tmp_representative_nodes[local_id] 42 | representative_node = value["representative_node"] 43 | serials = value["all_serials"] 44 | 45 | if len(serials) < short_track_th: 46 | continue 47 | if representative_selection_method == "keypoint": 48 | score = representative_node["score"] 49 | if score > keypoint_condition_th: 50 | continue 51 | 52 | feature = np.load(representative_node["npy_path"]) 53 | if feature_stack is None: 54 | feature_stack = np.empty((0, len(feature.flatten()))) 55 | feature_stack = np.append(feature_stack , feature.reshape(1, -1) , axis=0) 56 | similarity_matrix = cosine_similarity(feature_stack) 57 | similarity_matrix = similarity_matrix.astype(np.float16) 58 | return similarity_matrix 59 | 60 | def measure_intersect_area(rectangle1, rectangle2): 61 | # measure intersect area 62 | intersect_width = min(rectangle1[2], rectangle2[2]) - max(rectangle1[0], rectangle2[0]) 63 | intersect_height = min(rectangle1[3], rectangle2[3]) - max(rectangle1[1], rectangle2[1]) 64 | intersect_area = max(intersect_width, 0) * max(intersect_height, 0) 65 | return intersect_area 66 | 67 | def eval_keypoints(serial,other_serials,keypoints_results,**kwargs): 68 | # evaluate results of pose estimation 69 | """ 70 | condition = 1: All keypoints has high confidence 71 | condition = 2: half of keypoint has high confidence (left or right half of the body) 72 | condition = 3: part of the keypoint has high confidence in left or right half of the body 73 | 
condition = 4: almost keypoints has low confidence 74 | """ 75 | keypoint_th = kwargs.get("keypoint_th",0.7) 76 | 77 | kp = keypoints_results.get_keypoints(serial) 78 | if kp == None: 79 | condition, intersect_ratio, score,area = 4, 1 , 0, 0 80 | else: 81 | x1,y1,x2,y2,bbox_confidence = kp["bbox"] 82 | keypoints = kp["Keypoints"] 83 | area = (x2-x1)*(y2-y1) 84 | x_list, y_list, scores = zip(*keypoints) 85 | 86 | intersect_area = 0 87 | for other_serial in other_serials : 88 | other_kp =keypoints_results.get_keypoints(other_serial) 89 | if other_kp==None: continue 90 | x1_,y1_,x2_,y2_,bbox_confidence = other_kp["bbox"] 91 | tmp_intersect_area = measure_intersect_area([x1,y1,x2,y2],[x1_,y1_,x2_,y2_]) 92 | intersect_area = max(intersect_area,tmp_intersect_area) 93 | intersect_ratio = intersect_area/((x2-x1)*(y2-y1)) 94 | 95 | if np.min(scores) >= keypoint_th: 96 | score = np.mean(scores) 97 | condition = 1 98 | else: 99 | right_scores = [score for i,score in enumerate(scores) if i%2==0] 100 | left_scores = [score for i,score in enumerate(scores) if i%2==1] 101 | nose_score = right_scores.pop(0) 102 | min_right_scores = np.min(right_scores) 103 | min_left_scores = np.min(left_scores) 104 | target_scores = left_scores if min_left_scores > min_right_scores else right_scores 105 | min_score = np.min(target_scores) 106 | score = np.mean(target_scores) 107 | if min_score >= keypoint_th: 108 | condition = 2 109 | else: 110 | count = len([tmp_score for tmp_score in target_scores if tmp_score >= keypoint_th]) 111 | if count/len(target_scores) > 0.7: 112 | condition =3 113 | else: 114 | condition = 4 115 | return condition, intersect_ratio, score, area 116 | 117 | def find_high_confidence_keypoint_node(tracking_dict,serials,keypoints_results,frame_serials_dict,**kwargs): 118 | keypoint_th = kwargs.get("keypoint_th",0.7) 119 | 120 | conditions = [] 121 | intersects = [] 122 | image_scores = [] 123 | areas = [] 124 | 125 | for k,serial in enumerate(serials): 126 | frame = tracking_dict[serial]["Frame"] 127 | other_serials = frame_serials_dict[frame] 128 | other_serials.remove(serial) 129 | 130 | condition,intersect_ratio ,image_score,area = eval_keypoints(serial,other_serials,keypoints_results) 131 | conditions.append(condition) 132 | intersects.append(intersect_ratio) 133 | image_scores.append(image_score) 134 | areas.append(area) 135 | min_condition = np.min(conditions) 136 | index_area = np.array([(i,area) for i,(condition,area) in enumerate(zip(conditions,areas)) if condition == min_condition]) 137 | max_index = np.argmax(index_area[:,1]) 138 | index,max_area = index_area[max_index] 139 | 140 | serial = serials[int(index)] 141 | feature = np.load(tracking_dict[serial]["NpyPath"]) 142 | return serial, feature, int(min_condition) 143 | 144 | def decide_representative_nodes(tracking_results,out_dir,scene_id,**kwargs): 145 | # decide representative nodes from each tracklet 146 | epsilon = kwargs.get('epsilon_mcpt', 0.3) 147 | representative_selection_method = kwargs.get("representative_selection_method","centrality") 148 | short_track_th = kwargs.get("short_track_th",20) 149 | model = kwargs.get("model","mmpose_hrnet") 150 | keypoint_th = kwargs.get("keypoint_th",0.7) 151 | 152 | representative_nodes = {} 153 | for camera_id in tracking_results: 154 | representative_nodes[camera_id] = {} 155 | tracking_dict = tracking_results[camera_id] 156 | if representative_selection_method == "keypoint": 157 | keypoints_results = 
pose.PoseKeypoints(f"Pose/scene_{str(scene_id).zfill(3)}/camera_{str(camera_id).zfill(4)}/camera_{str(camera_id).zfill(4)}_out_keypoint.json") 158 | keypoints_results.assign_serial_from_tracking_dict(tracking_dict=tracking_dict) 159 | max_frame = get_max_value_of_dict(tracking_dict,"Frame") 160 | frame_serials_dict = {n+1:[] for n in range(max_frame)} 161 | [frame_serials_dict[tracking_dict[serial]["Frame"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1] 162 | 163 | # Get each clusters, we need to iterate tracking_dict to extract cluster-wise data 164 | local_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 165 | unique_local_ids = sorted(set(local_ids)) 166 | if -1 in unique_local_ids: 167 | unique_local_ids.remove(-1) 168 | local_id_serials_dict = {local_id:[] for local_id in unique_local_ids} 169 | [local_id_serials_dict[local_id].append(serial) for local_id,serial in zip(local_ids,tracking_dict) if local_id >= 0] 170 | 171 | # Get the representative node of each clusters 172 | for local_id in local_id_serials_dict: 173 | serials = local_id_serials_dict[local_id] 174 | if representative_selection_method == "centrality": 175 | serials, serial, feature = find_highest_centrality_node(tracking_dict, serials, epsilon=epsilon) 176 | if serial != None: 177 | representative_node = {"serial": serial, "npy_path": tracking_dict[serial]["NpyPath"]} 178 | elif representative_selection_method == "keypoint": 179 | serial, feature, score = find_high_confidence_keypoint_node(tracking_dict,serials,keypoints_results,frame_serials_dict,keypoint_th = keypoint_th) 180 | representative_node = {"serial": serial,"score":score, "npy_path": tracking_dict[serial]["NpyPath"]} 181 | else: 182 | print("representative_selection_method is wrong") 183 | sys.exit() 184 | # Save result out to json 185 | if serials !=[]: 186 | 187 | representative_nodes[camera_id][local_id] = {"representative_node": representative_node, "all_serials": serials} 188 | json_path = os.path.join(out_dir, f"representative_nodes_scene{scene_id}.json") 189 | with open(json_path, "w") as f: 190 | json.dump(representative_nodes, f) 191 | 192 | return representative_nodes 193 | 194 | def multi_camera_people_tracking(tracking_results, scene_id, json_dir, out_dir,**kwargs): 195 | # perform mcpt using tracking_results 196 | # tracking_results contains tracking_dict, which contains results of scpt in each camera 197 | print("running multi_camera_people_tracking") 198 | 199 | appearance_based_tracking = kwargs.get("appearance_based_tracking",True) 200 | distance_type = kwargs.get("distance_type","max") 201 | distance_th = kwargs.get("distance_th",5) 202 | epsilon = kwargs.get("epsilon_mcpt",0.4) 203 | representative_selection_method = kwargs.get("representative_selection_method","keypoint") 204 | short_track_th = kwargs.get("short_track_th",0) 205 | keypoint_th = kwargs.get("keypoint_th",0.7) 206 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2) 207 | replace_similarity_by_wcoordinate = kwargs.get("replace_similarity_by_wcoordinate",True) 208 | replace_value = kwargs.get('replace_value', -10) 209 | 210 | print("representative_selection_method:",representative_selection_method) 211 | print("short_track_th:",short_track_th) 212 | print("epsilon:",epsilon) 213 | if representative_selection_method == "keypoint": 214 | print("keypoint_condition_th:",keypoint_condition_th) 215 | 216 | # Representative image extraction 217 | representative_nodes = 
get_representative_nodes_cache(scene_id=scene_id, out_dir=out_dir) 218 | if representative_nodes == None: 219 | representative_nodes = decide_representative_nodes(tracking_results,out_dir,scene_id,epsilon=epsilon,representative_selection_method=representative_selection_method,short_track_th=short_track_th,keypoint_th=keypoint_th) 220 | else: 221 | print(f"Found repsentative_nodes cache file. Got {len(representative_nodes)} camera(s) info.") 222 | print("representative feature is selected") 223 | 224 | similarity_matrix = create_similarity_matrix_mcpt(representative_nodes,short_track_th=short_track_th,representative_selection_method=representative_selection_method,keypoint_condition_th=keypoint_condition_th) 225 | similarity_matrix[similarity_matrix < (1-epsilon)] = 0 226 | clusters = list(range(len(similarity_matrix))) 227 | print("number of tracklet:",len(set(clusters))) 228 | similarity_matrix = replace_similarity(representative_nodes,similarity_matrix,tracking_results,clusters,distance_th=distance_th, 229 | distance_type=distance_type,replace_similarity_by_wcoordinate=replace_similarity_by_wcoordinate, 230 | short_track_th = short_track_th, keypoint_condition_th=keypoint_condition_th, 231 | representative_selection_method=representative_selection_method) 232 | # perform Re-identification using hieralchical clustering with average linkage 233 | clusters = associate_cluster(clusters, similarity_matrix, epsilon=epsilon, cost_function=2, minimize=False) 234 | del similarity_matrix 235 | 236 | print("unique_clusters:",len(set(clusters))) 237 | 238 | camera_dict = create_camera_dict(representative_nodes,short_track_th = short_track_th, 239 | keypoint_condition_th=keypoint_condition_th, representative_selection_method=representative_selection_method) 240 | 241 | for camera_id in camera_dict: 242 | tracking_dict = tracking_results[int(camera_id)] 243 | indices = camera_dict[camera_id]["indices"] 244 | local_ids = camera_dict[camera_id]["unique_local_ids"] 245 | tmp_clusters = [clusters[index] for index in indices] 246 | local_id_cluster_dict = {local_id:cluster for local_id,cluster in zip(local_ids,tmp_clusters)} 247 | 248 | local_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 249 | unique_local_ids = sorted(set(local_ids)) 250 | if -1 in unique_local_ids: 251 | unique_local_ids.remove(-1) 252 | local_id_serials_dict = {local_id:[] for local_id in unique_local_ids} 253 | [local_id_serials_dict[local_id].append(serial) for local_id,serial in zip(local_ids,tracking_dict) if local_id >= 0] 254 | for local_id in unique_local_ids: 255 | for serial in local_id_serials_dict[local_id]: 256 | value = tracking_dict[serial] 257 | if local_id in local_id_cluster_dict: 258 | value["GlobalOfflineID"] = int(local_id_cluster_dict[local_id]) 259 | return tracking_results 260 | 261 | def get_representative_nodes_cache(scene_id, out_dir): 262 | # Get cached representative nodes info if any 263 | representative_node_json = os.path.join(out_dir, f"representative_nodes_scene{scene_id}.json") 264 | if os.path.isfile(representative_node_json): 265 | with open(representative_node_json, "r") as f: 266 | representative_nodes = json.load(f) 267 | return representative_nodes 268 | return None 269 | 270 | def get_unique_global_ids(tracking_results,representative_nodes): 271 | # get unique global ids from tracking_results 272 | global_ids = [] 273 | for camera_id in representative_nodes: 274 | tracking_dict = tracking_results[camera_id] 275 | for local_id in representative_nodes[camera_id]: 276 | 
serial = representative_nodes[camera_id][local_id]["representative_node"]["serial"] 277 | if "GlobalOfflineID" in tracking_dict[serial]: 278 | global_ids.append(tracking_dict[serial]["GlobalOfflineID"]) 279 | unique_global_ids = sorted(list(set(global_ids))) 280 | return unique_global_ids 281 | 282 | def get_serials_each_global_id(tracking_results,representative_nodes,unique_global_ids): 283 | # get serials assigned each global id 284 | global_serial_dict = {} #global_id: {camera_id:(local_id, serial)} 285 | for global_id in unique_global_ids: 286 | tmp_dict = {} 287 | for camera_id in representative_nodes: 288 | tmp_dict[camera_id] = [] 289 | global_serial_dict[global_id] = tmp_dict 290 | for camera_id in representative_nodes: 291 | tracking_dict = tracking_results[camera_id] 292 | for local_id in representative_nodes[camera_id]: 293 | serial = representative_nodes[camera_id][local_id]["representative_node"]["serial"] 294 | if "GlobalOfflineID" in tracking_dict[serial]: 295 | global_id = tracking_dict[serial]["GlobalOfflineID"] 296 | global_serial_dict[global_id][camera_id].append((local_id,serial)) 297 | return global_serial_dict 298 | 299 | def create_camera_dict(representative_nodes,**kwargs): 300 | # 301 | short_track_th = kwargs.get('short_track_th', 0) 302 | representative_selection_method = kwargs.get("representative_selection_method","keypoint") 303 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2) 304 | 305 | camera_dict = {camera_id:{"indices":[],"unique_local_ids":[]} for camera_id in representative_nodes} 306 | max_id = 0 307 | for camera_id in representative_nodes: 308 | tmp_representative_nodes = representative_nodes[camera_id] 309 | local_ids = [] 310 | for local_id in tmp_representative_nodes: 311 | serials = tmp_representative_nodes[local_id]["all_serials"] 312 | if len(serials) < short_track_th: 313 | continue 314 | if representative_selection_method == "keypoint": 315 | score = tmp_representative_nodes[local_id]["representative_node"]["score"] 316 | if score > keypoint_condition_th: 317 | continue 318 | local_ids.append(int(local_id)) 319 | unique_local_ids = sorted(list(set(local_ids))) 320 | camera_dict[camera_id]["indices"] += list(range(max_id,max_id+len(unique_local_ids))) 321 | camera_dict[camera_id]["unique_local_ids"] += unique_local_ids 322 | max_id += len(unique_local_ids) 323 | return camera_dict 324 | 325 | def create_mcpt_feature_stack(tracking_results,target_list): 326 | feature_stack = None 327 | for camera_id, serial in target_list: 328 | feature = np.load(tracking_results[camera_id][serial]["NpyPath"]) 329 | if feature_stack is None: 330 | feature_stack = np.empty((0, len(feature.flatten()))) 331 | feature_stack = np.append(feature_stack , feature.reshape(1, -1), axis=0) 332 | return feature_stack 333 | 334 | 335 | 336 | def assign_global_id(tracking_results,representative_nodes,**kwargs): 337 | # assign unclustered tracklets to global id 338 | 339 | epsilon = kwargs.get('epsilon_mcpt', 0.3) 340 | assign_all_tracklet = kwargs.get('assign_all_tracklet', False) 341 | sim_th = kwargs.get('sim_th', 0.9) 342 | print("sim_th:",sim_th) 343 | print("assign_all_tracklet:",assign_all_tracklet) 344 | model = kwargs.get("model","mmpose_hrnet") 345 | 346 | counter = 0 347 | assigned_tracks = [] 348 | unassigned_tracks = [] 349 | 350 | for camera_id in representative_nodes: 351 | tracking_dict = tracking_results[camera_id] 352 | for local_id in representative_nodes[camera_id]: 353 | serial = 
representative_nodes[camera_id][local_id]["representative_node"]["serial"] 354 | if "GlobalOfflineID" in tracking_dict[serial]: 355 | global_id = tracking_dict[serial]["GlobalOfflineID"] 356 | assigned_tracks.append((global_id,camera_id,local_id,serial)) 357 | else: 358 | unassigned_tracks.append((camera_id,local_id)) 359 | 360 | target_list = [(camera_id,serial) for global_id,camera_id,local_id,serial in assigned_tracks] 361 | feature_stack = create_mcpt_feature_stack(tracking_results,target_list) 362 | feature_stack_T = feature_stack.T 363 | feature_stack_norm = np.linalg.norm(feature_stack, axis=1) 364 | global_ids = [global_id for global_id,camera_id,local_id,serial in assigned_tracks] 365 | 366 | for k,(camera_id,local_id) in enumerate(unassigned_tracks): 367 | npy_path = representative_nodes[camera_id][local_id]["representative_node"]["npy_path"] 368 | feature = np.load(npy_path) 369 | cos_sims = np.dot(feature,feature_stack_T)/ (np.linalg.norm(feature)*feature_stack_norm) 370 | 371 | if assign_all_tracklet == False: 372 | max_sim = np.max(cos_sims) 373 | if max_sim < sim_th: 374 | continue 375 | 376 | similar_indices = list(np.where(cos_sims >= sim_th)[0]) 377 | if len(similar_indices) == 0: 378 | continue 379 | 380 | tmp_global_ids = [global_id for i,global_id in enumerate(global_ids) if i in similar_indices] 381 | global_id = mode(tmp_global_ids).mode 382 | 383 | counter += 1 384 | serials = representative_nodes[camera_id][local_id]["all_serials"] 385 | for serial in serials: 386 | tracking_results[camera_id][serial]["GlobalOfflineID"] = int(global_id) 387 | print(f"{counter} tracklets are reassigned") 388 | return tracking_results 389 | 390 | def global_id_reassignment(tracking_results, representative_nodes,scene_id,**kwargs): 391 | # perform delete_small_global_id() and assign_global_id() for reassigning unclustered tracklets 392 | epsilon = kwargs.get("epsilon_mcpt",0.3) 393 | representative_selection_method = kwargs.get("representative_selection_method","centrality") 394 | delete_gid_th = kwargs.get("delete_gid_th",10000) 395 | assign_all_tracklet = kwargs.get("assign_all_tracklet",True) 396 | sim_th = kwargs.get("sim_th",0.8) 397 | delete_few_camera_cluter = kwargs.get('delete_few_camera_cluter',False) 398 | 399 | unique_global_ids = get_unique_global_ids(tracking_results,representative_nodes) 400 | 401 | global_serial_dict = get_serials_each_global_id(tracking_results,representative_nodes,unique_global_ids) 402 | 403 | tracking_results, unique_global_ids = delete_small_global_id(tracking_results,representative_nodes,global_serial_dict, 404 | delete_gid_th = delete_gid_th,delete_few_camera_cluter=delete_few_camera_cluter) 405 | 406 | tracking_results = assign_global_id(tracking_results,representative_nodes, 407 | delete_gid_th=delete_gid_th, assign_all_tracklet=assign_all_tracklet,sim_th=sim_th) 408 | 409 | return tracking_results 410 | 411 | def translate_world_coordinate(x, y, homography_matrix): 412 | # translate camera coordinate to world coordinate 413 | vector_xyz = np.array([x, y, 1]) # z=1 414 | vector_xyz_3d = np.dot(np.linalg.inv(homography_matrix), vector_xyz.T) 415 | return vector_xyz_3d[0] / vector_xyz_3d[2], vector_xyz_3d[1] / vector_xyz_3d[2] 416 | 417 | 418 | def interpolate_tracklet(tracking_results,representative_nodes,**kwargs): 419 | # interpolate missing detections for each tracklet 420 | max_interpolate_interval = kwargs.get("max_interpolate_interval",150) 421 | frame_sampling_freq = kwargs.get("frame_sampling_freq",1) 422 | for camera_id in 
tracking_results: 423 | tracking_dict = tracking_results[camera_id] 424 | local_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 425 | unique_local_ids = sorted(list(set(local_ids))) 426 | if min(unique_local_ids) == -1: unique_local_ids.remove(-1) 427 | local_id_serial_dict = {local_id:[] for local_id in unique_local_ids} 428 | [local_id_serial_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1] 429 | local_id_frame_dict = {local_id:[] for local_id in unique_local_ids} 430 | [local_id_frame_dict[tracking_dict[serial]["OfflineID"]].append(tracking_dict[serial]["Frame"]) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] != -1] 431 | 432 | max_serial = int(max(tracking_dict.keys())) 433 | for local_id in unique_local_ids: 434 | frames, serials = zip(*sorted(zip(local_id_frame_dict[local_id], local_id_serial_dict[local_id]))) 435 | missing_frames = [] 436 | for frame,next_frame in zip(frames[:-1],frames[1:]): 437 | diff = next_frame - frame 438 | if diff > max_interpolate_interval: continue 439 | while diff > frame_sampling_freq: 440 | diff -= frame_sampling_freq 441 | missing_frame = next_frame - diff 442 | missing_frames.append(missing_frame) 443 | if missing_frames==0: continue 444 | global_id = tracking_dict[serials[0]]["GlobalOfflineID"] if "GlobalOfflineID" in tracking_dict[serials[0]] else None 445 | 446 | coordinates = [list(tracking_dict[serial]["Coordinate"].values())+list(tracking_dict[serial]["WorldCoordinate"].values()) for serial in serials] 447 | interpolator = RegularGridInterpolator((np.array(frames),), np.array(coordinates), method='linear') 448 | for frame in missing_frames: 449 | x1,y1,x2,y2,w_x,w_y = interpolator([frame])[0] 450 | (x1, y1, x2, y2), (w_x,w_y) = map(int, [x1, y1, x2, y2]),map(float,[w_x,w_y]) 451 | max_serial += 1 452 | if global_id != None: 453 | tracking_dict[str(max_serial).zfill(8)] = {"Frame": frame, "Coordinate": {'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2}, "WorldCoordinate": {'x': w_x, 'y': w_y}, "OfflineID": local_id, "GlobalOfflineID": global_id} 454 | else: 455 | tracking_dict[str(max_serial).zfill(8)] = {"Frame": frame, "Coordinate": {'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2}, "WorldCoordinate": {'x': w_x, 'y': w_y}, "OfflineID": local_id} 456 | return tracking_results 457 | 458 | 459 | def find_highest_centrality_node(tracking_dict, serials, **kwargs): 460 | # find highest centrality node from each tracklet 461 | epsilon = kwargs.get('epsilon_mcpt', 0.3) 462 | stack_max_size = kwargs.get('stack_max_size', 2000) 463 | image_size = kwargs.get('image_size', (1920,1080)) 464 | aspect_th = kwargs.get('aspect_th', 1.6) 465 | 466 | pos_list = [list(tracking_dict[serial]["Coordinate"].values()) for serial in serials] 467 | pos_list = np.array(pos_list) 468 | aspects = (pos_list[:,3]-pos_list[:,1])/(pos_list[:,2]-pos_list[:,0]) 469 | pos_list[:, 2] = image_size[0] - pos_list[:, 2] 470 | pos_list[:, 3] = image_size[1] - pos_list[:, 3] 471 | edge_distances = np.min(pos_list,axis = 1) 472 | new_serials = [] 473 | for i, (serial,aspect, edge_distance) in enumerate(zip(serials,aspects,edge_distances)): 474 | if (aspect >= aspect_th): # (edge_distance <= 1) and 475 | new_serials.append(serial) 476 | if len(new_serials) == 0: 477 | serial,feature = None,None 478 | pass 479 | elif len(new_serials) == 1 or len(new_serials)== 2: 480 | serial = new_serials[0] 481 | feature = np.load(tracking_dict[serial]["NpyPath"]) 482 | else: 483 | freq =1 484 | while 
len(new_serials)//freq > stack_max_size: 485 | freq += 1 486 | for n, serial in enumerate(new_serials): 487 | if n % freq != 0: continue 488 | feature = np.load(tracking_dict[serial]["NpyPath"]) 489 | if n== 0: 490 | feature_stack = np.empty((0,len(feature.flatten()))) 491 | feature_stack = np.append(feature_stack , feature.reshape(1, -1), axis=0) 492 | similarity_matrix = cosine_similarity(feature_stack) 493 | similarity_matrix = np.where(similarity_matrix < 1-epsilon, 0, similarity_matrix) 494 | centralities = np.sum(similarity_matrix,axis=0) 495 | idx_max = np.argmax(centralities) 496 | serial = new_serials[idx_max*freq] 497 | feature = feature_stack[idx_max] 498 | return new_serials, serial, feature 499 | 500 | def minimize_similarity_by_sc_overlap(representative_nodes,matrix,tracking_results,clusters,camera_dict,**kwargs): 501 | # minimize similarity if tracklets are overlapping in SCPT results 502 | matrix_type = kwargs.get('matrix_type', "similarity") 503 | if matrix_type == "similarity": 504 | replace_value = -1 505 | elif matrix_type == "distance": 506 | replace_value = np.max(matrix[matrix distance_th, replace_value, similarity_matrix) 537 | return similarity_matrix 538 | 539 | def maximize_similarity_by_wcoordinate(similarity_matrix,distance_matrix,**kwargs): 540 | # replace multiple elements of the similarity matrix with 1 based on the world coordinate 541 | max_distance_th = kwargs.get('max_distance_th', 0.5) 542 | replace_value = kwargs.get('replace_value', 1) 543 | print("maximize_similarity_by_wcoordinate") 544 | similarity_matrix = np.where(distance_matrix < max_distance_th, replace_value, similarity_matrix) 545 | return similarity_matrix 546 | 547 | def replace_similarity(representative_nodes,similarity_matrix,tracking_results,clusters,**kwargs): 548 | # replace multiple elements of the similarity matrix with another value 549 | distance_th = kwargs.get('distance_th', 10) 550 | check_sc_overlap = kwargs.get('check_sc_overlap', False) 551 | replace_similarity_by_wcoordinate = kwargs.get('replace_similarity_by_wcoordinate', False) 552 | distance_type = kwargs.get('distance_type', "min") 553 | short_track_th = kwargs.get("short_track_th",0) 554 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2) 555 | replace_value = kwargs.get('replace_value', -10) 556 | representative_selection_method = kwargs.get('representative_selection_method', 'keypoint') 557 | 558 | if check_sc_overlap: 559 | camera_dict = create_camera_dict(representative_nodes,short_track_th = short_track_th,keypoint_condition_th=keypoint_condition_th, 560 | representative_selection_method=representative_selection_method) 561 | similarity_matrix = minimize_similarity_by_sc_overlap(representative_nodes,similarity_matrix,tracking_results,clusters,camera_dict, matrix_type = "similarity") 562 | if replace_similarity_by_wcoordinate: 563 | min_distance_matrix,max_distance_matrix,mean_distance_matrix = create_distance_matrix(representative_nodes,tracking_results, distance_type = distance_type,short_track_th =short_track_th, keypoint_condition_th = keypoint_condition_th,representative_selection_method =representative_selection_method) 564 | similarity_matrix = maximize_similarity_by_wcoordinate(similarity_matrix, mean_distance_matrix) 565 | similarity_matrix = replace_negative_value_by_wcoordinate(similarity_matrix, min_distance_matrix, distance_th=distance_th,replace_value=replace_value) 566 | 567 | return similarity_matrix 568 | 569 | def measure_euclidean_distance(id1_pos_list,id2_pos_list): 570 | points1 = 
np.array(id1_pos_list) 571 | points2 = np.array(id2_pos_list) 572 | diff = points1-points2 573 | euclid_distances = np.sqrt(np.sum(diff**2, axis=1)) 574 | return euclid_distances 575 | 576 | def create_distance_matrix(representative_nodes,tracking_results, **kwargs): 577 | # create a Euclidean distance matrix showing the Euclidean distance between each tracklet 578 | 579 | distance_type = kwargs.get('distance_type', "max") #distance_type min or max or mean 580 | image_size = kwargs.get('image_size', (1920,1080)) 581 | short_track_th = kwargs.get('short_track_th', 0) 582 | representative_selection_method = kwargs.get("representative_selection_method","keypoint") 583 | keypoint_condition_th = kwargs.get("keypoint_condition_th",2) 584 | print("distance_type:",distance_type) 585 | camera_dict = create_camera_dict(representative_nodes,short_track_th = short_track_th, 586 | keypoint_condition_th=keypoint_condition_th, representative_selection_method=representative_selection_method) 587 | shape = np.sum([len(camera_dict[camera_id]["indices"]) for camera_id in camera_dict]) 588 | max_distance_matrix = np.full((shape, shape), np.inf, dtype=np.float16) 589 | mean_distance_matrix = np.full((shape, shape), np.inf, dtype=np.float16) 590 | min_distance_matrix = np.full((shape, shape), np.inf, dtype=np.float16) 591 | 592 | index_serials_dict = {index:[] for index in range(len(max_distance_matrix))} 593 | index_frames_dict = {index:[] for index in range(len(max_distance_matrix))} 594 | index_wpos_list_dict = {index:[] for index in range(len(max_distance_matrix))} 595 | 596 | for camera_id in representative_nodes: 597 | tracking_dict = tracking_results[int(camera_id)] 598 | indices = camera_dict[camera_id]["indices"] 599 | unique_local_ids = camera_dict[camera_id]["unique_local_ids"] 600 | local_ids_serials_dict = {local_id:[] for local_id in unique_local_ids} 601 | [local_ids_serials_dict[tracking_dict[serial]["OfflineID"]].append(serial) for serial in tracking_dict if tracking_dict[serial]["OfflineID"] in unique_local_ids] 602 | 603 | for tmp_index in range(len(indices)): 604 | local_id = unique_local_ids[tmp_index] 605 | serials = local_ids_serials_dict[local_id] 606 | frames = [tracking_dict[serial]["Frame"] for serial in serials] 607 | wpos_list = [list(tracking_dict[serial]["WorldCoordinate"].values()) for serial in serials] 608 | index = indices[tmp_index] 609 | index_serials_dict[index] += serials 610 | index_frames_dict[index] += frames 611 | index_wpos_list_dict[index] += wpos_list 612 | 613 | for id1_index in range(len(max_distance_matrix)-1): 614 | id1_frames = index_frames_dict[id1_index] 615 | id1_wpos_list = index_wpos_list_dict[id1_index] 616 | if id1_frames == []: 617 | continue 618 | 619 | for id2_index in range(id1_index+1,len(max_distance_matrix)): 620 | id2_frames = index_frames_dict[id2_index] 621 | if id2_frames == []: 622 | continue 623 | common_frames = set(id1_frames).intersection(set(id2_frames)) 624 | if len(common_frames) < 1: continue 625 | id2_wpos_list = index_wpos_list_dict[id2_index] 626 | id1_lap_indices = [i for i,id1_frame in enumerate(id1_frames) if id1_frame in common_frames] 627 | id2_lap_indices = [i for i,id2_frame in enumerate(id2_frames) if id2_frame in common_frames] 628 | id1_lap_wpos_list = [id1_wpos_list[id1_lap_index] for id1_lap_index in id1_lap_indices] 629 | id2_lap_wpos_list = [id2_wpos_list[id2_lap_index] for id2_lap_index in id2_lap_indices] 630 | 631 | euclid_distances = measure_euclidean_distance(id1_lap_wpos_list,id2_lap_wpos_list) 632 | 
min_distance = np.min(euclid_distances) 633 | mean_distance = np.mean(euclid_distances) 634 | max_distance = np.max(euclid_distances) 635 | min_distance_matrix[id1_index,id2_index] = min_distance 636 | min_distance_matrix[id2_index,id1_index] = min_distance 637 | if len(common_frames) > 120: 638 | mean_distance_matrix[id1_index,id2_index] = mean_distance 639 | mean_distance_matrix[id2_index,id1_index] = mean_distance 640 | max_distance_matrix[id1_index,id2_index] = max_distance 641 | max_distance_matrix[id2_index,id1_index] = max_distance 642 | 643 | return min_distance_matrix,max_distance_matrix,mean_distance_matrix 644 | 645 | def delete_small_global_id(tracking_results,representative_nodes,global_serial_dict,**kwargs): 646 | # delete global ids that contain only a few serials from tracking_results 647 | delete_gid_th = kwargs.get('delete_gid_th',10000) 648 | delete_few_camera_cluter = kwargs.get('delete_few_camera_cluter',False) 649 | print("delete_gid_th:",delete_gid_th) 650 | print("delete_few_camera_cluter:",delete_few_camera_cluter) 651 | delete_global_ids = [] 652 | save_global_ids = [] 653 | 654 | for global_id in global_serial_dict: 655 | serial_counter = 0 656 | camera_ids=[] 657 | for camera_id in global_serial_dict[global_id]: 658 | if global_serial_dict[global_id][camera_id] != []: 659 | camera_ids.append(camera_id) 660 | for local_id,serial in global_serial_dict[global_id][camera_id]: 661 | tmp_all_serials = representative_nodes[camera_id][local_id]["all_serials"] 662 | serial_counter += len(tmp_all_serials) 663 | 664 | if serial_counter < delete_gid_th: 665 | delete_global_ids.append(global_id) 666 | continue 667 | if delete_few_camera_cluter: 668 | if len(set(camera_ids)) < 3: 669 | delete_global_ids.append(global_id) 670 | continue 671 | save_global_ids.append(global_id) 672 | 673 | for camera_id in tracking_results: 674 | tracking_dict = tracking_results[camera_id] 675 | for serial in tracking_dict: 676 | tmp_dict = tracking_dict[serial] 677 | if "GlobalOfflineID" in tmp_dict: 678 | global_id = tmp_dict["GlobalOfflineID"] 679 | if global_id in delete_global_ids: 680 | del tmp_dict["GlobalOfflineID"] 681 | unique_global_ids = sorted(list(set(save_global_ids))) 682 | 683 | return tracking_results, unique_global_ids 684 | 685 | def measure_world_coordinate(scene_id,tracking_results, **kwargs): 686 | # measure world coordinates for each node 687 | mean_world_coordinate_th = kwargs.get("mean_world_coordinate_th",2) 688 | model = kwargs.get("model","mmpose_hrnet") 689 | 690 | for camera_id in tracking_results: 691 | tracking_dict = tracking_results[camera_id] 692 | with open(f"Original/scene_{str(scene_id).zfill(3)}/camera_{str(camera_id).zfill(4)}/calibration.json") as f: 693 | calibration_json = json.load(f) 694 | homography_matrix = np.array(calibration_json['homography matrix']) 695 | for serial in tracking_dict: 696 | value = tracking_dict[serial] 697 | x1,y1,x2,y2 = value["Coordinate"].values() 698 | x,y = (x2+x1)/2,y2 699 | bbox_w_c = translate_world_coordinate(x,y, homography_matrix) 700 | value["WorldCoordinate"] = {"x":bbox_w_c[0],"y":bbox_w_c[1]} 701 | 702 | for camera_id in tracking_results: 703 | tracking_dict = tracking_results[camera_id] 704 | for serial in tracking_dict: 705 | value = tracking_dict[serial] 706 | return tracking_results 707 | 708 | def eval_noise_level(keypoints): 709 | # evaluate noise level in images based on pose estimation 710 | xs,ys,scores = zip(*keypoints) 711 | th = 0.75 712 | indices = [i for i,score in enumerate(scores) if score > 
th] 713 | condition = 0 714 | if len(indices)==2: 715 | if min(indices) <= 4: 716 | condition = 0 717 | else: 718 | condition = 2 719 | if len(indices)==1: 720 | condition = 3 721 | if len(indices)==0: 722 | condition =4 723 | return condition 724 | 725 | def remove_noise_images(scene_id,tracking_results,**kwargs): 726 | # remove noise images based on pose estimation 727 | model = kwargs.get("model","mmpose_hrnet") 728 | 729 | del_serials = {camera_id:[] for camera_id in tracking_results} 730 | 731 | for camera_id in tracking_results: 732 | tracking_dict = tracking_results[camera_id] 733 | for serial in tracking_dict: 734 | value = tracking_dict[serial] 735 | if "GlobalOfflineID" not in value: 736 | del_serials[camera_id].append(serial) 737 | 738 | for camera_id in tracking_results: 739 | tracking_dict = tracking_results[camera_id] 740 | for serial in del_serials[camera_id]: 741 | del tracking_dict[serial] 742 | 743 | for camera_id in tracking_results: 744 | tracking_dict = tracking_results[camera_id] 745 | keypoints_results = pose.PoseKeypoints(f"Pose/scene_{str(scene_id).zfill(3)}/camera_{str(camera_id).zfill(4)}/camera_{str(camera_id).zfill(4)}_out_keypoint.json") 746 | keypoints_results.assign_serial_from_tracking_dict(tracking_dict=tracking_dict) 747 | del_serials = [] 748 | for serial in tracking_dict: 749 | value = tracking_dict[serial] 750 | 751 | kp = keypoints_results.get_keypoints(serial) 752 | if kp == None: 753 | del_serials.append(serial) 754 | continue 755 | keypoints = kp['Keypoints'] 756 | condition = eval_noise_level(keypoints) 757 | coordinate = list(value["Coordinate"].values()) 758 | w,h = coordinate[2]-coordinate[0],coordinate[3]-coordinate[1] 759 | if w/h > 3 or h/w > 5: 760 | del_serials.append(serial) 761 | continue 762 | if condition >= 2: 763 | if condition==2 and min(w,h) < 100: 764 | continue 765 | del_serials.append(serial) 766 | for serial in del_serials: 767 | del tracking_dict[serial] 768 | 769 | local_ids = [tracking_dict[serial]["OfflineID"] for serial in tracking_dict] 770 | unique_local_ids = sorted(set(local_ids)) 771 | if -1 in unique_local_ids: 772 | unique_local_ids.remove(-1) 773 | local_id_serials_dict = {local_id:[] for local_id in unique_local_ids} 774 | [local_id_serials_dict[local_id].append(serial) for local_id,serial in zip(local_ids,tracking_dict)] 775 | local_id_frames_dict = {local_id:[] for local_id in unique_local_ids} 776 | [local_id_frames_dict[local_id].append(tracking_dict[serial]["Frame"]) for local_id,serial in zip(local_ids,tracking_dict)] 777 | 778 | del_serials = [] 779 | for local_id in local_id_serials_dict: 780 | if local_id == -1: 781 | continue 782 | frames, serials = zip(*sorted(zip(local_id_frames_dict[local_id], local_id_serials_dict[local_id]))) 783 | for i in range(len(frames[:-1])): 784 | if i == 0: 785 | continue 786 | past_frame = frames[i-1] 787 | frame = frames[i] 788 | future_frame = frames[i+1] 789 | if (frame - past_frame >30) and (future_frame - frame > 30): 790 | del_serials.append(serials[i]) 791 | 792 | return tracking_results 793 | 794 | 795 | def delete_distant_persons(tracking_results,**kwargs): 796 | # delete the node that has long distances to other nodes with the same global id 797 | 798 | gid_serials = {} 799 | 800 | for camera_id in tracking_results: 801 | tracking_dict = tracking_results[camera_id] 802 | for serial in tracking_dict: 803 | value = tracking_dict[serial] 804 | gid = value["GlobalOfflineID"] 805 | gid_serials[gid] = [] 806 | for camera_id in tracking_results: 807 | tracking_dict = 
tracking_results[camera_id] 808 | for serial in tracking_dict: 809 | value = tracking_dict[serial] 810 | gid = value["GlobalOfflineID"] 811 | frame = value["Frame"] 812 | gid_serials[gid].append((camera_id,serial,frame)) 813 | delete_list= [] 814 | for gid in gid_serials: 815 | value = gid_serials[gid] 816 | camera_ids,serials,frames = zip(*value) 817 | frames, serials,camera_ids = zip(*sorted(zip(frames, serials, camera_ids))) 818 | 819 | current_frame = frames[0] 820 | current_serial = serials[0] 821 | current_camera_ids = camera_ids[0] 822 | tmp_frames = [] 823 | tmp_serials = [] 824 | tmp_camera_ids = [] 825 | for frame,serial,camera_id in zip(frames,serials,camera_ids): 826 | if frame !=current_frame: 827 | 828 | if len(tmp_frames) >=2: 829 | world_coordinates = [] 830 | for tmp_camera_id,tmp_serial in zip(tmp_camera_ids,tmp_serials): 831 | world_coordinate = tuple(tracking_results[tmp_camera_id][tmp_serial]["WorldCoordinate"].values()) 832 | world_coordinates.append(world_coordinate) 833 | world_coordinates = np.array(world_coordinates) 834 | distance_matrix = squareform(pdist(world_coordinates, 'euclidean')) 835 | if len(distance_matrix)>2: 836 | if np.max(distance_matrix) >7: 837 | sum_row = np.sum(distance_matrix,axis=0) 838 | argmax = np.argmax(sum_row) 839 | delete_list.append((tmp_camera_ids[argmax],tmp_serials[argmax])) 840 | 841 | current_frame = frame 842 | current_serial = serial 843 | current_camera_id = camera_id 844 | tmp_frames = [frame] 845 | tmp_serials = [serial] 846 | tmp_camera_ids = [camera_id] 847 | else: 848 | tmp_frames.append(frame) 849 | tmp_serials.append(serial) 850 | tmp_camera_ids.append(camera_id) 851 | 852 | for camera_id,serial in delete_list: 853 | del tracking_results[camera_id][serial] 854 | return tracking_results 855 | --------------------------------------------------------------------------------
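Note on the world-coordinate step: translate_world_coordinate() in tracking/src/mcpt.py maps an image point through the inverse of the camera homography and normalises by the third homogeneous component, and measure_world_coordinate() feeds it the bottom-centre of each bounding box. The short sketch below reproduces that projection in isolation. The matrix H is a made-up illustrative value; the pipeline reads the real one from Original/scene_XXX/camera_XXXX/calibration.json under the "homography matrix" key.

import numpy as np

def translate_world_coordinate(x, y, homography_matrix):
    # same computation as in tracking/src/mcpt.py: apply the inverse homography to the
    # homogeneous image point, then normalise by the third component
    vector_xyz = np.array([x, y, 1.0])
    vector_xyz_3d = np.dot(np.linalg.inv(homography_matrix), vector_xyz)
    return vector_xyz_3d[0] / vector_xyz_3d[2], vector_xyz_3d[1] / vector_xyz_3d[2]

# illustrative homography only -- the real matrix comes from calibration.json
H = np.array([[0.05, 0.0, -40.0],
              [0.0, 0.05, -20.0],
              [0.0, 0.0, 1.0]])

# the pipeline projects the bottom-centre (foot point) of each bounding box
x1, y1, x2, y2 = 900, 300, 1000, 600
w_x, w_y = translate_world_coordinate((x1 + x2) / 2, y2, H)
print({"x": w_x, "y": w_y})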