├── additional_info
│   ├── pedID.xlsx
│   └── video_name_mapping.xlsx
├── split_clips_to_frames.py
├── reorganize_annotations.py
├── README.md
└── pedestrian_intention_database_processing.py

--------------------------------------------------------------------------------
/additional_info/pedID.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PSI-Intention2022/PSI-Intention/HEAD/additional_info/pedID.xlsx

--------------------------------------------------------------------------------
/additional_info/video_name_mapping.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PSI-Intention2022/PSI-Intention/HEAD/additional_info/video_name_mapping.xlsx

--------------------------------------------------------------------------------
/split_clips_to_frames.py:
--------------------------------------------------------------------------------
'''Given the video path, extract frames for all videos. Check whether the frames already exist first.'''

import os

import cv2
from tqdm import tqdm

video_path = './PSI_Intention/Dataset/RawVideos'
frames_path = './PSI_Intention/Dataset/frames'

# Create the 'frames' folder
if not os.path.exists(frames_path):
    os.makedirs(frames_path)
    print("Created 'frames' folder.")

for video in tqdm(os.listdir(video_path)):
    name = "video" + video[7:12]
    video_target = os.path.join(video_path, video)
    frames_target = os.path.join(frames_path, name)

    if not os.path.exists(frames_target):
        os.makedirs(frames_target)
        print(f'Created frames folder for video {name}')

    vidcap = cv2.VideoCapture(video_target)
    if not vidcap.isOpened():
        raise Exception(f'Cannot open file {video_target}')

    cur_frame = 0
    while True:
        success, frame = vidcap.read()
        if not success:
            break
        # Zero-pad the frame number so the files sort in frame order
        frame_num = str(cur_frame).zfill(3)
        cv2.imwrite(os.path.join(frames_target, f'{frame_num}.jpg'), frame)
        cur_frame += 1
    vidcap.release()
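
# Optional sanity check (illustrative sketch, not part of the original script):
# compare the number of saved frames against OpenCV's reported frame count.
# The reported count can be off by a few frames for some codecs, so treat a
# mismatch as a warning rather than an error.
for video in os.listdir(video_path):
    name = "video" + video[7:12]
    cap = cv2.VideoCapture(os.path.join(video_path, video))
    reported = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    saved = len(os.listdir(os.path.join(frames_path, name)))
    if saved != reported:
        print(f'{name}: saved {saved} frames, container reports {reported}')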
--------------------------------------------------------------------------------
/reorganize_annotations.py:
--------------------------------------------------------------------------------
import os
import shutil

import pandas as pd

frames_path = './PSI_Intention/Dataset/frames'
xml_path = './PSI_Intention/Dataset/XmlFiles'

nlp_annotation_path = './PSI_Intention/Dataset/nlp_annotations'
cv_annotation_path = './PSI_Intention/Dataset/cv_annotations'

# Create the 'cv_annotations' folder
if not os.path.exists(cv_annotation_path):
    os.makedirs(cv_annotation_path)
    print("Created 'cv_annotations' folder.")

# Create the 'nlp_annotations' folder
if not os.path.exists(nlp_annotation_path):
    os.makedirs(nlp_annotation_path)
    print("Created 'nlp_annotations' folder.")

# Re-organize cv annotations
for video_file in os.listdir(frames_path):
    video_num = video_file.split('_')[1]
    if not os.path.exists(os.path.join(cv_annotation_path, video_file)):
        os.mkdir(os.path.join(cv_annotation_path, video_file))

    src = os.path.join(xml_path, video_num + '.xml')
    dst = os.path.join(cv_annotation_path, video_file, 'annotations.xml')
    try:
        shutil.copyfile(src, dst)
    except OSError:
        print("Failed copying {} to {}".format(src, dst))
print("WARNING: video_0060 and video_0093 cv_annotations are missing. These two samples are abandoned.")

# Re-organize nlp annotations
df = pd.read_excel('./PSI_Intention/Dataset/IntentAnnotations.xlsx')
for video_file in os.listdir(frames_path):
    video_num = video_file.split('_')[1]
    if not os.path.exists(os.path.join(nlp_annotation_path, video_file)):
        os.mkdir(os.path.join(nlp_annotation_path, video_file))

    dst = os.path.join(nlp_annotation_path, video_file, 'intentSegmentation.csv')
    try:
        sub_df = df[df['video_id'] == int(video_num)]
        sub_df.to_csv(dst, index=None, header=True)
    except Exception:
        print("Failed to create nlp annotations {}".format(dst))

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# News
- 20230910: All PSI data, including videos, CV annotations, and cognitive annotations (PSI 1.0 & PSI 2.0), are public for download and future exploration! ~~[[Google Drive]()]~~ [[PSI Homepage](http://pedestriandataset.situated-intent.net)] :sparkler::zap:
- 20230607: :bangbang: We are hosting a competition on pedestrian behavior prediction; please check the details and participate via the [[IEEE ITSS PSI Student Competition](https://psi-intention2022.github.io)].
- 20230607: The new [[PSI 2.0 benchmark (Training & Validation & Test)](https://github.com/PSI-Intention2022/PSI-Dataset)] is released, with more video samples and diverse multimodal interpretable annotations for pedestrian intent and driving decisions!

---
---
:exclamation: This repo is **deprecated**. Please check our new [PSI Dataset](https://github.com/PSI-Intention2022/PSI-Dataset) and the [[IEEE ITSS PSI Student Competition in Pedestrian Behavior Prediction](https://psi-intention2022.github.io)]. :exclamation:
---
# IUPUI-CSRC Pedestrian Situated Intent (PSI) Dataset
This repository contains the IUPUI-CSRC Pedestrian Situated Intent (PSI) Dataset pre-processing scripts and baseline.

For more situated intent data and work, please see [Situated Intent](http://situated-intent.net)!

## Download dataset and extract
Download the dataset from [link](http://situated-intent.net/pedestrian_dataset/), then extract via

```command
unzip Dataset.zip
```

Output:

```command
Archive:  Dataset.zip
   creating: PSI_Intention/Dataset/
  inflating: PSI_Intention/Dataset/VideoWithIndicator.zip
  inflating: PSI_Intention/Dataset/RawVideos.zip
  inflating: PSI_Intention/Dataset/README.txt
  inflating: PSI_Intention/Dataset/IntentAnnotations.xlsx
  inflating: PSI_Intention/Dataset/XmlFiles.zip
```
Extract videos and spatial annotations:
```command
unzip ./PSI_Intention/Dataset/RawVideos.zip -d ./PSI_Intention/Dataset
unzip ./PSI_Intention/Dataset/XmlFiles.zip -d ./PSI_Intention/Dataset
```

## Video to frames
```command
python split_clips_to_frames.py
```
The split frames are organized as, e.g.,
```
frames{
    video_0001{
        000.jpg,
        001.jpg,
        ...
    }
}
```
## Re-organize CV_annotations and NLP_annotations
```command
python reorganize_annotations.py
```
*Note*: video_0060 and video_0093 are removed due to missing spatial segmentation annotations.
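
As a quick sanity check (a minimal sketch, assuming the default paths used by the scripts above), verify that every video with extracted frames has both annotation files:

```python
import os

root = './PSI_Intention/Dataset'
for video in sorted(os.listdir(os.path.join(root, 'frames'))):
    has_cv = os.path.exists(os.path.join(root, 'cv_annotations', video, 'annotations.xml'))
    has_nlp = os.path.exists(os.path.join(root, 'nlp_annotations', video, 'intentSegmentation.csv'))
    if not (has_cv and has_nlp):
        print(video, 'cv:', has_cv, 'nlp:', has_nlp)  # expect video_0060 and video_0093 here
```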

## Create database with frames labeled
```command
python pedestrian_intention_database_processing.py
```
Output:

- **database_*.pkl**: The reasoning and intention annotations do not match exactly: the last several frames have only intention annotations without reasoning, because the reasoning is annotated only for the time period before the last annotated time point, while the intention annotation lasts until the end of the video.
- **database_*_overlap.pkl**: After removing the trailing frames that have only intention labels, the annotated reasoning and intention sequences have equal length.
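
For example, to load a generated database and inspect one pedestrian track (a minimal sketch; the actual file name contains a timestamp):

```python
import pickle

# Replace <timestamp> with the actual suffix of the generated file
with open('./PSI_Intention/Dataset/database/database_<timestamp>.pkl', 'rb') as fid:
    db = pickle.load(fid)

track = db['video_0001']['139_MC']
print(track['frames'][:5])          # frame numbers where the pedestrian appears
print(track['mean_intention'][:5])  # per-frame mean of the valid annotator votes, in [0, 1]
print(track['bbox'][0])             # [xtl, ytl, xbr, ybr]
```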

## Train/Val/Test split

- train: [1 ~ 75]
- val: [76 ~ 80]
- test: [81 ~ 110]

*Note*: Due to missing spatial segmentation annotations, video_0060 and video_0093 are removed. Besides, video_0003 and video_0028 are ignored because their annotated frame sequences are too short.

In our PSI paper experiments, the observed track length is 15 frames, and the model predicts the intention at the 16th frame. The overlap rate is set to 0.8 for both the train and test stages; see the sampler sketch below.
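
For reference, a sliding-window sampler consistent with this setting could look like the following sketch (illustrative only, not part of this repo): with 15 observed frames plus 1 predicted frame and an overlap rate of 0.8, consecutive windows start 3 frames apart.

```python
def make_windows(track_length, observe=15, predict=1, overlap=0.8):
    """Yield (start, end) index pairs: observe 15 frames, predict the 16th."""
    window = observe + predict                    # 16 frames per sample
    stride = max(1, int(window * (1 - overlap)))  # overlap 0.8 -> stride 3
    for start in range(0, track_length - window + 1, stride):
        yield start, start + window

# e.g., a 60-frame track yields windows starting at 0, 3, 6, ...
print(list(make_windows(60))[:4])  # [(0, 16), (3, 19), (6, 22), (9, 25)]
```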

# Citing
```
@article{chen2021psi,
  title   = {PSI: A Pedestrian Behavior Dataset for Socially Intelligent Autonomous Car},
  author  = {Chen, Tina and Tian, Renran and Chen, Yaobin and Domeyer, Joshua and Toyoda, Heishiro and Sherony, Rini and Jing, Taotao and Ding, Zhengming},
  journal = {arXiv preprint arXiv:2112.02604},
  year    = {2021}
}
```

--------------------------------------------------------------------------------
/pedestrian_intention_database_processing.py:
--------------------------------------------------------------------------------
import copy
import math
import os
import pickle
import time
import xml.etree.ElementTree as ET

import numpy as np
import pandas as pd
from tqdm import tqdm

data_root = './PSI_Intention/Dataset'
database_path = './PSI_Intention/Dataset/database'
args = {}
args['annot_path'] = os.path.join(data_root, 'cv_annotations')
args['nlp_path'] = os.path.join(data_root, 'nlp_annotations')
args['frames_path'] = os.path.join(data_root, 'frames')
args['pedID_path'] = os.path.join(data_root, 'additional_info/pedID.xlsx')
args['mapping_path'] = os.path.join(data_root, 'additional_info/video_name_mapping.xlsx')
args['vf_path'] = os.path.join(data_root, 'visual_features')
args['save_path'] = database_path


'''
This function creates a DataFrame mapping each pedestrian's ID to its video ID.
'''
def get_pedID(root_dir, args):
    """Creates a dataframe with pedID, video name, and video ID."""
    cols = ['ID', 'NLP Annotation', 'video_name']
    pedID_df = pd.read_excel(args['pedID_path'], usecols=cols)
    # Remove rows that aren't a main pedestrian
    pedID_df = pedID_df.loc[pedID_df['NLP Annotation'] != 0]
    name_df = pd.read_excel(args['mapping_path'])

    merged_df = pd.merge(pedID_df, name_df, on='video_name')

    return merged_df
pedID_df = get_pedID(root_dir=data_root, args=args)
pid = pedID_df


'''
This function initializes the database dict based on the pedID:

db = {
    'video_0001': {
        '1_MC': {
            'frames': None,                # list of frame #s in which the pedestrian appears
            'mean_intention': None,        # per-frame mean of the valid votes (0, 0.5, 1)
            'major_intention': None,       # per-frame vote distribution over the 3 categories
            'disagree_score': None,        # computed against all 24 total votes
            'valid_disagree_score': None,  # computed against the valid votes only
            'bbox': None,
            'reason_feats': None,
            'description_feats': None,
            'original_intention': None,    # all annotators' intention votes
            'original_reason': None,       # all annotators' reasons
            'labeled_frames': None,        # frames with labels; overlaps with 'frames'
        }
    }
}
'''
def create_db(root_dir, args, pedID_df):
    db = {}
    for index, row in pedID_df.iterrows():
        video_name = 'video_' + str(row["video_id"]).zfill(4)
        pedID = row["ID"]
        db[video_name] = {pedID: {'frames': None, 'mean_intention': None, 'major_intention': None,
                                  'disagree_score': None, 'labeled_frames': None,
                                  'bbox': None, 'reason_feats': None, 'original_reason': None,
                                  'valid_disagree_score': None, 'original_intention': None}}

    # TODO: get cv annotations for the excluded videos
    db.pop('video_0060')
    db.pop('video_0093')
    return db
database = create_db(data_root, args, pid)


'''
Get samples with cv annotations.
'''
def load_xml(video, root_dir):
    # Loads the XML file, reads the bbox coordinates, and creates an id for each bbox
    tree = ET.parse(os.path.join(root_dir, 'cv_annotations', video, 'annotations.xml'))
    root = tree.getroot()
    file_location = os.path.join(root_dir, 'visual_features', video)
    # Find all track nodes
    for obj in tqdm(root.findall('track')):
        label = obj.get('label')
        # For the found track node, list out the bbox attributes
        for box in obj.findall('box'):
            if box.get('outside') == '1':
                continue
            framenum = box.get('frame').zfill(3)
            bbox = (float(box.get('xtl')),
                    float(box.get('ytl')),
                    float(box.get('xbr')),
                    float(box.get('ybr')))
            # Check whether the 'ID' field is filled
            file_name = None
            for attribute in box.iter('attribute'):
                if attribute.get('name') == 'ID':
                    if attribute.text == 'n/a':
                        # No ID: fall back to the track id
                        id = obj.get('id')
                    else:
                        # Specified ID
                        id = attribute.text
                    file_name = video + '_' + 'f' + framenum + '_' + label + id + '.npz'

            if not os.path.exists(file_location):
                os.makedirs(file_location)
            if file_name:
                if not os.path.exists(os.path.join(file_location, file_name)):
                    # Placeholder feature array; e.g., load_process_image(args, root_dir, video, framenum, bbox, model)
                    features = np.array([])
                    save_path = os.path.join(file_location, file_name)
                    np.savez_compressed(save_path, features)
            else:
                print("No attributes found for frame {}_{}".format(framenum, label))


if not os.path.exists(os.path.join(data_root, 'visual_features')):
    for video in sorted(os.listdir(os.path.join(data_root, 'cv_annotations'))):
        try:
            print(f'Processing {video}.')
            load_xml(video, data_root)
        except Exception:
            print("Failed processing {}".format(video))
else:
    print("Frame lists already exist!")


'''
This function returns the list of frame numbers in which each specific pedID appears.
Notice: this frame list is not obtained directly from the xml annotations, but from the
VGG feature files already processed based on each bbox.
e.g., database['video_0001']['139_MC']['frames'] = [135, 136, ..., 256]
'''
def get_frames(root_dir, args, db, df):

    for index, row in df.iterrows():
        video_name = 'video_' + str(row["video_id"]).zfill(4)
        pedID = row["ID"]
        vf_path = os.path.join(args['vf_path'], video_name)
        try:
            vf_files = os.listdir(vf_path)
            vf_files.sort()
            # Keep files whose pedID suffix (just before '.npz') matches, and slice
            # out the zero-padded frame number (characters 12-14 of the file name)
            f = [file_name[12:15] for file_name in vf_files if file_name[(-4 - len(pedID)):-4] == pedID]
            db[video_name][pedID]['frames'] = f
        except (FileNotFoundError, KeyError):
            print(f'Could not find {video_name} in database.')

    return db
database = get_frames(data_root, args, database, pid)
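
# Illustrative note (toy example, not dataset code): the slicing above assumes
# feature file names as produced by load_xml(), e.g.
# 'video_0001_f135_pedestrian139_MC.npz', where characters [12:15] hold the
# zero-padded frame number and the characters just before '.npz' hold the pedID.
_example_name = 'video_0001_f135_pedestrian139_MC.npz'
assert _example_name[12:15] == '135'
assert _example_name[-4 - len('139_MC'):-4] == '139_MC'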

'''
Return the annotated pedestrian's bbox list for each frame.
Notice: only the pedestrian bboxes are taken, so each frame has exactly one, and the
bbox sequence has the same length as the frames list for each pedestrian.
'''
def get_bbox(root_dir, args, db, df):
    for index, row in df.iterrows():
        video_name = 'video_' + str(row["video_id"]).zfill(4)
        pedID = row["ID"]
        bbox = []
        try:
            tree = ET.parse(os.path.join(args['annot_path'], video_name, 'annotations.xml'))
            root = tree.getroot()
            for frame in db[video_name][pedID]['frames']:
                # For each frame
                for obj in root.findall('track'):
                    if obj.get('label') == 'pedestrian':
                        # Get the bboxes labeled as 'pedestrian'
                        for box in obj.findall('box'):
                            # Compare without zero-padding (str(int(...)) also handles frame '000')
                            if box.get('frame') == str(int(frame)):
                                for attribute in box.iter('attribute'):
                                    if attribute.get('name') == 'ID':
                                        # If the bbox pedID matches the one used for feature extraction
                                        if attribute.text == pedID:
                                            coords = [float(box.get('xtl')),
                                                      float(box.get('ytl')),
                                                      float(box.get('xbr')),
                                                      float(box.get('ybr'))]
                                            x1, y1, x2, y2 = coords
                                            if (x2 - x1) < 1 or (y2 - y1) < 1:
                                                # Degenerate box smaller than one pixel
                                                print(video_name, pedID, coords)
                                            bbox.append(coords)
            # Each frame has only one box for this pedestrian, so collect them as a list
            db[video_name][pedID]['bbox'] = bbox

        except (FileNotFoundError, KeyError, TypeError):
            print(f'Could not find {video_name} in database.')

    return db

bbox_database = get_bbox(data_root, args, copy.deepcopy(database), pid)


'''
This function gets the crossing intention of each pedestrian.
'''
def get_intention(root_dir, args, db, df):
    total = 0
    int_count = [0, 0, 0]
    for index, row in df.iterrows():  # For each ped_id & vid_id
        video_name = 'video_' + str(row["video_id"]).zfill(4)
        pedID = row["ID"]
        cols = ['video_time', 'ped_intention_cat', 'user_id', 'ped_reasoning']
        int_df = pd.read_csv(os.path.join(args['nlp_path'], video_name, 'intentSegmentation.csv'), usecols=cols)

        # For each annotated row
        for row_id, ann_row in int_df.iterrows():
            # Convert seconds to frames (videos are 30 fps)
            video_time = ann_row['video_time']
            int_df.at[row_id, 'video_time'] = math.trunc(video_time * 30)
            # Convert text categories to numerical classes
            intention = ann_row['ped_intention_cat']
            if intention == 'not_cross':
                int_df.at[row_id, 'ped_intention_cat'] = 0
            elif intention == 'not_sure':
                int_df.at[row_id, 'ped_intention_cat'] = 0.5
            elif intention == 'cross':
                int_df.at[row_id, 'ped_intention_cat'] = 1
        int_df['video_time'] = int_df['video_time'].astype(int)  # already changed to frame #
        # Re-arrange the dataframe so each column is a different user
        int_df = int_df.drop_duplicates(subset=['video_time', 'user_id'], keep='last')
        ori_int_df = copy.deepcopy(int_df)

        isna = int_df['ped_reasoning'].isna()
        print(int_df['ped_intention_cat'].isna().sum(), " nan intention cat | ", isna.sum(), " nan reasoning labels")

        time_intent_map = int_df.pivot(index='video_time', columns='user_id', values='ped_intention_cat')

        start_frame, end_frame = time_intent_map.index[0], time_intent_map.index[-1]
        print("Start_frame: ", start_frame, " End frame: ", end_frame)
        total += 450 - start_frame + 1  # count frames from start_frame to the last frame (450)

        # Note: all trailing frames are annotated with the last intention label,
        # and they will have all reasons as 0s
        time_intent_map = time_intent_map.reindex(list(range(0, 451)), fill_value=np.nan).iloc[start_frame:, :]

        time_intent_map.fillna(method='ffill', inplace=True)

        print(time_intent_map.isna().sum().sum(), " -1 are added.")

        # '-1' marks labels that should be ignored downstream
        time_intent_map.fillna(-1.0, inplace=True)

        frame_length = time_intent_map.shape[0]
        major_intention = [-1] * frame_length
        mean_intention = [-1] * frame_length
        original_intention = []
        disagree_score = [-1] * frame_length
        valid_disagree_score = [-1] * frame_length
        for i in range(frame_length):
            cur_frame_int = time_intent_map.values[i, :]  # may contain -1, which should be ignored
            original_intention.append(cur_frame_int)
            int_lbl, votes = np.unique(cur_frame_int, return_counts=True)
            total_valid_votes = 0

            # **************************************************
            # Store the vote rates for the 3 intention categories
            temp_int = [0, 0, 0]
            max_vote = 0
            for j in range(len(int_lbl)):  # unique intent label list
                if int_lbl[j] == -1:
                    continue
                if int_lbl[j] == 0.0:
                    cur_int = 0
                elif int_lbl[j] == 0.5:
                    cur_int = 1
                elif int_lbl[j] == 1.0:
                    cur_int = 2
                else:
                    raise Exception("Error int_lbl[j]")
                int_count[cur_int] += 1

                cur_vot = votes[j]  # number of votes for the current intention
                total_valid_votes += votes[j]
                temp_int[cur_int] = cur_vot
                if cur_vot > max_vote:
                    max_vote = cur_vot
            disagree_score[i] = 1 - max_vote / 24
            valid_disagree_score[i] = 1 - max_vote / total_valid_votes
            major_intention[i] = [temp_int[k] / total_valid_votes for k in range(3)]
            # major_intention[i] is a 3-dimensional list

            # Get the mean intention of the valid votes
            temp_sum = 0
            temp_cnt = 0
            for j in range(len(int_lbl)):
                if int_lbl[j] == -1:
                    continue
                temp_sum += int_lbl[j] * votes[j]
                temp_cnt += votes[j]
            assert temp_cnt == total_valid_votes
            assert temp_cnt > 0
            mean_intention[i] = temp_sum / temp_cnt
            # mean intention is one float in [0, 1]

        try:
            db[video_name][pedID]['major_intention'] = major_intention
            db[video_name][pedID]['mean_intention'] = mean_intention
            db[video_name][pedID]['original_intention'] = original_intention
            db[video_name][pedID]['disagree_score'] = disagree_score
            db[video_name][pedID]['valid_disagree_score'] = valid_disagree_score
            db[video_name][pedID]['labeled_frames'] = time_intent_map.index.tolist()
            print("Ped appear frames: ", db[video_name][pedID]['frames'][0], " -- ", db[video_name][pedID]['frames'][-1])
            print("Labeled frames: ", db[video_name][pedID]['labeled_frames'][0], ' -- ', db[video_name][pedID]['labeled_frames'][-1])
        except (KeyError, TypeError):
            print(f'{video_name} not part of dataset.')

        # Reason feats --------------------------
        print("----- reason ------")
        time_rsn_map = ori_int_df.pivot(index='video_time', columns='user_id', values='ped_reasoning')
        start_frame, end_frame = time_rsn_map.index[0], time_rsn_map.index[-1]
        print("Start_frame: ", start_frame, " End frame: ", end_frame)

        # Note: the reasons of the trailing frames are filled with 0s
        time_rsn_map = time_rsn_map.reindex(list(range(0, 451)), fill_value=np.nan).iloc[start_frame:, :]

        time_rsn_map.fillna(method='bfill', inplace=True)
        print(time_rsn_map.isna().sum().sum(), " -1 are added.")

        time_rsn_map.fillna(-1.0, inplace=True)

        original_reason = []
        reason_feats = []
        for vtime, feats in time_rsn_map.iterrows():  # only labeled frames
            vtime_ori_rsn = []
            for uid in time_rsn_map.columns:
                # -1 entries mark annotators without a valid reason at this frame
                vtime_ori_rsn.append(feats[uid])
            original_reason.append(vtime_ori_rsn)
        # NOTE: the 62-dim one-hot reason feature aggregation is disabled here,
        # so reason_feats stays empty; only the raw per-annotator reasons are kept.
        try:
            db[video_name][pedID]['original_reason'] = original_reason
            db[video_name][pedID]['reason_feats'] = reason_feats
        except KeyError:
            print(f'{video_name} not part of dataset.')
    print("Intention count: ", int_count, " | total=", total)
    return db


intent_database = get_intention(data_root, args, copy.deepcopy(bbox_database), pid)


print(len(intent_database['video_0001']['139_MC']['original_reason']))
print(intent_database['video_0001']['139_MC']['original_reason'][-1])


print(len(intent_database['video_0027']['150_MC']['bbox']))
print(intent_database['video_0027']['150_MC']['bbox'][-5:])
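
# Toy example (illustrative only) of the per-frame vote aggregation in
# get_intention() above: 24 annotators, votes coded as 0 (not_cross),
# 0.5 (not_sure), 1 (cross), and -1 for annotators with no valid label.
_votes = [1.0] * 18 + [0.5] * 4 + [-1.0] * 2
_valid = [v for v in _votes if v != -1]
_mean = sum(_valid) / len(_valid)                                        # 20/22 ~= 0.909
_major = [_valid.count(c) / len(_valid) for c in (0.0, 0.5, 1.0)]        # [0, 4/22, 18/22]
_disagree = 1 - max(_valid.count(c) for c in (0.0, 0.5, 1.0)) / 24       # 1 - 18/24 = 0.25
_valid_disagree = 1 - max(_valid.count(c) for c in (0.0, 0.5, 1.0)) / len(_valid)  # ~= 0.182
print(_mean, _major, _disagree, _valid_disagree)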

'''
Only keep the intention labels corresponding to the frames in which each pedestrian
appears, instead of taking every frame's intention labels for each pedID.
Notice: this avoids frames in which no pedestrian appears!
Notice: the reasoning/description features should also be sliced.
'''
def slice_intention(db):
    for video, value1 in db.items():
        for pedID, value2 in db[video].items():
            db[video][pedID]['frames'] = [int(f) for f in db[video][pedID]['frames']]
            frames = db[video][pedID]['frames']  # original cv-annotated frames
            labeled_frames = db[video][pedID]['labeled_frames']  # frames with intention labels
            frame_min, frame_max = int(min(frames)), int(max(frames))
            labeled_min, labeled_max = int(min(labeled_frames)), int(max(labeled_frames))

            # Intersect the two frame ranges
            max_start = max(frame_min, labeled_min)
            min_end = min(frame_max, labeled_max)
            try:
                frame_start_idx, frame_end_idx = frames.index(max_start), frames.index(min_end)
                labeled_start_idx, labeled_end_idx = labeled_frames.index(max_start), labeled_frames.index(min_end)
            except ValueError:
                print("No element in the list.", video, pedID, min_end - max_start)
                print("!!! Skip the cut of ", video, "!!!")
                continue

            # 1. frames, bbox
            db[video][pedID]['frames'] = db[video][pedID]['frames'][frame_start_idx: frame_end_idx + 1]
            db[video][pedID]['bbox'] = db[video][pedID]['bbox'][frame_start_idx: frame_end_idx + 1]

            # 2. intention labels, original_reason, original_intention
            db[video][pedID]['mean_intention'] = db[video][pedID]['mean_intention'][labeled_start_idx: labeled_end_idx + 1]
            db[video][pedID]['major_intention'] = db[video][pedID]['major_intention'][labeled_start_idx: labeled_end_idx + 1]
            db[video][pedID]['disagree_score'] = db[video][pedID]['disagree_score'][labeled_start_idx: labeled_end_idx + 1]
            db[video][pedID]['valid_disagree_score'] = db[video][pedID]['valid_disagree_score'][labeled_start_idx: labeled_end_idx + 1]

            db[video][pedID]['labeled_frames'] = db[video][pedID]['labeled_frames'][labeled_start_idx: labeled_end_idx + 1]
            db[video][pedID]['reason_feats'] = []  # reason feature aggregation is disabled
            db[video][pedID]['original_reason'] = db[video][pedID]['original_reason'][labeled_start_idx: labeled_end_idx + 1]
            db[video][pedID]['original_intention'] = db[video][pedID]['original_intention'][labeled_start_idx: labeled_end_idx + 1]

            if len(db[video][pedID]['frames']) != len(db[video][pedID]['labeled_frames']):
                print("Different frames v.s. labeled frames: ", video, pedID)
                print(len(db[video][pedID]['frames']), len(db[video][pedID]['bbox']),
                      len(db[video][pedID]['mean_intention']), len(db[video][pedID]['major_intention']),
                      len(db[video][pedID]['disagree_score']), len(db[video][pedID]['labeled_frames']),
                      len(db[video][pedID]['reason_feats']), len(db[video][pedID]['original_reason']),
                      len(db[video][pedID]['original_intention']))
    return db


sliced_database = slice_intention(copy.deepcopy(intent_database))


i = 0
j = 0
for v in sliced_database.keys():
    for p in sliced_database[v].keys():
        sample = sliced_database[v][p]
        # Reason feature aggregation is disabled, so i stays 0:
        # for reason in sample['reason_feats']:
        #     if len(reason) == 0:
        #         i += 1
        j += 1
print("reason feats: ", i, j)

i = 0
j = 0
for v in sliced_database.keys():
    for p in sliced_database[v].keys():
        sample = sliced_database[v][p]
        for intent in sample['major_intention']:
            if intent == -1:
                i += 1
        j += 1
print("intent: ", i, j)


def check_missing(db):
    for video, value1 in db.items():
        for pedID, value2 in db[video].items():
            if len(db[video][pedID]['frames']) != len(db[video][pedID]['labeled_frames']):
                print("Different frames v.s. labeled frames: ", video, pedID)
                print(len(db[video][pedID]['frames']), len(db[video][pedID]['bbox']),
                      len(db[video][pedID]['mean_intention']), len(db[video][pedID]['major_intention']),
                      len(db[video][pedID]['disagree_score']), len(db[video][pedID]['labeled_frames']),
                      len(db[video][pedID]['original_intention']))
                print("Frame start&end: ", db[video][pedID]['frames'][0], db[video][pedID]['frames'][-1])
                print("labeled_frames start&end: ", db[video][pedID]['labeled_frames'][0], db[video][pedID]['labeled_frames'][-1])
                missing_frames = []
                for l in db[video][pedID]['labeled_frames']:
                    if l not in db[video][pedID]['frames']:
                        missing_frames.append(l)
                print("Missing frames: ", missing_frames)
                # Split the missing frames into contiguous pieces
                missing_pieces = []
                start = prev = missing_frames[0]
                for f in missing_frames[1:]:
                    if f != prev + 1:
                        missing_pieces.append([start, prev])
                        start = f
                    prev = f
                missing_pieces.append([start, prev])
                print("Split missing pieces: ", missing_pieces)

                print("--------------------------------------------")
            else:
                if len(db[video][pedID]['frames']) != len(db[video][pedID]['bbox']):
                    print("Different bbox length!", video)
                    print(db[video][pedID]['frames'], db[video][pedID]['bbox'], db[video][pedID]['labeled_frames'])
                else:
                    print("All lengths are the same! ", video)
                no_missing = True
                for f in db[video][pedID]['frames']:
                    if f not in db[video][pedID]['labeled_frames']:
                        print("frames ", f, " not in labeled_frames")
                        no_missing = False
                for l in db[video][pedID]['labeled_frames']:
                    if l not in db[video][pedID]['frames']:
                        print("labeled_frames ", l, " not in frames")
                        no_missing = False
                if no_missing:
                    print("No missing frames! ")


def remove_missing_intention(db):
    for video, value1 in db.items():
        for pedID, value2 in db[video].items():
            if len(db[video][pedID]['frames']) != len(db[video][pedID]['labeled_frames']) or \
                    len(db[video][pedID]['frames']) != len(db[video][pedID]['major_intention']) or \
                    len(db[video][pedID]['major_intention']) != len(db[video][pedID]['labeled_frames']):
                print("Different frames v.s. labeled frames: ", video, pedID)
                print(len(db[video][pedID]['frames']), len(db[video][pedID]['bbox']),
                      len(db[video][pedID]['mean_intention']), len(db[video][pedID]['major_intention']),
                      len(db[video][pedID]['disagree_score']), len(db[video][pedID]['valid_disagree_score']),
                      len(db[video][pedID]['labeled_frames']),
                      len(db[video][pedID]['original_intention']))
                print("Frame start&end: ", db[video][pedID]['frames'][0], db[video][pedID]['frames'][-1])
                print("labeled_frames start&end: ", db[video][pedID]['labeled_frames'][0], db[video][pedID]['labeled_frames'][-1])
                missing_frames = []
                for l in db[video][pedID]['labeled_frames']:
                    if l not in db[video][pedID]['frames']:
                        missing_frames.append(l)
                print("Missing frames: ", missing_frames)
                # Split the missing frames into contiguous pieces
                missing_pieces = []
                start = prev = missing_frames[0]
                for f in missing_frames[1:]:
                    if f != prev + 1:
                        missing_pieces.append([start, prev])
                        start = f
                    prev = f
                missing_pieces.append([start, prev])
                print("Split missing pieces: ", missing_pieces)

                # Remove the missing frames' intention labels, piece by piece
                for piece in missing_pieces:
                    missing_start = db[video][pedID]['labeled_frames'].index(piece[0])
                    missing_end = db[video][pedID]['labeled_frames'].index(piece[1])

                    del db[video][pedID]['mean_intention'][missing_start: missing_end + 1]
                    del db[video][pedID]['major_intention'][missing_start: missing_end + 1]
                    del db[video][pedID]['disagree_score'][missing_start: missing_end + 1]
                    del db[video][pedID]['valid_disagree_score'][missing_start: missing_end + 1]
                    del db[video][pedID]['labeled_frames'][missing_start: missing_end + 1]
                    del db[video][pedID]['original_reason'][missing_start: missing_end + 1]
                    del db[video][pedID]['original_intention'][missing_start: missing_end + 1]

                print("--------------------------------------------")
            else:
                print("Same frames and labels: ", video, pedID)
                if len(db[video][pedID]['frames']) != len(db[video][pedID]['bbox']):
                    print("missing bbox ", len(db[video][pedID]['frames']) - len(db[video][pedID]['bbox']))
                    # Pad by repeating the last bbox when exactly one is missing
                    db[video][pedID]['bbox'].append(db[video][pedID]['bbox'][-1])
                    if len(db[video][pedID]['frames']) - len(db[video][pedID]['bbox']) > 1:
                        print("Missing more than 1 bbox annotation!")
                for f in db[video][pedID]['frames']:
                    if f not in db[video][pedID]['labeled_frames']:
                        print("frames ", f, " not in labeled_frames")

                for l in db[video][pedID]['labeled_frames']:
                    if l not in db[video][pedID]['frames']:
                        print("labeled_frames ", l, " not in frames")
                print("================================================")

    return db
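
# Toy example (illustrative only) of the contiguous-piece splitting used by
# check_missing() and remove_missing_intention() above:
def _contiguous_pieces(frames):
    pieces = []
    for f in sorted(frames):
        if pieces and f == pieces[-1][1] + 1:
            pieces[-1][1] = f      # extend the current run
        else:
            pieces.append([f, f])  # start a new run
    return pieces

print(_contiguous_pieces([5, 6, 7, 12, 13, 20]))  # -> [[5, 7], [12, 13], [20, 20]]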

print(len(sliced_database['video_0083']['1_MC']['major_intention']), len(sliced_database['video_0083']['1_MC']['bbox']))


missing_database = copy.deepcopy(sliced_database)
# video_0003 and video_0028 are ignored: their annotated frame sequences are too short
del missing_database['video_0003']
del missing_database['video_0028']


removed_missing_database = remove_missing_intention(missing_database)


check_missing(removed_missing_database)


uni_db = copy.deepcopy(removed_missing_database)


for v in uni_db.keys():
    for p in uni_db[v].keys():
        sample = uni_db[v][p]
        if not (len(sample['frames']) == len(sample['major_intention']) == len(sample['bbox'])):
            print(v, p, len(sample['frames']), len(sample['major_intention']), len(sample['bbox']),
                  len(sample['reason_feats']))


for k in uni_db['video_0023']['6_MC'].keys():
    if uni_db['video_0023']['6_MC'][k]:
        print(k, len(uni_db['video_0023']['6_MC'][k]))


# Pad any remaining one-frame bbox gaps by repeating the last bbox
for v in uni_db.keys():
    for p in uni_db[v].keys():
        sample = uni_db[v][p]
        if not (len(sample['frames']) == len(sample['major_intention']) == len(sample['bbox'])):
            print(v, p, len(sample['frames']), len(sample['major_intention']), len(sample['bbox']),
                  len(sample['reason_feats']))
            uni_db[v][p]['bbox'].append(uni_db[v][p]['bbox'][-1])


# Final consistency check: nothing should be printed here
for v in uni_db.keys():
    for p in uni_db[v].keys():
        sample = uni_db[v][p]
        if not (len(sample['frames']) == len(sample['major_intention']) == len(sample['bbox'])):
            print(v, p, len(sample['frames']), len(sample['major_intention']), len(sample['bbox']),
                  len(sample['reason_feats']))


database_name = 'database_' + time.strftime("%d%b%Y-%Hh%Mm%Ss") + '.pkl'
if not os.path.exists(args['save_path']):
    os.makedirs(args['save_path'])
with open(os.path.join(args['save_path'], database_name), 'wb') as fid:
    pickle.dump(uni_db, fid)
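
# Toy example (illustrative only) of the "intention-only" criterion used for
# the *_overlap database below: a frame is trimmed when every annotator's
# reason entry is -1, i.e. it carries an intention label but no reason.
_reason_row = [-1, -1, -1]
print(sum(1 if r == -1 else 0 for r in _reason_row) == len(_reason_row))  # True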

overlap_db = copy.deepcopy(uni_db)

int_reason_overlap = True
if int_reason_overlap:
    for v in overlap_db.keys():
        for p in overlap_db[v].keys():
            sample = overlap_db[v][p]
            print(v, p, len(sample['frames']), len(sample['major_intention']), len(sample['bbox']), len(sample['original_reason']))
            mis_match_list = []
            for i in range(len(sample['frames'])):
                if sum([1 if r == -1 else 0 for r in sample['original_reason'][i]]) == len(sample['original_reason'][i]):
                    mis_match_list.append(i)
            # Remove the mismatched frames' intention labels, because intention labels
            # always run longer than the reasons, until the end of the video

            if len(mis_match_list) > 0:
                del overlap_db[v][p]['frames'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['bbox'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['mean_intention'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['major_intention'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['disagree_score'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['valid_disagree_score'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['labeled_frames'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['original_reason'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['original_intention'][mis_match_list[0]: mis_match_list[-1] + 1]
                print("Removed mismatch: ", v, p, len(sample['frames']), len(sample['major_intention']), len(sample['bbox']), len(sample['original_reason']))


database_name = 'database_' + time.strftime("%d%b%Y-%Hh%Mm%Ss") + '_overlap.pkl'
if not os.path.exists(args['save_path']):
    os.makedirs(args['save_path'])
with open(os.path.join(args['save_path'], database_name), 'wb') as fid:
    pickle.dump(overlap_db, fid)

--------------------------------------------------------------------------------