├── additional_info
│   ├── pedID.xlsx
│   └── video_name_mapping.xlsx
├── split_clips_to_frames.py
├── reorganize_annotations.py
├── README.md
└── pedestrian_intention_database_processing.py

--------------------------------------------------------------------------------
/additional_info/pedID.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PSI-Intention2022/PSI-Intention/HEAD/additional_info/pedID.xlsx

--------------------------------------------------------------------------------
/additional_info/video_name_mapping.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PSI-Intention2022/PSI-Intention/HEAD/additional_info/video_name_mapping.xlsx

--------------------------------------------------------------------------------
/split_clips_to_frames.py:
--------------------------------------------------------------------------------
'''Given the video path, extract frames for all videos. Check whether the frames already exist first.'''

import os

import cv2
from tqdm import tqdm

video_path = './PSI_Intention/Dataset/RawVideos'
frames_path = './PSI_Intention/Dataset/frames'

# Create the 'frames' folder
if not os.path.exists(frames_path):
    os.makedirs(frames_path)
    print("Created 'frames' folder.")

for video in tqdm(os.listdir(video_path)):
    name = "video" + video[7:12]
    video_target = os.path.join(video_path, video)
    frames_target = os.path.join(frames_path, name)

    if not os.path.exists(frames_target):
        os.makedirs(frames_target)
        print(f'Created frames folder for video {name}')

    vidcap = cv2.VideoCapture(video_target)
    if not vidcap.isOpened():
        raise Exception(f'Cannot open file {video_target}')

    cur_frame = 0
    while True:
        success, frame = vidcap.read()
        if not success:
            break
        # Zero-pad the frame number so the files sort in frame order
        frame_num = str(cur_frame).zfill(3)
        cv2.imwrite(os.path.join(frames_target, f'{frame_num}.jpg'), frame)
        cur_frame += 1
    vidcap.release()
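
# Optional sanity check (illustrative sketch, not part of the original script):
# compare the number of saved frames against OpenCV's reported frame count.
# The reported count can be off by a few frames for some codecs, so treat a
# mismatch as a warning rather than an error.
for video in os.listdir(video_path):
    name = "video" + video[7:12]
    cap = cv2.VideoCapture(os.path.join(video_path, video))
    reported = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    saved = len(os.listdir(os.path.join(frames_path, name)))
    if saved != reported:
        print(f'{name}: saved {saved} frames, container reports {reported}')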
--------------------------------------------------------------------------------
/reorganize_annotations.py:
--------------------------------------------------------------------------------
import os
import shutil

import pandas as pd

frames_path = './PSI_Intention/Dataset/frames'
xml_path = './PSI_Intention/Dataset/XmlFiles'

nlp_annotation_path = './PSI_Intention/Dataset/nlp_annotations'
cv_annotation_path = './PSI_Intention/Dataset/cv_annotations'

# Create the 'cv_annotations' folder
if not os.path.exists(cv_annotation_path):
    os.makedirs(cv_annotation_path)
    print("Created 'cv_annotations' folder.")

# Create the 'nlp_annotations' folder
if not os.path.exists(nlp_annotation_path):
    os.makedirs(nlp_annotation_path)
    print("Created 'nlp_annotations' folder.")

# Re-organize cv annotations
for video_file in os.listdir(frames_path):
    video_num = video_file.split('_')[1]
    if not os.path.exists(os.path.join(cv_annotation_path, video_file)):
        os.mkdir(os.path.join(cv_annotation_path, video_file))

    src = os.path.join(xml_path, video_num + '.xml')
    dst = os.path.join(cv_annotation_path, video_file, 'annotations.xml')
    try:
        shutil.copyfile(src, dst)
    except OSError:
        print("Failed copying {} to {}".format(src, dst))
print("WARNING: video_0060 and video_0093 cv_annotations are missing. These two samples are abandoned.")

# Re-organize nlp annotations
df = pd.read_excel('./PSI_Intention/Dataset/IntentAnnotations.xlsx')
for video_file in os.listdir(frames_path):
    video_num = video_file.split('_')[1]
    if not os.path.exists(os.path.join(nlp_annotation_path, video_file)):
        os.mkdir(os.path.join(nlp_annotation_path, video_file))

    dst = os.path.join(nlp_annotation_path, video_file, 'intentSegmentation.csv')
    try:
        sub_df = df[df['video_id'] == int(video_num)]
        sub_df.to_csv(dst, index=None, header=True)
    except Exception:
        print("Failed to create nlp annotations {}".format(dst))

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# News
- 20230910: All PSI data, including videos, CV annotations, and cognitive annotations (PSI 1.0 & PSI 2.0), are public for download and future exploration! ~~[[Google Drive]()]~~ [[PSI Homepage](http://pedestriandataset.situated-intent.net)] :sparkler::zap:
- 20230607: :bangbang: We are hosting a competition on pedestrian behavior prediction; please check the details and participate via the [[IEEE ITSS PSI Student Competition](https://psi-intention2022.github.io)].
- 20230607: The new [[PSI 2.0 benchmark (Training & Validation & Test)](https://github.com/PSI-Intention2022/PSI-Dataset)] is released, with more video samples and diverse multimodal interpretable annotations for pedestrian intent and driving decisions!

---
---
:exclamation: This repo is **deprecated**. Please check our new [PSI Dataset](https://github.com/PSI-Intention2022/PSI-Dataset) and the [[IEEE ITSS PSI Student Competition in Pedestrian Behavior Prediction](https://psi-intention2022.github.io)]. :exclamation:
---
# IUPUI-CSRC Pedestrian Situated Intent (PSI) Dataset
This repository contains the IUPUI-CSRC Pedestrian Situated Intent (PSI) Dataset pre-processing scripts and baseline.

For more situated intent data and work, please see [Situated Intent](http://situated-intent.net)!

## Download dataset and extract
Download the dataset from [link](http://situated-intent.net/pedestrian_dataset/), then extract via

```command
unzip Dataset.zip
```

Output:

```command
Archive:  Dataset.zip
   creating: PSI_Intention/Dataset/
  inflating: PSI_Intention/Dataset/VideoWithIndicator.zip
  inflating: PSI_Intention/Dataset/RawVideos.zip
  inflating: PSI_Intention/Dataset/README.txt
  inflating: PSI_Intention/Dataset/IntentAnnotations.xlsx
  inflating: PSI_Intention/Dataset/XmlFiles.zip
```
Extract videos and spatial annotations:
```command
unzip ./PSI_Intention/Dataset/RawVideos.zip -d ./PSI_Intention/Dataset
unzip ./PSI_Intention/Dataset/XmlFiles.zip -d ./PSI_Intention/Dataset
```

## Video to frames
```command
python split_clips_to_frames.py
```
The split frames are organized as, e.g.,
```
frames{
    video_0001{
        000.jpg,
        001.jpg,
        ...
    }
}
```
## Re-organize CV_annotations and NLP_annotations
```command
python reorganize_annotations.py
```
*Note*: video_0060 and video_0093 are removed due to missing spatial segmentation annotations.
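
As a quick sanity check (a minimal sketch, assuming the default paths used by the scripts above), verify that every video with extracted frames has both annotation files:

```python
import os

root = './PSI_Intention/Dataset'
for video in sorted(os.listdir(os.path.join(root, 'frames'))):
    has_cv = os.path.exists(os.path.join(root, 'cv_annotations', video, 'annotations.xml'))
    has_nlp = os.path.exists(os.path.join(root, 'nlp_annotations', video, 'intentSegmentation.csv'))
    if not (has_cv and has_nlp):
        print(video, 'cv:', has_cv, 'nlp:', has_nlp)  # expect video_0060 and video_0093 here
```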

## Create database with frames labeled
```command
python pedestrian_intention_database_processing.py
```
Output:

- **database_*.pkl**: The reasoning and intention annotations do not match exactly: the last several frames have only intention annotations without reasoning, because the reasoning is annotated only for the time period before the last annotated time point, while the intention annotation lasts until the end of the video.
- **database_*_overlap.pkl**: After removing the trailing frames that have only intention labels, the annotated reasoning and intention sequences have equal length.
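
For example, to load a generated database and inspect one pedestrian track (a minimal sketch; the actual file name contains a timestamp):

```python
import pickle

# Replace <timestamp> with the actual suffix of the generated file
with open('./PSI_Intention/Dataset/database/database_<timestamp>.pkl', 'rb') as fid:
    db = pickle.load(fid)

track = db['video_0001']['139_MC']
print(track['frames'][:5])          # frame numbers where the pedestrian appears
print(track['mean_intention'][:5])  # per-frame mean of the valid annotator votes, in [0, 1]
print(track['bbox'][0])             # [xtl, ytl, xbr, ybr]
```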

## Train/Val/Test split

- train: [1 ~ 75]
- val: [76 ~ 80]
- test: [81 ~ 110]

*Note*: Due to missing spatial segmentation annotations, video_0060 and video_0093 are removed. Besides, video_0003 and video_0028 are ignored because their annotated frame sequences are too short.

In our PSI paper experiments, the observed track length is 15 frames, and the model predicts the intention at the 16th frame. The overlap rate is set to 0.8 for both the train and test stages; see the sampler sketch below.
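
For reference, a sliding-window sampler consistent with this setting could look like the following sketch (illustrative only, not part of this repo): with 15 observed frames plus 1 predicted frame and an overlap rate of 0.8, consecutive windows start 3 frames apart.

```python
def make_windows(track_length, observe=15, predict=1, overlap=0.8):
    """Yield (start, end) index pairs: observe 15 frames, predict the 16th."""
    window = observe + predict                    # 16 frames per sample
    stride = max(1, int(window * (1 - overlap)))  # overlap 0.8 -> stride 3
    for start in range(0, track_length - window + 1, stride):
        yield start, start + window

# e.g., a 60-frame track yields windows starting at 0, 3, 6, ...
print(list(make_windows(60))[:4])  # [(0, 16), (3, 19), (6, 22), (9, 25)]
```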

# Citing
```
@article{chen2021psi,
  title   = {PSI: A Pedestrian Behavior Dataset for Socially Intelligent Autonomous Car},
  author  = {Chen, Tina and Tian, Renran and Chen, Yaobin and Domeyer, Joshua and Toyoda, Heishiro and Sherony, Rini and Jing, Taotao and Ding, Zhengming},
  journal = {arXiv preprint arXiv:2112.02604},
  year    = {2021}
}
```

--------------------------------------------------------------------------------
/pedestrian_intention_database_processing.py:
--------------------------------------------------------------------------------
import copy
import math
import os
import pickle
import time
import xml.etree.ElementTree as ET

import numpy as np
import pandas as pd
from tqdm import tqdm

data_root = './PSI_Intention/Dataset'
database_path = './PSI_Intention/Dataset/database'
args = {}
args['annot_path'] = os.path.join(data_root, 'cv_annotations')
args['nlp_path'] = os.path.join(data_root, 'nlp_annotations')
args['frames_path'] = os.path.join(data_root, 'frames')
args['pedID_path'] = os.path.join(data_root, 'additional_info/pedID.xlsx')
args['mapping_path'] = os.path.join(data_root, 'additional_info/video_name_mapping.xlsx')
args['vf_path'] = os.path.join(data_root, 'visual_features')
args['save_path'] = database_path


'''
This function creates a DataFrame mapping each pedestrian's ID to its video ID.
'''
def get_pedID(root_dir, args):
    """Creates a dataframe with pedID, video name, and video ID."""
    cols = ['ID', 'NLP Annotation', 'video_name']
    pedID_df = pd.read_excel(args['pedID_path'], usecols=cols)
    # Remove rows that aren't a main pedestrian
    pedID_df = pedID_df.loc[pedID_df['NLP Annotation'] != 0]
    name_df = pd.read_excel(args['mapping_path'])

    merged_df = pd.merge(pedID_df, name_df, on='video_name')

    return merged_df
pedID_df = get_pedID(root_dir=data_root, args=args)
pid = pedID_df


'''
This function initializes the database dict based on the pedID:

db = {
    'video_0001': {
        '1_MC': {
            'frames': None,                # list of frame #s in which the pedestrian appears
            'mean_intention': None,        # per-frame mean of the valid votes (0, 0.5, 1)
            'major_intention': None,       # per-frame vote distribution over the 3 categories
            'disagree_score': None,        # computed against all 24 total votes
            'valid_disagree_score': None,  # computed against the valid votes only
            'bbox': None,
            'reason_feats': None,
            'description_feats': None,
            'original_intention': None,    # all annotators' intention votes
            'original_reason': None,       # all annotators' reasons
            'labeled_frames': None,        # frames with labels; overlaps with 'frames'
        }
    }
}
'''
def create_db(root_dir, args, pedID_df):
    db = {}
    for index, row in pedID_df.iterrows():
        video_name = 'video_' + str(row["video_id"]).zfill(4)
        pedID = row["ID"]
        db[video_name] = {pedID: {'frames': None, 'mean_intention': None, 'major_intention': None,
                                  'disagree_score': None, 'labeled_frames': None,
                                  'bbox': None, 'reason_feats': None, 'original_reason': None,
                                  'valid_disagree_score': None, 'original_intention': None}}

    # TODO: get cv annotations for the excluded videos
    db.pop('video_0060')
    db.pop('video_0093')
    return db
database = create_db(data_root, args, pid)


'''
Get samples with cv annotations.
'''
def load_xml(video, root_dir):
    # Loads the XML file, reads the bbox coordinates, and creates an id for each bbox
    tree = ET.parse(os.path.join(root_dir, 'cv_annotations', video, 'annotations.xml'))
    root = tree.getroot()
    file_location = os.path.join(root_dir, 'visual_features', video)
    # Find all track nodes
    for obj in tqdm(root.findall('track')):
        label = obj.get('label')
        # For the found track node, list out the bbox attributes
        for box in obj.findall('box'):
            if box.get('outside') == '1':
                continue
            framenum = box.get('frame').zfill(3)
            bbox = (float(box.get('xtl')),
                    float(box.get('ytl')),
                    float(box.get('xbr')),
                    float(box.get('ybr')))
            # Check whether the 'ID' field is filled
            file_name = None
            for attribute in box.iter('attribute'):
                if attribute.get('name') == 'ID':
                    if attribute.text == 'n/a':
                        # No ID: fall back to the track id
                        id = obj.get('id')
                    else:
                        # Specified ID
                        id = attribute.text
                    file_name = video + '_' + 'f' + framenum + '_' + label + id + '.npz'

            if not os.path.exists(file_location):
                os.makedirs(file_location)
            if file_name:
                if not os.path.exists(os.path.join(file_location, file_name)):
                    # Placeholder feature array; e.g., load_process_image(args, root_dir, video, framenum, bbox, model)
                    features = np.array([])
                    save_path = os.path.join(file_location, file_name)
                    np.savez_compressed(save_path, features)
            else:
                print("No attributes found for frame {}_{}".format(framenum, label))


if not os.path.exists(os.path.join(data_root, 'visual_features')):
    for video in sorted(os.listdir(os.path.join(data_root, 'cv_annotations'))):
        try:
            print(f'Processing {video}.')
            load_xml(video, data_root)
        except Exception:
            print("Failed processing {}".format(video))
else:
    print("Frame lists already exist!")


'''
This function returns the list of frame numbers in which each specific pedID appears.
Notice: this frame list is not obtained directly from the xml annotations, but from the
VGG feature files already processed based on each bbox.
e.g., database['video_0001']['139_MC']['frames'] = [135, 136, ..., 256]
'''
def get_frames(root_dir, args, db, df):

    for index, row in df.iterrows():
        video_name = 'video_' + str(row["video_id"]).zfill(4)
        pedID = row["ID"]
        vf_path = os.path.join(args['vf_path'], video_name)
        try:
            vf_files = os.listdir(vf_path)
            vf_files.sort()
            # Keep files whose pedID suffix (just before '.npz') matches, and slice
            # out the zero-padded frame number (characters 12-14 of the file name)
            f = [file_name[12:15] for file_name in vf_files if file_name[(-4 - len(pedID)):-4] == pedID]
            db[video_name][pedID]['frames'] = f
        except (FileNotFoundError, KeyError):
            print(f'Could not find {video_name} in database.')

    return db
database = get_frames(data_root, args, database, pid)
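
# Illustrative note (toy example, not dataset code): the slicing above assumes
# feature file names as produced by load_xml(), e.g.
# 'video_0001_f135_pedestrian139_MC.npz', where characters [12:15] hold the
# zero-padded frame number and the characters just before '.npz' hold the pedID.
_example_name = 'video_0001_f135_pedestrian139_MC.npz'
assert _example_name[12:15] == '135'
assert _example_name[-4 - len('139_MC'):-4] == '139_MC'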

'''
Return the annotated pedestrian's bbox list for each frame.
Notice: only the pedestrian bboxes are taken, so each frame has exactly one, and the
bbox sequence has the same length as the frames list for each pedestrian.
'''
def get_bbox(root_dir, args, db, df):
    for index, row in df.iterrows():
        video_name = 'video_' + str(row["video_id"]).zfill(4)
        pedID = row["ID"]
        bbox = []
        try:
            tree = ET.parse(os.path.join(args['annot_path'], video_name, 'annotations.xml'))
            root = tree.getroot()
            for frame in db[video_name][pedID]['frames']:
                # For each frame
                for obj in root.findall('track'):
                    if obj.get('label') == 'pedestrian':
                        # Get the bboxes labeled as 'pedestrian'
                        for box in obj.findall('box'):
                            # Compare without zero-padding (str(int(...)) also handles frame '000')
                            if box.get('frame') == str(int(frame)):
                                for attribute in box.iter('attribute'):
                                    if attribute.get('name') == 'ID':
                                        # If the bbox pedID matches the one used for feature extraction
                                        if attribute.text == pedID:
                                            coords = [float(box.get('xtl')),
                                                      float(box.get('ytl')),
                                                      float(box.get('xbr')),
                                                      float(box.get('ybr'))]
                                            x1, y1, x2, y2 = coords
                                            if (x2 - x1) < 1 or (y2 - y1) < 1:
                                                # Degenerate box smaller than one pixel
                                                print(video_name, pedID, coords)
                                            bbox.append(coords)
            # Each frame has only one box for this pedestrian, so collect them as a list
            db[video_name][pedID]['bbox'] = bbox

        except (FileNotFoundError, KeyError, TypeError):
            print(f'Could not find {video_name} in database.')

    return db

bbox_database = get_bbox(data_root, args, copy.deepcopy(database), pid)


'''
This function gets the crossing intention of each pedestrian.
'''
def get_intention(root_dir, args, db, df):
    total = 0
    int_count = [0, 0, 0]
    for index, row in df.iterrows():  # For each ped_id & vid_id
        video_name = 'video_' + str(row["video_id"]).zfill(4)
        pedID = row["ID"]
        cols = ['video_time', 'ped_intention_cat', 'user_id', 'ped_reasoning']
        int_df = pd.read_csv(os.path.join(args['nlp_path'], video_name, 'intentSegmentation.csv'), usecols=cols)

        # For each annotated row
        for row_id, ann_row in int_df.iterrows():
            # Convert seconds to frames (videos are 30 fps)
            video_time = ann_row['video_time']
            int_df.at[row_id, 'video_time'] = math.trunc(video_time * 30)
            # Convert text categories to numerical classes
            intention = ann_row['ped_intention_cat']
            if intention == 'not_cross':
                int_df.at[row_id, 'ped_intention_cat'] = 0
            elif intention == 'not_sure':
                int_df.at[row_id, 'ped_intention_cat'] = 0.5
            elif intention == 'cross':
                int_df.at[row_id, 'ped_intention_cat'] = 1
        int_df['video_time'] = int_df['video_time'].astype(int)  # already changed to frame #
        # Re-arrange the dataframe so each column is a different user
        int_df = int_df.drop_duplicates(subset=['video_time', 'user_id'], keep='last')
        ori_int_df = copy.deepcopy(int_df)

        isna = int_df['ped_reasoning'].isna()
        print(int_df['ped_intention_cat'].isna().sum(), " nan intention cat | ", isna.sum(), " nan reasoning labels")

        time_intent_map = int_df.pivot(index='video_time', columns='user_id', values='ped_intention_cat')

        start_frame, end_frame = time_intent_map.index[0], time_intent_map.index[-1]
        print("Start_frame: ", start_frame, " End frame: ", end_frame)
        total += 450 - start_frame + 1  # count frames from start_frame to the last frame (450)

        # Note: all trailing frames are annotated with the last intention label,
        # and they will have all reasons as 0s
        time_intent_map = time_intent_map.reindex(list(range(0, 451)), fill_value=np.nan).iloc[start_frame:, :]

        time_intent_map.fillna(method='ffill', inplace=True)

        print(time_intent_map.isna().sum().sum(), " -1 are added.")

        # '-1' marks labels that should be ignored downstream
        time_intent_map.fillna(-1.0, inplace=True)

        frame_length = time_intent_map.shape[0]
        major_intention = [-1] * frame_length
        mean_intention = [-1] * frame_length
        original_intention = []
        disagree_score = [-1] * frame_length
        valid_disagree_score = [-1] * frame_length
        for i in range(frame_length):
            cur_frame_int = time_intent_map.values[i, :]  # may contain -1, which should be ignored
            original_intention.append(cur_frame_int)
            int_lbl, votes = np.unique(cur_frame_int, return_counts=True)
            total_valid_votes = 0

            # **************************************************
            # Store the vote rates for the 3 intention categories
            temp_int = [0, 0, 0]
            max_vote = 0
            for j in range(len(int_lbl)):  # unique intent label list
                if int_lbl[j] == -1:
                    continue
                if int_lbl[j] == 0.0:
                    cur_int = 0
                elif int_lbl[j] == 0.5:
                    cur_int = 1
                elif int_lbl[j] == 1.0:
                    cur_int = 2
                else:
                    raise Exception("Error int_lbl[j]")
                int_count[cur_int] += 1

                cur_vot = votes[j]  # number of votes for the current intention
                total_valid_votes += votes[j]
                temp_int[cur_int] = cur_vot
                if cur_vot > max_vote:
                    max_vote = cur_vot
            disagree_score[i] = 1 - max_vote / 24
            valid_disagree_score[i] = 1 - max_vote / total_valid_votes
            major_intention[i] = [temp_int[k] / total_valid_votes for k in range(3)]
            # major_intention[i] is a 3-dimensional list

            # Get the mean intention of the valid votes
            temp_sum = 0
            temp_cnt = 0
            for j in range(len(int_lbl)):
                if int_lbl[j] == -1:
                    continue
                temp_sum += int_lbl[j] * votes[j]
                temp_cnt += votes[j]
            assert temp_cnt == total_valid_votes
            assert temp_cnt > 0
            mean_intention[i] = temp_sum / temp_cnt
            # mean intention is one float in [0, 1]

        try:
            db[video_name][pedID]['major_intention'] = major_intention
            db[video_name][pedID]['mean_intention'] = mean_intention
            db[video_name][pedID]['original_intention'] = original_intention
            db[video_name][pedID]['disagree_score'] = disagree_score
            db[video_name][pedID]['valid_disagree_score'] = valid_disagree_score
            db[video_name][pedID]['labeled_frames'] = time_intent_map.index.tolist()
            print("Ped appear frames: ", db[video_name][pedID]['frames'][0], " -- ", db[video_name][pedID]['frames'][-1])
            print("Labeled frames: ", db[video_name][pedID]['labeled_frames'][0], ' -- ', db[video_name][pedID]['labeled_frames'][-1])
        except (KeyError, TypeError):
            print(f'{video_name} not part of dataset.')

        # Reason feats --------------------------
        print("----- reason ------")
        time_rsn_map = ori_int_df.pivot(index='video_time', columns='user_id', values='ped_reasoning')
        start_frame, end_frame = time_rsn_map.index[0], time_rsn_map.index[-1]
        print("Start_frame: ", start_frame, " End frame: ", end_frame)

        # Note: the reasons of the trailing frames are filled with 0s
        time_rsn_map = time_rsn_map.reindex(list(range(0, 451)), fill_value=np.nan).iloc[start_frame:, :]

        time_rsn_map.fillna(method='bfill', inplace=True)
        print(time_rsn_map.isna().sum().sum(), " -1 are added.")

        time_rsn_map.fillna(-1.0, inplace=True)

        original_reason = []
        reason_feats = []
        for vtime, feats in time_rsn_map.iterrows():  # only labeled frames
            vtime_ori_rsn = []
            for uid in time_rsn_map.columns:
                # -1 entries mark annotators without a valid reason at this frame
                vtime_ori_rsn.append(feats[uid])
            original_reason.append(vtime_ori_rsn)
        # NOTE: the 62-dim one-hot reason feature aggregation is disabled here,
        # so reason_feats stays empty; only the raw per-annotator reasons are kept.
        try:
            db[video_name][pedID]['original_reason'] = original_reason
            db[video_name][pedID]['reason_feats'] = reason_feats
        except KeyError:
            print(f'{video_name} not part of dataset.')
    print("Intention count: ", int_count, " | total=", total)
    return db


intent_database = get_intention(data_root, args, copy.deepcopy(bbox_database), pid)


print(len(intent_database['video_0001']['139_MC']['original_reason']))
print(intent_database['video_0001']['139_MC']['original_reason'][-1])


print(len(intent_database['video_0027']['150_MC']['bbox']))
print(intent_database['video_0027']['150_MC']['bbox'][-5:])
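
# Toy example (illustrative only) of the per-frame vote aggregation in
# get_intention() above: 24 annotators, votes coded as 0 (not_cross),
# 0.5 (not_sure), 1 (cross), and -1 for annotators with no valid label.
_votes = [1.0] * 18 + [0.5] * 4 + [-1.0] * 2
_valid = [v for v in _votes if v != -1]
_mean = sum(_valid) / len(_valid)                                        # 20/22 ~= 0.909
_major = [_valid.count(c) / len(_valid) for c in (0.0, 0.5, 1.0)]        # [0, 4/22, 18/22]
_disagree = 1 - max(_valid.count(c) for c in (0.0, 0.5, 1.0)) / 24       # 1 - 18/24 = 0.25
_valid_disagree = 1 - max(_valid.count(c) for c in (0.0, 0.5, 1.0)) / len(_valid)  # ~= 0.182
print(_mean, _major, _disagree, _valid_disagree)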

'''
Only keep the intention labels corresponding to the frames in which each pedestrian
appears, instead of taking every frame's intention labels for each pedID.
Notice: this avoids frames in which no pedestrian appears!
Notice: the reasoning/description features should also be sliced.
'''
def slice_intention(db):
    for video, value1 in db.items():
        for pedID, value2 in db[video].items():
            db[video][pedID]['frames'] = [int(f) for f in db[video][pedID]['frames']]
            frames = db[video][pedID]['frames']  # original cv-annotated frames
            labeled_frames = db[video][pedID]['labeled_frames']  # frames with intention labels
            frame_min, frame_max = int(min(frames)), int(max(frames))
            labeled_min, labeled_max = int(min(labeled_frames)), int(max(labeled_frames))

            # Intersect the two frame ranges
            max_start = max(frame_min, labeled_min)
            min_end = min(frame_max, labeled_max)
            try:
                frame_start_idx, frame_end_idx = frames.index(max_start), frames.index(min_end)
                labeled_start_idx, labeled_end_idx = labeled_frames.index(max_start), labeled_frames.index(min_end)
            except ValueError:
                print("No element in the list.", video, pedID, min_end - max_start)
                print("!!! Skip the cut of ", video, "!!!")
                continue

            # 1. frames, bbox
            db[video][pedID]['frames'] = db[video][pedID]['frames'][frame_start_idx: frame_end_idx + 1]
            db[video][pedID]['bbox'] = db[video][pedID]['bbox'][frame_start_idx: frame_end_idx + 1]

            # 2. intention labels, original_reason, original_intention
            db[video][pedID]['mean_intention'] = db[video][pedID]['mean_intention'][labeled_start_idx: labeled_end_idx + 1]
            db[video][pedID]['major_intention'] = db[video][pedID]['major_intention'][labeled_start_idx: labeled_end_idx + 1]
            db[video][pedID]['disagree_score'] = db[video][pedID]['disagree_score'][labeled_start_idx: labeled_end_idx + 1]
            db[video][pedID]['valid_disagree_score'] = db[video][pedID]['valid_disagree_score'][labeled_start_idx: labeled_end_idx + 1]

            db[video][pedID]['labeled_frames'] = db[video][pedID]['labeled_frames'][labeled_start_idx: labeled_end_idx + 1]
            db[video][pedID]['reason_feats'] = []  # reason feature aggregation is disabled
            db[video][pedID]['original_reason'] = db[video][pedID]['original_reason'][labeled_start_idx: labeled_end_idx + 1]
            db[video][pedID]['original_intention'] = db[video][pedID]['original_intention'][labeled_start_idx: labeled_end_idx + 1]

            if len(db[video][pedID]['frames']) != len(db[video][pedID]['labeled_frames']):
                print("Different frames v.s. labeled frames: ", video, pedID)
                print(len(db[video][pedID]['frames']), len(db[video][pedID]['bbox']),
                      len(db[video][pedID]['mean_intention']), len(db[video][pedID]['major_intention']),
                      len(db[video][pedID]['disagree_score']), len(db[video][pedID]['labeled_frames']),
                      len(db[video][pedID]['reason_feats']), len(db[video][pedID]['original_reason']),
                      len(db[video][pedID]['original_intention']))
    return db


sliced_database = slice_intention(copy.deepcopy(intent_database))


i = 0
j = 0
for v in sliced_database.keys():
    for p in sliced_database[v].keys():
        sample = sliced_database[v][p]
        # Reason feature aggregation is disabled, so i stays 0:
        # for reason in sample['reason_feats']:
        #     if len(reason) == 0:
        #         i += 1
        j += 1
print("reason feats: ", i, j)

i = 0
j = 0
for v in sliced_database.keys():
    for p in sliced_database[v].keys():
        sample = sliced_database[v][p]
        for intent in sample['major_intention']:
            if intent == -1:
                i += 1
        j += 1
print("intent: ", i, j)


def check_missing(db):
    for video, value1 in db.items():
        for pedID, value2 in db[video].items():
            if len(db[video][pedID]['frames']) != len(db[video][pedID]['labeled_frames']):
                print("Different frames v.s. labeled frames: ", video, pedID)
                print(len(db[video][pedID]['frames']), len(db[video][pedID]['bbox']),
                      len(db[video][pedID]['mean_intention']), len(db[video][pedID]['major_intention']),
                      len(db[video][pedID]['disagree_score']), len(db[video][pedID]['labeled_frames']),
                      len(db[video][pedID]['original_intention']))
                print("Frame start&end: ", db[video][pedID]['frames'][0], db[video][pedID]['frames'][-1])
                print("labeled_frames start&end: ", db[video][pedID]['labeled_frames'][0], db[video][pedID]['labeled_frames'][-1])
                missing_frames = []
                for l in db[video][pedID]['labeled_frames']:
                    if l not in db[video][pedID]['frames']:
                        missing_frames.append(l)
                print("Missing frames: ", missing_frames)
                # Split the missing frames into contiguous pieces
                missing_pieces = []
                start = prev = missing_frames[0]
                for f in missing_frames[1:]:
                    if f != prev + 1:
                        missing_pieces.append([start, prev])
                        start = f
                    prev = f
                missing_pieces.append([start, prev])
                print("Split missing pieces: ", missing_pieces)

                print("--------------------------------------------")
            else:
                if len(db[video][pedID]['frames']) != len(db[video][pedID]['bbox']):
                    print("Different bbox length!", video)
                    print(db[video][pedID]['frames'], db[video][pedID]['bbox'], db[video][pedID]['labeled_frames'])
                else:
                    print("All lengths are the same! ", video)
                no_missing = True
                for f in db[video][pedID]['frames']:
                    if f not in db[video][pedID]['labeled_frames']:
                        print("frames ", f, " not in labeled_frames")
                        no_missing = False
                for l in db[video][pedID]['labeled_frames']:
                    if l not in db[video][pedID]['frames']:
                        print("labeled_frames ", l, " not in frames")
                        no_missing = False
                if no_missing:
                    print("No missing frames! ")


def remove_missing_intention(db):
    for video, value1 in db.items():
        for pedID, value2 in db[video].items():
            if len(db[video][pedID]['frames']) != len(db[video][pedID]['labeled_frames']) or \
                    len(db[video][pedID]['frames']) != len(db[video][pedID]['major_intention']) or \
                    len(db[video][pedID]['major_intention']) != len(db[video][pedID]['labeled_frames']):
                print("Different frames v.s. labeled frames: ", video, pedID)
                print(len(db[video][pedID]['frames']), len(db[video][pedID]['bbox']),
                      len(db[video][pedID]['mean_intention']), len(db[video][pedID]['major_intention']),
                      len(db[video][pedID]['disagree_score']), len(db[video][pedID]['valid_disagree_score']),
                      len(db[video][pedID]['labeled_frames']),
                      len(db[video][pedID]['original_intention']))
                print("Frame start&end: ", db[video][pedID]['frames'][0], db[video][pedID]['frames'][-1])
                print("labeled_frames start&end: ", db[video][pedID]['labeled_frames'][0], db[video][pedID]['labeled_frames'][-1])
                missing_frames = []
                for l in db[video][pedID]['labeled_frames']:
                    if l not in db[video][pedID]['frames']:
                        missing_frames.append(l)
                print("Missing frames: ", missing_frames)
                # Split the missing frames into contiguous pieces
                missing_pieces = []
                start = prev = missing_frames[0]
                for f in missing_frames[1:]:
                    if f != prev + 1:
                        missing_pieces.append([start, prev])
                        start = f
                    prev = f
                missing_pieces.append([start, prev])
                print("Split missing pieces: ", missing_pieces)

                # Remove the missing frames' intention labels, piece by piece
                for piece in missing_pieces:
                    missing_start = db[video][pedID]['labeled_frames'].index(piece[0])
                    missing_end = db[video][pedID]['labeled_frames'].index(piece[1])

                    del db[video][pedID]['mean_intention'][missing_start: missing_end + 1]
                    del db[video][pedID]['major_intention'][missing_start: missing_end + 1]
                    del db[video][pedID]['disagree_score'][missing_start: missing_end + 1]
                    del db[video][pedID]['valid_disagree_score'][missing_start: missing_end + 1]
                    del db[video][pedID]['labeled_frames'][missing_start: missing_end + 1]
                    del db[video][pedID]['original_reason'][missing_start: missing_end + 1]
                    del db[video][pedID]['original_intention'][missing_start: missing_end + 1]

                print("--------------------------------------------")
            else:
                print("Same frames and labels: ", video, pedID)
                if len(db[video][pedID]['frames']) != len(db[video][pedID]['bbox']):
                    print("missing bbox ", len(db[video][pedID]['frames']) - len(db[video][pedID]['bbox']))
                    # Pad by repeating the last bbox when exactly one is missing
                    db[video][pedID]['bbox'].append(db[video][pedID]['bbox'][-1])
                    if len(db[video][pedID]['frames']) - len(db[video][pedID]['bbox']) > 1:
                        print("Missing more than 1 bbox annotation!")
                for f in db[video][pedID]['frames']:
                    if f not in db[video][pedID]['labeled_frames']:
                        print("frames ", f, " not in labeled_frames")

                for l in db[video][pedID]['labeled_frames']:
                    if l not in db[video][pedID]['frames']:
                        print("labeled_frames ", l, " not in frames")
                print("================================================")

    return db
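
# Toy example (illustrative only) of the contiguous-piece splitting used by
# check_missing() and remove_missing_intention() above:
def _contiguous_pieces(frames):
    pieces = []
    for f in sorted(frames):
        if pieces and f == pieces[-1][1] + 1:
            pieces[-1][1] = f      # extend the current run
        else:
            pieces.append([f, f])  # start a new run
    return pieces

print(_contiguous_pieces([5, 6, 7, 12, 13, 20]))  # -> [[5, 7], [12, 13], [20, 20]]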

print(len(sliced_database['video_0083']['1_MC']['major_intention']), len(sliced_database['video_0083']['1_MC']['bbox']))


missing_database = copy.deepcopy(sliced_database)
# video_0003 and video_0028 are ignored: their annotated frame sequences are too short
del missing_database['video_0003']
del missing_database['video_0028']


removed_missing_database = remove_missing_intention(missing_database)


check_missing(removed_missing_database)


uni_db = copy.deepcopy(removed_missing_database)


for v in uni_db.keys():
    for p in uni_db[v].keys():
        sample = uni_db[v][p]
        if not (len(sample['frames']) == len(sample['major_intention']) == len(sample['bbox'])):
            print(v, p, len(sample['frames']), len(sample['major_intention']), len(sample['bbox']),
                  len(sample['reason_feats']))


for k in uni_db['video_0023']['6_MC'].keys():
    if uni_db['video_0023']['6_MC'][k]:
        print(k, len(uni_db['video_0023']['6_MC'][k]))


# Pad any remaining one-frame bbox gaps by repeating the last bbox
for v in uni_db.keys():
    for p in uni_db[v].keys():
        sample = uni_db[v][p]
        if not (len(sample['frames']) == len(sample['major_intention']) == len(sample['bbox'])):
            print(v, p, len(sample['frames']), len(sample['major_intention']), len(sample['bbox']),
                  len(sample['reason_feats']))
            uni_db[v][p]['bbox'].append(uni_db[v][p]['bbox'][-1])


# Final consistency check: nothing should be printed here
for v in uni_db.keys():
    for p in uni_db[v].keys():
        sample = uni_db[v][p]
        if not (len(sample['frames']) == len(sample['major_intention']) == len(sample['bbox'])):
            print(v, p, len(sample['frames']), len(sample['major_intention']), len(sample['bbox']),
                  len(sample['reason_feats']))


database_name = 'database_' + time.strftime("%d%b%Y-%Hh%Mm%Ss") + '.pkl'
if not os.path.exists(args['save_path']):
    os.makedirs(args['save_path'])
with open(os.path.join(args['save_path'], database_name), 'wb') as fid:
    pickle.dump(uni_db, fid)
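
# Toy example (illustrative only) of the "intention-only" criterion used for
# the *_overlap database below: a frame is trimmed when every annotator's
# reason entry is -1, i.e. it carries an intention label but no reason.
_reason_row = [-1, -1, -1]
print(sum(1 if r == -1 else 0 for r in _reason_row) == len(_reason_row))  # True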

overlap_db = copy.deepcopy(uni_db)

int_reason_overlap = True
if int_reason_overlap:
    for v in overlap_db.keys():
        for p in overlap_db[v].keys():
            sample = overlap_db[v][p]
            print(v, p, len(sample['frames']), len(sample['major_intention']), len(sample['bbox']), len(sample['original_reason']))
            mis_match_list = []
            for i in range(len(sample['frames'])):
                if sum([1 if r == -1 else 0 for r in sample['original_reason'][i]]) == len(sample['original_reason'][i]):
                    mis_match_list.append(i)
            # Remove the mismatched frames' intention labels, because intention labels
            # always run longer than the reasons, until the end of the video

            if len(mis_match_list) > 0:
                del overlap_db[v][p]['frames'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['bbox'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['mean_intention'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['major_intention'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['disagree_score'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['valid_disagree_score'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['labeled_frames'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['original_reason'][mis_match_list[0]: mis_match_list[-1] + 1]
                del overlap_db[v][p]['original_intention'][mis_match_list[0]: mis_match_list[-1] + 1]
                print("Removed mismatch: ", v, p, len(sample['frames']), len(sample['major_intention']), len(sample['bbox']), len(sample['original_reason']))


database_name = 'database_' + time.strftime("%d%b%Y-%Hh%Mm%Ss") + '_overlap.pkl'
if not os.path.exists(args['save_path']):
    os.makedirs(args['save_path'])
with open(os.path.join(args['save_path'], database_name), 'wb') as fid:
    pickle.dump(overlap_db, fid)

--------------------------------------------------------------------------------