├── .DS_Store
├── images
│   ├── .DS_Store
│   ├── statistics.png
│   ├── dataset_multimodal.jpg
│   └── dataset_temporal.jpg
├── split_clips_to_frames.py
├── extend_driving_decision_annotation.py
├── README.md
└── extend_intent_annotation.py

/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PSI-Intention2022/PSI-Dataset/HEAD/.DS_Store
--------------------------------------------------------------------------------

/images/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PSI-Intention2022/PSI-Dataset/HEAD/images/.DS_Store
--------------------------------------------------------------------------------

/images/statistics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PSI-Intention2022/PSI-Dataset/HEAD/images/statistics.png
--------------------------------------------------------------------------------

/images/dataset_multimodal.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PSI-Intention2022/PSI-Dataset/HEAD/images/dataset_multimodal.jpg
--------------------------------------------------------------------------------

/images/dataset_temporal.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PSI-Intention2022/PSI-Dataset/HEAD/images/dataset_temporal.jpg
--------------------------------------------------------------------------------

/split_clips_to_frames.py:
--------------------------------------------------------------------------------
1 | '''Given video path, extract frames for all videos. Check if frames exist first.'''
2 | 
3 | import os
4 | import argparse
5 | from pathlib import Path
6 | import cv2
7 | from tqdm import tqdm
8 | import sys
9 | 
10 | if __name__ == '__main__':
11 |     root_path = sys.argv[1]
12 |     video_path = os.path.join(root_path, 'PSI_Videos/videos')
13 |     frames_path = os.path.join(root_path, 'frames')
14 | 
15 |     #create 'data/frames' folder
16 |     if not os.path.exists(frames_path):
17 |         os.makedirs(frames_path)
18 |         print("Created 'frames' folder.")
19 | 
20 |     for video in tqdm(sorted(os.listdir(video_path))):
21 |         name = video.split('.mp4')[0]
22 |         video_target = os.path.join(video_path, video)
23 |         frames_target = os.path.join(frames_path, name)
24 | 
25 |         if not os.path.exists(frames_target):
26 |             os.makedirs(frames_target)
27 |             print(f'Created frames folder for video {name}')
28 | 
29 |         try:
30 |             vidcap = cv2.VideoCapture(video_target)
31 |             if not vidcap.isOpened():
32 |                 raise Exception(f'Cannot open file {video}')
33 |         except Exception as e:
34 |             raise e
35 | 
36 |         success, frame = vidcap.read()
37 |         cur_frame = 0
38 |         while(success):
39 |             frame_num = str(cur_frame).zfill(3)
40 |             cv2.imwrite(os.path.join(frames_target, f'{frame_num}.jpg'), frame)
41 |             success, frame = vidcap.read()
42 |             cur_frame += 1
43 |         vidcap.release()
44 |         # break
45 | 
46 | 
--------------------------------------------------------------------------------

/extend_driving_decision_annotation.py:
--------------------------------------------------------------------------------
1 | import os 2 | import json 3 | import sys 4 | import copy 5 | 6 | 7 | if __name__ == '__main__': 8 | print("Extend Driving Decision Annotations of PSI 2.0 Dataset.") 9 | 10 | root_path = sys.argv[1] 11 | 12 | key_frame_anotation_path = os.path.join(root_path, 
'PSI2.0_TrainVal/annotations/cognitive_annotation_key_frame') 13 | extended_annotation_path = os.path.join(root_path, 'PSI2.0_TrainVal/annotations/cognitive_annotation_extended') 14 | cv_annotation_path = os.path.join(root_path, 'PSI2.0_TrainVal/annotations/cv_annotation') 15 | 16 | if not os.path.exists(extended_annotation_path): 17 | os.makedirs(extended_annotation_path) 18 | 19 | video_list = sorted(os.listdir(key_frame_anotation_path)) 20 | 21 | for vname in video_list: 22 | # 1. load key-frame annotations 23 | key_dd_ann_file = os.path.join(key_frame_anotation_path, vname, 'driving_decision.json') 24 | with open(key_dd_ann_file, 'r') as f: 25 | key_dd_ann = json.load(f) 26 | 27 | # 2. extend annotations (driving decision) - decision to the future frames, description to the prior frames 28 | extended_dd_ann = copy.deepcopy(key_dd_ann) 29 | 30 | annotator_list = key_dd_ann['frames'][list(key_dd_ann['frames'].keys())[0]]['cognitive_annotation'].keys() 31 | frame_list = sorted(key_dd_ann['frames'].keys()) 32 | for annotator_k in annotator_list: 33 | pivot_speed = "maintainSpeed" # at the beginning no labels, use "goStraight" == "maintainSpeed" == 0 34 | pivot_direction = "goStraight" 35 | for i in range(len(frame_list)): 36 | frame_id = frame_list[i] 37 | if extended_dd_ann['frames'][frame_id]['cognitive_annotation'][annotator_k]['driving_decision_speed'] == "": 38 | extended_dd_ann['frames'][frame_id]['cognitive_annotation'][annotator_k]['driving_decision_speed'] = pivot_speed 39 | else: 40 | pivot_speed = extended_dd_ann['frames'][frame_id]['cognitive_annotation'][annotator_k]['driving_decision_speed'] 41 | if extended_dd_ann['frames'][frame_id]['cognitive_annotation'][annotator_k]['driving_decision_direction'] == "": 42 | extended_dd_ann['frames'][frame_id]['cognitive_annotation'][annotator_k]['driving_decision_direction'] = pivot_direction 43 | else: 44 | pivot_direction = extended_dd_ann['frames'][frame_id]['cognitive_annotation'][annotator_k]['driving_decision_direction'] 45 | 46 | last_frame_id = frame_list[-1] 47 | pivot_des = extended_dd_ann['frames'][last_frame_id]['cognitive_annotation'][annotator_k]['explanation'] 48 | for i in range(len(frame_list)-1, -1, -1): 49 | frame_id = frame_list[i] 50 | if extended_dd_ann['frames'][frame_id]['cognitive_annotation'][annotator_k]['explanation'] == '': 51 | extended_dd_ann['frames'][frame_id]['cognitive_annotation'][annotator_k]['explanation'] = pivot_des 52 | else: 53 | pivot_des = extended_dd_ann['frames'][frame_id]['cognitive_annotation'][annotator_k]['explanation'] 54 | # Note: after this operation, some frames at the end of the observed frame list do not have descriptions, ['description']== "" 55 | 56 | # 3. 
output extended annotations 57 | output_dir = os.path.join(extended_annotation_path, vname) 58 | if not os.path.exists(output_dir): 59 | os.makedirs(output_dir) 60 | 61 | # write json to file 62 | extended_dd_ann_file = os.path.join(extended_annotation_path, vname, 'driving_decision.json') 63 | with open(extended_dd_ann_file, 'w') as file: 64 | json_string = json.dumps(extended_dd_ann, default=lambda o: o.__dict__, sort_keys=False, indent=4) 65 | file.write(json_string) 66 | --------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
1 | # Pedestrian Situated Intent (PSI) Benchmark
2 | 
3 | This repository contains the scripts and instructions for preparing the **Pedestrian Situated Intent (PSI) 1.0 & 2.0** datasets.
4 | 
5 | ![image](./images/dataset_multimodal.jpg)
6 | ![image](./images/dataset_temporal.jpg)
7 | 
8 | - **PSI 1.0**: video_0001 ~ video_0110
9 | - **PSI 2.0**: video_0001 ~ video_0204
10 | 
11 | **NOTE: You only need the PSI 2.0 dataset for the [[IEEE ITSS PSI Competition](https://psi-intention2022.github.io)]**. The PSI 1.0 dataset is also provided, and you are welcome to use it if you would like to explore knowledge beyond PSI 2.0.
12 | 
13 | # 1. PSI 2.0 Dataset
14 | 
15 | ## Part 1. Prepare the dataset
16 | ***Step 1.*** Download the PSI 2.0 dataset videos from ~~[[Google Drive]()]~~ [[PSI Homepage](http://pedestriandataset.situated-intent.net)]. Move the *\*.zip* files to the dataset *ROOT_PATH* and unzip them by
17 | 
18 | ```shell
19 | cd ROOT_PATH # e.g., root/Dataset
20 | unzip '*.zip' -d .
21 | rm *.zip
22 | ```
23 | The extracted folder contains all videos (Train/Val):
24 | - *ROOT_PATH/PSI_Videos/videos*.
25 | 
26 | ***Step 2.*** Download the PSI 2.0 data annotations from ~~[[Google Drive]()]~~ [[PSI Homepage](http://pedestriandataset.situated-intent.net)]. Move the downloaded *\*.zip* files to the dataset *ROOT_PATH* and unzip them by
27 | 
28 | ```shell
29 | unzip '*.zip' -d .
30 | rm *.zip
31 | ```
32 | 
33 | The extracted folder contains all annotations of the PSI 2.0 dataset (Train/Val):
34 | - *ROOT_PATH/PSI2.0_TrainVal/annotations/cognitive_annotation_key_frame*
35 | - *ROOT_PATH/PSI2.0_TrainVal/annotations/cv_annotation*
36 | 
37 | and the train/val/test splits:
38 | - *ROOT_PATH/PSI2.0_TrainVal/splits/PSI2_split.json*.
39 | 
40 | 
41 | ***Step 3.*** Split the videos into frames by
42 | 
43 | ```shell
44 | python split_clips_to_frames.py *ROOT_PATH*
45 | ```
46 | and the output frames are saved to:
47 | - *ROOT_PATH/frames*.
48 | 
49 | 
50 | ## Part 2. Extend Key-frame Cognitive Annotations
51 | 
52 | **TASK 1 - Pedestrian Intent**: The frame at which an annotator explicitly makes a crossing-intent annotation is treated as a "key-frame". Each annotator gives one "intent" annotation estimating the crossing intent of the target pedestrian, together with one "reasoning/explanation" of that estimation. For these two annotations (a short sketch at the end of this task illustrates the rule):
53 | 
54 | - Crossing Intent: We extend the crossing intent annotation to the frames following the current key-frame, until the next frame at which one of the annotators makes another estimation (regardless of whether the two annotations agree).
55 | - Reasoning/Explanation: We extend the reasoning/description of the intent estimation to the frames prior to the current key-frame, back to the last key-frame at which one of the annotators made another estimation, assuming the description refers to the scenes observed by the annotators in support of the intent estimation.
56 | 
57 | *Already-crossed*: When a pedestrian has already crossed in front of the moving vehicle, we categorize the status after the target pedestrian crosses the middle line of the ego-view as "*Already-crossed*." In this scenario, there is no need to predict any further crossing intent, as the target pedestrian has already safely crossed the road.
58 | 
59 | 
60 | ```shell
61 | python extend_intent_annotation.py *ROOT_PATH*
62 | ```
63 | and the extended annotations are saved to:
64 | - *ROOT_PATH*/PSI2.0_TrainVal/annotations/cognitive_annotation_extended.
65 | 
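To make the extension rule concrete, here is a minimal sketch on toy per-frame lists (the labels are made up; the fill logic mirrors `extend_intent_annotation.py`, and the same idea applies to the driving-decision annotations):

```python
# Toy illustration of the key-frame extension rule (hypothetical labels).
# Empty strings mark frames without an explicit key-frame annotation.
intent = ["", "", "not_sure", "", "", "cross", "", ""]
explanation = ["", "", "standing at the curb", "", "", "looking at traffic", "", ""]

# Intent labels are carried forward to later frames, until the next key-frame.
pivot_int = "not_sure"  # default before the first key-frame
filled_intent = []
for label in intent:
    if label != "":
        pivot_int = label
    filled_intent.append(pivot_int)

# Explanations are carried backward to earlier frames, back to the previous key-frame.
pivot_des = explanation[-1]
filled_explanation = []
for text in reversed(explanation):
    if text != "":
        pivot_des = text
    filled_explanation.insert(0, pivot_des)

print(filled_intent)       # ['not_sure', 'not_sure', 'not_sure', 'not_sure', 'not_sure', 'cross', 'cross', 'cross']
print(filled_explanation)  # frames after the last key-frame keep an empty description, as noted in the scripts
```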
66 | **TASK 2 - Pedestrian Trajectory**: The pedestrian trajectory prediction task uses only the visual annotations (bounding boxes) of the target pedestrian, so there is no need to extend the cognitive annotations to all frames.
67 | 
68 | **TASK 3 - Driving Decision**: The frame at which one of the annotators explicitly makes a driving decision is treated as the "key-frame". Each annotator gives one "decision" at the key-frame and provides one "reasoning/description" of the decision made. For these two cognitive annotations:
69 | 
70 | - Driving Decision: We extend the driving decision annotation to the frames following the current key-frame, until the next frame at which one of the annotators makes another driving decision (e.g., turn or go straight).
71 | - Reasoning/Description: We extend the reasoning/description to the frames prior to the current key-frame, back to the last key-frame at which one of the annotators made another decision, assuming the description refers to the scenes observed by the annotators in support of the driving decision.
72 | 
73 | ```shell
74 | python extend_driving_decision_annotation.py *ROOT_PATH*
75 | ```
76 | and the extended annotations are saved to:
77 | - *ROOT_PATH*/PSI2.0_TrainVal/annotations/cognitive_annotation_extended.
78 | 
79 | ![image](./images/statistics.png)
80 | 
81 | ## Part 3. Baselines for Different Tasks Using the PSI Dataset
82 | 
83 | We provide baselines for all challenge tracks as a quick-start reference for using the PSI 2.0 dataset.
84 | 
85 | ***Track 1 ([Pedestrian Intent Prediction (PIP)](https://github.com/PSI-Intention2022/PSI-Intent-Prediction.git))***
86 | 
87 | ***Track 2 ([Pedestrian Trajectory Prediction (PTP)](https://github.com/PSI-Intention2022/PSI-Trajectory-Prediction.git))***
88 | 
89 | ***Track 3 ([Driver Decision Prediction (DDP)](https://github.com/PSI-Intention2022/PSI-DriverDecision-Prediction.git))***
90 | 
91 | 
92 | # 2. PSI 1.0 Dataset
93 | 
94 | If you would like to use the PSI 1.0 annotations, download them from ~~[[Google Drive]()]~~ [[PSI Homepage](http://pedestriandataset.situated-intent.net)]. Move the downloaded *\*.zip* files to the dataset *ROOT_PATH* and unzip them by
95 | 
96 | ```shell
97 | unzip '*.zip' -d .
98 | rm *.zip
99 | ```
100 | 
101 | The extracted PSI 1.0 dataset follows the same format as PSI 2.0, so feel free to use the *Cognitive Annotation Extension* scripts and *Baselines* prepared for PSI 2.0 to explore PSI 1.0.
102 | 
103 | (*Note:* PSI 2.0 and PSI 1.0 share the first 110 videos but have different annotations. Please check the [[paper](https://arxiv.org/pdf/2112.02604v2.pdf)] and our future updates for more information.)
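As a quick sanity check, the extended annotations can be loaded with the standard `json` module. This is only a minimal sketch (not part of the released scripts): the field names follow the structure written by `extend_intent_annotation.py`, and `ROOT_PATH`/`video_0001` are placeholders.

```python
import json
import os

root_path = "ROOT_PATH"  # placeholder: replace with your dataset root
ann_file = os.path.join(
    root_path,
    "PSI2.0_TrainVal/annotations/cognitive_annotation_extended",
    "video_0001",              # placeholder video folder
    "pedestrian_intent.json",
)

with open(ann_file, "r") as f:
    ann = json.load(f)

for ped_id, ped in ann["pedestrians"].items():
    frames = ped["observed_frames"]
    boxes = ped["cv_annotations"]["bboxes"]
    for annotator_id, cog in ped["cognitive_annotations"].items():
        intents = cog["intent"]  # one extended intent label per observed frame
        # the per-frame lists are expected to stay aligned after the extension step
        assert len(intents) == len(frames) == len(boxes)
        print(ped_id, annotator_id, len(frames), intents[0], intents[-1])
```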
-------------------------------------------------------------------------------- /extend_intent_annotation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | import copy 5 | 6 | def most_frequent(List): 7 | # return the most frequent intent estimation made by all annotators 8 | counter = 0 9 | num = List[0] 10 | 11 | for i in List: 12 | curr_frequency = List.count(i) 13 | if(curr_frequency> counter): 14 | counter = curr_frequency 15 | num = i 16 | 17 | return num 18 | 19 | 20 | if __name__ == '__main__': 21 | print("Extend Intent Annotations of PSI 2.0 Dataset.") 22 | 23 | root_path = sys.argv[1] 24 | 25 | key_frame_anotation_path = os.path.join(root_path, 'PSI2.0_TrainVal/annotations/cognitive_annotation_key_frame') 26 | extended_annotation_path = os.path.join(root_path, 'PSI2.0_TrainVal/annotations/cognitive_annotation_extended') 27 | cv_annotation_path = os.path.join(root_path, 'PSI2.0_TrainVal/annotations/cv_annotation') 28 | 29 | if not os.path.exists(extended_annotation_path): 30 | os.makedirs(extended_annotation_path) 31 | 32 | video_list = sorted(os.listdir(key_frame_anotation_path)) 33 | 34 | for vname in video_list: 35 | # 1. load key-frame annotations 36 | key_intent_ann_file = os.path.join(key_frame_anotation_path, vname, 'pedestrian_intent.json') 37 | with open(key_intent_ann_file, 'r') as f: 38 | key_intent_ann = json.load(f) 39 | 40 | # 2. extend annotations (intent & description) - intent to the future frames, description to the prior frames 41 | extended_intent_ann = copy.deepcopy(key_intent_ann) 42 | 43 | for ped_k in key_intent_ann['pedestrians'].keys(): 44 | observed_frames = key_intent_ann['pedestrians'][ped_k]['observed_frames'] 45 | for ann_k in key_intent_ann['pedestrians'][ped_k]['cognitive_annotations'].keys(): 46 | intent_list = key_intent_ann['pedestrians'][ped_k]['cognitive_annotations'][ann_k]['intent'] 47 | key_frame_list = key_intent_ann['pedestrians'][ped_k]['cognitive_annotations'][ann_k]['key_frame'] 48 | description_list = key_intent_ann['pedestrians'][ped_k]['cognitive_annotations'][ann_k]['description'] 49 | assert len(intent_list) == len(key_frame_list) == len(description_list) 50 | 51 | pivot_int = 'not_sure' # 0.5 # at the beginning if no labels, use "not_sure" 52 | for frame_k in range(len(observed_frames)): 53 | if intent_list[frame_k] == "": 54 | extended_intent_ann['pedestrians'][ped_k]['cognitive_annotations'][ann_k]['intent'][frame_k] = pivot_int 55 | else: 56 | pivot_int = intent_list[frame_k] 57 | 58 | pivot_des = description_list[-1] 59 | for frame_k in range(len(observed_frames)-1, -1, -1): 60 | if description_list[frame_k] == "": 61 | extended_intent_ann['pedestrians'][ped_k]['cognitive_annotations'][ann_k]['description'][frame_k] = pivot_des 62 | else: 63 | pivot_des = description_list[frame_k] 64 | # Note: after this operation, some frames at the end of the observed frame list do not have descriptions, ['description']== "" 65 | 66 | # 3. 
Ignore 'Already-crossed' frames 67 | for ped_k in extended_intent_ann['pedestrians'].keys(): 68 | observed_frames = extended_intent_ann['pedestrians'][ped_k]['observed_frames'] 69 | last_intents = [] 70 | last_key_frames = [] 71 | for ann_k in extended_intent_ann['pedestrians'][ped_k]['cognitive_annotations'].keys(): 72 | intent_list = extended_intent_ann['pedestrians'][ped_k]['cognitive_annotations'][ann_k]['intent'] 73 | key_frame_list = extended_intent_ann['pedestrians'][ped_k]['cognitive_annotations'][ann_k]['key_frame'] 74 | last_intents.append(intent_list[-1]) 75 | for j in range(len(observed_frames)-1, -1, -1): 76 | if key_frame_list[j] != 0: 77 | last_key_frames.append(j) # Note! Here 'j' is not the frame number, it's the idx/position of the frame in the 'observed_frame' list 78 | break 79 | else: 80 | continue 81 | 82 | if most_frequent(last_intents) == 'cross': # only apply to the 'cross' cases 83 | start_box = extended_intent_ann['pedestrians'][ped_k]['cv_annotations']['bboxes'][0] 84 | last_key_box = extended_intent_ann['pedestrians'][ped_k]['cv_annotations']['bboxes'][max(last_key_frames)] 85 | end_box = extended_intent_ann['pedestrians'][ped_k]['cv_annotations']['bboxes'][-1] 86 | if ((last_key_box[0]+last_key_box[2])/2 - 640) * ((end_box[0]+end_box[2])/2 - 640) >= 0: 87 | # Situation 1: By the last key-frame annotation, the target pedestrian already crossed the middle line of the ego-view, and is on the same side as the final position. 88 | # In such case, we use the last annotated key-frame as the end of "intent estimation" task 89 | last_intent_estimate_idx = max(last_key_frames) 90 | else: # < 0 91 | # Situation 2: By the last key-frame annotation, the target pedestrian is at a different position compared to the final observed position. 92 | # In such case, we use the moment when the target pedestrian crossed the middle line of the ego-view as the last frame of "intent estimation" task 93 | for cur_frame_k in range(max(last_key_frames), len(observed_frames)): # pedestrian could change positions several times, e.g., vehicle is turning. Thus starts from the last key-frame 94 | current_box = extended_intent_ann['pedestrians'][ped_k]['cv_annotations']['bboxes'][cur_frame_k] 95 | if ((current_box[0]+current_box[2])/2 - 640) * ((end_box[0]+end_box[2])/2 - 640) >= 0: 96 | # once the pedestrian crossed the middle line of ego-view, to the same side as the last frame, use this moment as the last intent estimation task frame 97 | last_intent_estimate_idx = cur_frame_k 98 | break 99 | else: 100 | continue 101 | # Cut redundant intent extended annotations that not usable for "intent estimation" task 102 | del extended_intent_ann['pedestrians'][ped_k]['observed_frames'][last_intent_estimate_idx+1:] 103 | del extended_intent_ann['pedestrians'][ped_k]['cv_annotations']['bboxes'][last_intent_estimate_idx+1:] 104 | for ann_k in extended_intent_ann['pedestrians'][ped_k]['cognitive_annotations'].keys(): 105 | del extended_intent_ann['pedestrians'][ped_k]['cognitive_annotations'][ann_k]['intent'][last_intent_estimate_idx+1:] 106 | del extended_intent_ann['pedestrians'][ped_k]['cognitive_annotations'][ann_k]['key_frame'][last_intent_estimate_idx+1:] 107 | del extended_intent_ann['pedestrians'][ped_k]['cognitive_annotations'][ann_k]['description'][last_intent_estimate_idx+1:] 108 | 109 | 110 | # 4. 
output extended annotations 111 | output_dir = os.path.join(extended_annotation_path, vname) 112 | if not os.path.exists(output_dir): 113 | os.makedirs(output_dir) 114 | 115 | # write json to file 116 | extended_intent_ann_file = os.path.join(extended_annotation_path, vname, 'pedestrian_intent.json') 117 | with open(extended_intent_ann_file, 'w') as file: 118 | json_string = json.dumps(extended_intent_ann, default=lambda o: o.__dict__, sort_keys=False, indent=4) 119 | file.write(json_string) 120 | 121 | print(vname, ": Original observed frames: {} --> valid intent estimation frames: {}".format( 122 | len(key_intent_ann['pedestrians'][ped_k]['observed_frames']), 123 | len(extended_intent_ann['pedestrians'][ped_k]['observed_frames']) 124 | )) 125 | --------------------------------------------------------------------------------
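For reference, the "Already-crossed" trimming in `extend_intent_annotation.py` hinges on a simple midline test: the sign of (bounding-box center x − 640) tells which side of the ego-view the pedestrian is on. Below is a small illustration with made-up boxes; it assumes [x1, y1, x2, y2] boxes and a 1280-pixel-wide frame, which is what the hard-coded 640 suggests.

```python
# Toy check of the ego-view midline test used above (made-up boxes).
# Boxes are assumed to be [x1, y1, x2, y2]; 640 is half of the presumed 1280-px frame width.
def side_of_midline(box, midline=640.0):
    center_x = (box[0] + box[2]) / 2
    return center_x - midline

last_key_box = [600, 400, 700, 650]   # hypothetical box at the last annotated key-frame
end_box = [900, 380, 1000, 640]       # hypothetical box at the last observed frame

# A non-negative product means both centers lie on the same side of the midline,
# so the last key-frame already marks the end of the intent-estimation segment.
same_side = side_of_midline(last_key_box) * side_of_midline(end_box) >= 0
print(same_side)  # True (centers at x = 650 and x = 950 are both right of 640)
```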