├── figures
│   ├── different-light.jpg
│   └── sample-sequences.jpg
├── LICENSE
├── process.py
├── README.md
└── dataset.py

/figures/different-light.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lujiaxuan0520/THU-EACT-50/HEAD/figures/different-light.jpg

--------------------------------------------------------------------------------
/figures/sample-sequences.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lujiaxuan0520/THU-EACT-50/HEAD/figures/sample-sequences.jpg

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2023 Tsinghua University

Permission is hereby granted, free of charge, to any person obtaining a copy of this data and associated documentation files (the “Data”), to deal in the Data without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Data, and to permit persons to whom the Data is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Data.

THE Data IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE Data OR THE USE OR OTHER DEALINGS IN THE Data.
--------------------------------------------------------------------------------
/process.py:
--------------------------------------------------------------------------------
import numpy as np
from scipy import io
import numba
import time
# from matplotlib import pyplot as plt

@numba.jit(nopython=True)  # for acceleration
def get_eventFrame(ts, x, y, p, repr_size=(260, 346), time_num=1):
    """
    Build event frames over multiple time windows of the event stream.
    :param ts: timestamps of the events
    :param x: x-coordinates of the events
    :param y: y-coordinates of the events
    :param p: polarities of the events
    :param repr_size: spatial size (height, width) of the representation
    :param time_num: how many windows to split in the temporal dimension
    :return: numpy array with shape (time_num, repr_size[0], repr_size[1])
    """

    img = np.zeros(shape=(time_num, repr_size[0], repr_size[1]), dtype=np.float32)

    # process each temporal window
    batch_num = int(ts.size / time_num)
    for time_idx in range(time_num):
        # extract the corresponding info
        ts_part = ts[time_idx * batch_num: (time_idx + 1) * batch_num]
        p_part = p[time_idx * batch_num: (time_idx + 1) * batch_num]
        x_part = x[time_idx * batch_num: (time_idx + 1) * batch_num]
        y_part = y[time_idx * batch_num: (time_idx + 1) * batch_num]

        # change polarity to (0,1) if it is (1,-1)
        p_part = ((p_part + 1) / 2).astype(np.int32)

        for i in range(len(ts_part)):
            img[time_idx, y_part[i], x_part[i]] = (2.0 * p_part[i] - 1)

        # draw image
        # fig = plt.figure()
        # fig.suptitle('Event Frame')
        # plt.imshow(img[time_idx], cmap='gray')
        # plt.xlabel("x [pixels]")
        # plt.ylabel("y [pixels]")
        # plt.colorbar()
        # # plt.savefig('event_frame.jpg')
        # plt.show()
    return img

@numba.jit(nopython=True)  # for acceleration
def get_eventCount(ts, x, y, p, repr_size=(260, 346), time_num=1):
    """
    Build event-count frames over multiple time windows of the event stream.
    :param ts: timestamps of the events
    :param x: x-coordinates of the events
    :param y: y-coordinates of the events
    :param p: polarities of the events
    :param repr_size: spatial size (height, width) of the representation
    :param time_num: how many windows to split in the temporal dimension
    :return: numpy array with shape (time_num, repr_size[0], repr_size[1])
    """

    img = np.zeros(shape=(time_num, repr_size[0], repr_size[1]), dtype=np.float32)

    # process each temporal window
    batch_num = int(ts.size / time_num)
    for time_idx in range(time_num):
        # extract the corresponding info
        ts_part = ts[time_idx * batch_num: (time_idx + 1) * batch_num]
        p_part = p[time_idx * batch_num: (time_idx + 1) * batch_num]
        x_part = x[time_idx * batch_num: (time_idx + 1) * batch_num]
        y_part = y[time_idx * batch_num: (time_idx + 1) * batch_num]

        # change polarity to (0,1) if it is (1,-1)
        p_part = ((p_part + 1) / 2).astype(np.int32)

        # count the number of events at each pixel
        pixel_counts = np.zeros(shape=(repr_size[0], repr_size[1]), dtype=np.int32)
        for i in range(len(ts_part)):
            pixel_counts[y_part[i], x_part[i]] += 1

        img[time_idx, :, :] = pixel_counts

    # normalize to (-1,1)
    img = 2 * ((img - img.min()) / (img.max() - img.min())) - 1
    return img


@numba.jit(nopython=True)  # for acceleration
def get_eventAccuFrame(ts, x, y, p, repr_size=(260, 346), time_num=1):
    """
    Build accumulated event frames (summed signed polarities) over multiple time windows of the event stream.
    :param ts: timestamps of the events
    :param x: x-coordinates of the events
    :param y: y-coordinates of the events
    :param p: polarities of the events
    :param repr_size: spatial size (height, width) of the representation
    :param time_num: how many windows to split in the temporal dimension
    :return: numpy array with shape (time_num, repr_size[0], repr_size[1])
    """

    img = np.zeros(shape=(time_num, repr_size[0], repr_size[1]), dtype=np.float32)

    # process each temporal window
    batch_num = int(ts.size / time_num)
    for time_idx in range(time_num):
        # extract the corresponding info
        ts_part = ts[time_idx * batch_num: (time_idx + 1) * batch_num]
        p_part = p[time_idx * batch_num: (time_idx + 1) * batch_num]
        x_part = x[time_idx * batch_num: (time_idx + 1) * batch_num]
        y_part = y[time_idx * batch_num: (time_idx + 1) * batch_num]

        # change polarity to (0,1) if it is (1,-1)
        p_part = ((p_part + 1) / 2).astype(np.int32)

        for i in range(len(ts_part)):
            img[time_idx, y_part[i], x_part[i]] += (2.0 * p_part[i] - 1)

        # draw image
        # fig = plt.figure()
        # fig.suptitle('Event Frame')
        # plt.imshow(img[time_idx], cmap='gray')
        # plt.xlabel("x [pixels]")
        # plt.ylabel("y [pixels]")
        # plt.colorbar()
        # # plt.savefig('event_frame.jpg')
        # plt.show()
    # normalize to (-1,1)
    # img = 2 * ((img - img.min()) / (img.max() - img.min())) - 1
    return img


@numba.jit(nopython=True)  # for acceleration
def get_timeSurface(ts, x, y, p, repr_size=(260, 346), time_num=1):
    """
    Build time surfaces over multiple time windows of the event stream.
    :param ts: timestamps of the events
    :param x: x-coordinates of the events
    :param y: y-coordinates of the events
    :param p: polarities of the events
    :param repr_size: spatial size (height, width) of the representation
    :param time_num: how many windows to split in the temporal dimension
    :return: numpy array with shape (time_num, repr_size[0], repr_size[1])
    """

    # parameters for the time surface
    tau = 50e-3  # 50 ms

    # sae = np.zeros(repr_size, np.float32)
    sae = np.zeros((time_num, repr_size[0], repr_size[1]), np.float32)

    # process each temporal window
    batch_num = int(ts.size / time_num)
    for time_idx in range(time_num):
        # extract the corresponding info
        ts_part = ts[time_idx * batch_num: (time_idx + 1) * batch_num]
        p_part = p[time_idx * batch_num: (time_idx + 1) * batch_num]
        x_part = x[time_idx * batch_num: (time_idx + 1) * batch_num]
        y_part = y[time_idx * batch_num: (time_idx + 1) * batch_num]

        # calculate the time surface using exponential decay
        t_ref = ts_part[-1]  # 'current' time
        for i in range(len(ts_part)):
            if (p_part[i] > 0):
                sae[time_idx, y_part[i], x_part[i]] = np.exp(-(t_ref - ts_part[i]) / tau)
            else:
                sae[time_idx, y_part[i], x_part[i]] = -np.exp(-(t_ref - ts_part[i]) / tau)

            ## non-polarity time surface
            # sae[y[i], x[i]] = np.exp(-(t_ref-ts[i]) / tau)

        # fig = plt.figure()
        # fig.suptitle('Time surface')
        # plt.imshow(sae[time_idx], cmap='gray')
        # plt.xlabel("x [pixels]")
        # plt.ylabel("y [pixels]")
        # plt.colorbar()
        # # plt.savefig('time_surface.jpg')
        # plt.show()
    return sae
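
# ------------------------------------------------------------------------------
# Optional usage sketch (illustrative only, not part of the original DHP19 demo
# below): the representation functions above can be tried on a small synthetic
# event stream; a DAVIS346-sized 346x260 sensor is assumed here.
def synthetic_demo():
    rng = np.random.default_rng(0)
    n = 10000
    ts = np.sort(rng.random(n)).astype(np.float32)                   # normalized timestamps in [0, 1]
    x = rng.integers(0, 346, size=n).astype(np.int32)                # x in [0, 345]
    y = rng.integers(0, 260, size=n).astype(np.int32)                # y in [0, 259]
    p = rng.choice(np.array([-1.0, 1.0], dtype=np.float32), size=n)  # polarity in {-1, 1}
    frames = get_eventFrame(ts, x, y, p, repr_size=(260, 346), time_num=3)
    print(frames.shape)  # expected: (3, 260, 346)
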
if __name__ == '__main__':
    file_name = "/home/Event_camera_action/DHP19/h5_dataset_7500_events/346x260/S10_session1_mov6_7500events.mat"
    whole_events = io.loadmat(file_name)['events'].astype(np.float32)

    # Important for DHP19
    # choose the camera_id for training and testing
    events = whole_events[whole_events[:, -1] == 0][:, :-1]

    # normalize the timestamps
    _min = events[:, 2].min()
    _max = events[:, 2].max()
    events[:, 2] = (events[:, 2] - _min) / (_max - _min)

    # change the original (x, y) range ([1,346],[1,260]) to ([0,345],[0,259])
    events[:, 0] = events[:, 0] - 1
    events[:, 1] = events[:, 1] - 1

    # randomly sample part of the events to avoid out-of-memory errors on very large recordings
    row_total = events.shape[0]
    row_needed = int(1.0 * row_total)
    row_needed = min(row_needed, 1000000)
    row_sequence = np.random.choice(row_total, row_needed, replace=False, p=None)
    row_sequence.sort()
    events = events[row_sequence, :]

    start_time = time.time()
    # img = get_timeSurface(events[:,2], events[:,0].astype(np.int32), events[:,1].astype(np.int32), events[:,3],
    #                       repr_size=(260, 346), time_num=9)
    img = get_eventFrame(events[:, 2], events[:, 0].astype(np.int32), events[:, 1].astype(np.int32), events[:, 3],
                         repr_size=(260, 346), time_num=9)
    # img = get_eventCount(events[:, 2], events[:, 0].astype(np.int32), events[:, 1].astype(np.int32), events[:, 3],
    #                      repr_size=(260, 346), time_num=9)
    # img = get_eventAccuFrame(events[:, 2], events[:, 0].astype(np.int32), events[:, 1].astype(np.int32), events[:, 3],
    #                          repr_size=(260, 346), time_num=9)
    elapsed_time = time.time() - start_time
    print(f"Function execution time: {elapsed_time:.4f} seconds")

    # save_dir = '../vis/event_accu_frame/'
    # for time_idx in range(len(img)):
    #     fig = plt.figure()
    #     fig.suptitle('event_accu_frame')
    #     plt.imshow(img[time_idx], cmap='gray')
    #     plt.xlabel("x [pixels]")
    #     plt.ylabel("y [pixels]")
    #     plt.colorbar()
    #     plt.savefig(save_dir + str(time_idx) + '.jpg')
    # print("Finish.")

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# THUE-ACT-50: A Real-World Event-Based Action Recognition Benchmark

> **📢 Update:** We are excited to announce the release of a larger and more comprehensive dataset, **THUMV-EACT-50**, which extends THUE-ACT-50 to include multi-view action recognition. For more details, please visit [THU-MV-EACT-50](https://github.com/lujiaxuan0520/THU-MV-EACT-50).

Introduced by the paper "[Action Recognition and Benchmark Using Event Cameras](https://ieeexplore.ieee.org/abstract/document/10198747)" in TPAMI 2023, **THUE-ACT-50** is a large-scale, real-world, event-specific action recognition dataset, more than four times the size of the previously largest event-based action recognition dataset. It contains 50 action categories and is primarily designed for whole-body motions and indoor healthcare applications. This repository provides access to the dataset, alongside detailed information about its contents and structure.

![Sample-sequences](figures/sample-sequences.jpg)

## Dataset Overview

**THUE-ACT-50** is designed to address the limitations of existing event-based action recognition datasets, which are often too small and limited in the range of actions they cover. The dataset consists of two parts: the standard **THUE-ACT-50** and a more challenging version, **THUE-ACT-50 CHL**, which is designed to test the robustness of algorithms under challenging conditions.

The dataset comprises a diverse set of action categories, including whole-body motions, indoor healthcare applications, detail-oriented actions, confusing actions, human-object interactions, and two-player interactive movements.
With a total of 10,500 video recordings for the standard **THUE-ACT-50** and 2,330 recordings for the challenging **THUE-ACT-50 CHL**, this dataset provides an extensive and varied collection of action sequences for researchers to explore and evaluate their models.

## Dataset Description

### Standard THUE-ACT-50

- 50 event-specific action categories
- 105 socially recruited subjects
- 10,500 video recordings
- CeleX-V event camera with a spatial resolution of 1280x800
- Two oblique front views of the actor

### Challenging THUE-ACT-50 CHL

- Challenging scenarios with different illumination conditions and action magnitudes
- 50 event-specific action categories
- 18 on-campus students as subjects
- 2,330 video recordings
- DAVIS346 event camera with a spatial resolution of 346x260
- Front, left, right, and back views
- Two different scenarios: long corridor and open hall
- Challenging conditions including:

![Different-light](figures/different-light.jpg)

## List of Actions

| ID | Action | ID | Action | ID | Action | ID | Action | ID | Action |
|----|--------|----|--------|----|--------|----|--------|----|--------|
| A0 | Walking | A10 | Cross arms | A20 | Calling with phone | A30 | Fan | A40 | Check time |
| A1 | Running | A11 | Salute | A21 | Reading | A31 | Open umbrella | A41 | Drink water |
| A2 | Jump up | A12 | Squat down | A22 | Tai chi | A32 | Close umbrella | A42 | Wipe face |
| A3 | Running in circles | A13 | Sit down | A23 | Swing objects | A33 | Put on glasses | A43 | Long jump |
| A4 | Falling down | A14 | Stand up | A24 | Throw | A34 | Take off glasses | A44 | Push up |
| A5 | Waving one hand | A15 | Sit and stand | A25 | Staggering | A35 | Pick up | A45 | Sit up |
| A6 | Waving two hands | A16 | Knead face | A26 | Headache | A36 | Put on bag | A46 | Shake hands (two-players) |
| A7 | Clap | A17 | Nod head | A27 | Stomachache | A37 | Take off bag | A47 | Fighting (two-players) |
| A8 | Rub hands | A18 | Shake head | A28 | Back pain | A38 | Put object into bag | A48 | Handing objects (two-players) |
| A9 | Punch | A19 | Thumb up | A29 | Vomit | A39 | Take object out of bag | A49 | Lifting chairs (two-players) |

## Evaluation Criteria

To evaluate the performance of event-based action recognition methods on the **THUE-ACT-50** and **THUE-ACT-50 CHL** datasets, we divided the subjects in an 8:2 ratio to create disjoint identity sets for training and testing. The training and test sets of the **THUE-ACT-50** dataset contain 85 and 20 persons, respectively, while the training and test sets of the **THUE-ACT-50 CHL** dataset contain 14 and 4 persons, respectively.

We report the following evaluation metrics for each dataset:

- **Top-1 Accuracy:** The percentage of test videos for which the model correctly predicts the action category with the highest confidence.
- **Top-N Accuracy:** The percentage of test videos for which the correct action category is within the top N predictions made by the model.
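
For reference, both metrics can be computed from per-video class scores as in the minimal sketch below (illustrative only; `scores` and `labels` are assumed to be produced by the model under evaluation and this helper is not part of the released code):

```python
import numpy as np

def topk_accuracy(scores, labels, k=1):
    """scores: (num_videos, num_classes) class scores; labels: (num_videos,) ground-truth action IDs."""
    topk = np.argsort(scores, axis=1)[:, ::-1][:, :k]  # indices of the k highest-scoring classes
    hits = (topk == labels[:, None]).any(axis=1)       # does any top-k prediction match the ground truth?
    return float(hits.mean())

# top1 = topk_accuracy(scores, labels, k=1)
# top5 = topk_accuracy(scores, labels, k=5)
```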

## Dataset Download

We're pleased to announce the release of the **THUE-ACT-50** and **THUE-ACT-50 CHL** datasets.

### **THUE-ACT-50**

+ **OneDrive:** [Download Here](https://mailstsinghuaeducn-my.sharepoint.com/:u:/g/personal/lujx20_mails_tsinghua_edu_cn/EVAfzCmMfH9KtQhHh37hCFIBXrszDqLXtOfjBir2__GTjg?e=rnUht0)
+ **BaiduYun:** [Download Here](https://pan.baidu.com/s/1ohCswORXFMyEho3A6nKnSg) (Access Code: `4csp`)

*Note*: After decompression, the dataset requires about 332GB of storage space.

### **THUE-ACT-50 CHL**

+ **Google Drive:** [Download Here](https://drive.google.com/file/d/1a5r6cw0nVX0Xe-ZzVLAhEwa9oMm4MUbS/view?usp=sharing)
+ **BaiduYun:** [Download Here](https://pan.baidu.com/s/1R6Q2U5By_h16S_TdkCRM4A) (Access Code: `fdnd`)

*Note*: After decompression, the dataset occupies approximately 4.6GB of storage space.

## Dataset Format

For both datasets, the division into training and test sets can be found in the `train.txt` and `test.txt` files, respectively. Each line consists of a **File Name** and an **Action ID**.

The preprocessing operations for the two datasets can be found in `dataset.py` (a short loading sketch is also given further below).

### **THUE-ACT-50**

In the THU-EACT-50 dataset, which is provided in the .csv format, the data is structured with 5 columns as follows:

+ y: The y-coordinate of the event.
+ x: The x-coordinate of the event.
+ b: An additional brightness value provided by the CeleX-V camera. Note that our method does not use this value.
+ p: The polarity value. It takes three values: 1, -1, and 0. In our experiments, we discard events with polarity 0 and treat 1 as positive polarity and -1 as negative polarity.
+ t: The timestamp of the event.

### **THUE-ACT-50 CHL**

For the THU-EACT-50-CHL dataset, which is available in the .npy format, each row contains 4 elements:

+ x: The x-coordinate of the event.
+ y: The y-coordinate of the event.
+ t: The timestamp of the event.
+ p: The polarity value. In this dataset, the polarity takes only the standard values 1 and 0, where 1 represents positive polarity and 0 represents negative polarity.

## Acknowledgements

We would like to express our sincere gratitude to Tsinghua University, partner companies, and organizations for their invaluable support and collaboration in making this dataset possible. Additionally, we extend our thanks to all the volunteers who participated in the data collection process. Their contributions have been instrumental in the development and evaluation of this benchmark.

## License

This dataset is licensed under the MIT License.
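
As a quick reference for the formats above, the snippet below sketches how a single recording can be read into an `(x, y, t, p)` event array (file paths are placeholders; `dataset.py` in this repository contains the complete preprocessing, including the event representations used in the paper):

```python
import numpy as np
import pandas as pd

# THU-EACT-50: CSV columns are (y, x, b, p, t); reorder to (x, y, t, p) and drop polarity-0 events
raw = pd.read_csv("path/to/recording.csv", header=None).values
events = np.vstack((raw[:, 1], raw[:, 0], raw[:, 4], raw[:, 3])).T.astype(np.float32)
events = events[events[:, 3] != 0.0]

# THU-EACT-50-CHL: each row of the .npy file is already (x, y, t, p), with p in {1, 0}
events_chl = np.load("path/to/recording.npy").astype(np.float32)
```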
111 | 112 | 113 | ## Citing Our Work 114 | 115 | If you find this dataset beneficial for your research, please cite our works: 116 | 117 | ```bibtex 118 | @article{gao2023action, 119 | title={Action Recognition and Benchmark Using Event Cameras}, 120 | author={Gao, Yue and Lu, Jiaxuan and Li, Siqi and Ma, Nan and Du, Shaoyi and Li, Yipeng and Dai, Qionghai}, 121 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 122 | year={2023}, 123 | volume={45}, 124 | number={12}, 125 | pages={14081-14097}, 126 | publisher={IEEE} 127 | } 128 | 129 | @article{gao2024hypergraph, 130 | title={Hypergraph-Based Multi-View Action Recognition Using Event Cameras}, 131 | author={Gao, Yue and Lu, Jiaxuan and Li, Siqi and Li, Yipeng and Du, Shaoyi}, 132 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 133 | year={2024}, 134 | publisher={IEEE} 135 | } 136 | ``` 137 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import re 4 | from os import listdir 5 | from os.path import join 6 | from scipy import io 7 | import pandas as pd 8 | # from torch.utils.data import DataLoader, Dataset 9 | from process import * 10 | 11 | repr_map = {'eventFrame':get_eventFrame, 12 | 'eventAccuFrame':get_eventAccuFrame, 13 | 'timeSurface':get_timeSurface, 14 | 'eventCount':get_eventCount} 15 | 16 | # left or right move all event locations randomly 17 | def random_shift_events(events, max_shift=20, resolution=(180, 240)): 18 | H, W = resolution 19 | x_shift, y_shift = np.random.randint(-max_shift, max_shift+1, size=(2,)) 20 | events[:,0] += x_shift 21 | events[:,1] += y_shift 22 | 23 | valid_events = (events[:,0] >= 0) & (events[:,0] < W) & (events[:,1] >= 0) & (events[:,1] < H) 24 | events = events[valid_events] 25 | 26 | return events 27 | 28 | # flip half of the event images along the x dimension 29 | def random_flip_events_along_x(events, resolution=(180, 240), p=0.5): 30 | H, W = resolution 31 | if np.random.random() < p: 32 | events[:,0] = W - 1 - events[:,0] 33 | return events 34 | 35 | 36 | 37 | class DHP19: 38 | def __init__(self, datafile="../DHP19", eval=False, augmentation=False, camera_id=3, 39 | repr=['timeSurface'], time_num=9): 40 | list_file_name = join(datafile,"test.txt") if eval else join(datafile,"train.txt") 41 | 42 | self.files = [] 43 | self.labels = [] 44 | self.augmentation = augmentation 45 | self.camera_id = camera_id 46 | 47 | self.repr = repr 48 | self.time_num = time_num 49 | 50 | list_file = open(list_file_name, "r") 51 | for line in list_file: 52 | file, label = line.split(" ") 53 | self.files.append(file) 54 | self.labels.append(int(label)) 55 | list_file.close() 56 | 57 | self.classes = np.unique(self.labels) 58 | 59 | def __len__(self): 60 | return len(self.files) 61 | 62 | def __getitem__(self, idx): 63 | """ 64 | returns events and label, loading events from aedat 65 | :param idx: 66 | :return: x,y,t,p, label 67 | """ 68 | label = self.labels[idx] 69 | f = self.files[idx] 70 | whole_events = io.loadmat(f)['events'].astype(np.float32) 71 | 72 | # Important for DHP19 73 | # choose the camera_id for training and testing 74 | events = whole_events[whole_events[:, -1] == self.camera_id][:,:-1] 75 | 76 | # normalize the timestamps 77 | _min = events[:,2].min() 78 | _max = events[:,2].max() 79 | events[:,2] = (events[:,2] - _min) / (_max - _min) 80 | 81 | # change the original (x.y) 
([1,346],[1,260]) to ([0,345],[0,259]) 82 | events[:, 0] = events[:, 0] - 1 83 | events[:, 1] = events[:, 1] - 1 84 | 85 | if self.augmentation: 86 | events = random_shift_events(events) 87 | events = random_flip_events_along_x(events) 88 | 89 | # return events, label 90 | 91 | reprs = [] 92 | for repr_name in self.repr: 93 | repr_array = repr_map[repr_name](events[:, 2], events[:, 0].astype(np.int32), events[:, 1].astype(np.int32), events[:, 3], 94 | repr_size=(260, 346), time_num=self.time_num) 95 | 96 | # standardization 97 | # mu = np.mean(repr_array) 98 | # sigma = np.std(repr_array) 99 | # repr_array = (repr_array - mu) / sigma 100 | 101 | reprs.append(repr_array) 102 | 103 | reprs = np.array(reprs) 104 | return reprs, label 105 | 106 | 107 | class THU_EACT_50_CHL: 108 | def __init__(self, datafile="../THU-EACT-50-CHL", eval=False, augmentation=False, 109 | repr=['timeSurface'], time_num=9, ret_file_name=False, demo=False): 110 | list_file_name = join(datafile,"test.txt") if eval else join(datafile,"train.txt") 111 | if demo: 112 | list_file_name = join(datafile, "test-demo.txt") if eval else join(datafile, "train-demo.txt") 113 | 114 | self.files = [] 115 | self.labels = [] 116 | self.augmentation = augmentation 117 | self.datafile = datafile 118 | 119 | self.repr = repr 120 | self.time_num = time_num 121 | self.ret_file_name = ret_file_name 122 | 123 | list_file = open(list_file_name, "r") 124 | for line in list_file: 125 | file, label = line.split(" ") 126 | self.files.append(file) 127 | self.labels.append(int(label)) 128 | list_file.close() 129 | 130 | self.classes = np.unique(self.labels) 131 | 132 | def __len__(self): 133 | return len(self.files) 134 | 135 | def __getitem__(self, idx): 136 | """ 137 | returns events and label, loading events from aedat 138 | :param idx: 139 | :return: x,y,t,p, label 140 | """ 141 | label = self.labels[idx] 142 | f = self.files[idx] 143 | f = f.split('DVS-action-data-npy/')[-1] 144 | f = os.path.join(self.datafile, f) 145 | 146 | events = np.load(f).astype(np.float32) 147 | 148 | # normalize the timestamps 149 | _min = events[:,2].min() 150 | _max = events[:,2].max() 151 | events[:,2] = (events[:,2] - _min) / (_max - _min) 152 | 153 | if self.augmentation: 154 | events = random_shift_events(events) 155 | events = random_flip_events_along_x(events) 156 | 157 | reprs = [] 158 | for repr_name in self.repr: 159 | repr_array = repr_map[repr_name](events[:, 2], events[:, 0].astype(np.int32), events[:, 1].astype(np.int32), 160 | events[:, 3], 161 | repr_size=(260, 346), time_num=self.time_num) 162 | 163 | # standardization 164 | # mu = np.mean(repr_array) 165 | # sigma = np.std(repr_array) 166 | # repr_array = (repr_array - mu) / sigma 167 | 168 | reprs.append(repr_array) 169 | 170 | reprs = np.array(reprs) 171 | if self.ret_file_name: 172 | # file_name = re.findall(r'A[\w-]+', f)[0] 173 | file_name = f.split('/')[-1].split('.')[0] 174 | return reprs, label, file_name 175 | else: 176 | return reprs, label 177 | 178 | 179 | class THU_EACT_50: 180 | def __init__(self, datafile="../THU_EACT_50", mode="front", eval=False, augmentation=False, max_points=1000000, 181 | repr=['timeSurface'], time_num=9): 182 | list_file_name = None 183 | if mode == "front": # front views (C1-C2) 184 | list_file_name = join(datafile,"test.txt") if eval else join(datafile,"train.txt") 185 | elif mode.startswith("view_"): # just a single view 186 | list_file_name = join(datafile, "test_" + mode + ".txt") if eval else join(datafile, "train_" + mode + ".txt") 187 | 188 | 
self.files = [] 189 | self.labels = [] 190 | self.augmentation = augmentation 191 | self.max_points = max_points 192 | self.datafile = datafile 193 | 194 | self.repr = repr 195 | self.time_num = time_num 196 | 197 | list_file = open(list_file_name, "r") 198 | for line in list_file: 199 | file, label = line.split(",") 200 | self.files.append(file) 201 | self.labels.append(int(label)) 202 | list_file.close() 203 | 204 | self.classes = np.unique(self.labels) 205 | 206 | def __len__(self): 207 | return len(self.files) 208 | 209 | def __getitem__(self, idx): 210 | """ 211 | returns events and label, loading events from aedat 212 | :param idx: 213 | :return: x,y,t,p, label 214 | """ 215 | label = self.labels[idx] 216 | f = os.path.join(self.datafile, self.files[idx]) 217 | 218 | 219 | # read the raw csv data and calculate the representations 220 | pd_reader = pd.read_csv(f, header=None).values 221 | events = np.vstack((pd_reader[:, 1], pd_reader[:, 0], pd_reader[:, 4], pd_reader[:, 3])).T.astype(np.float32) 222 | events = events[events[:,3]!=0.] # delete all the points that have the polarity of 0 223 | 224 | # normalize the timestamps 225 | _min = events[:,2].min() 226 | _max = events[:,2].max() 227 | events[:,2] = (events[:,2] - _min) / (_max - _min) 228 | 229 | 230 | if self.augmentation: 231 | events = random_shift_events(events) 232 | events = random_flip_events_along_x(events) 233 | 234 | reprs = [] 235 | for repr_name in self.repr: 236 | repr_array = repr_map[repr_name](events[:, 2], events[:, 0].astype(np.int32), events[:, 1].astype(np.int32), 237 | events[:, 3], repr_size=(800, 1280), time_num=self.time_num) 238 | # standardization 239 | # mu = np.mean(repr_array) 240 | # sigma = np.std(repr_array) 241 | # repr_array = (repr_array - mu) / sigma 242 | 243 | reprs.append(repr_array) 244 | reprs = np.array(reprs) 245 | return reprs, label 246 | 247 | 248 | if __name__ == '__main__': 249 | # for THU-EACT-50 250 | data_directory = "H:/Event_camera_action/THU-EACT-50" 251 | repr = ['timeSurface'] 252 | dataset = THU_EACT_50(datafile=data_directory, mode="front", eval=True, augmentation=False, repr=repr) 253 | 254 | # for THU-EACT-50-CHL 255 | # data_directory = "H:/Event_camera_action/THU-EACT-50-CHL" 256 | # repr = ['timeSurface'] 257 | # dataset = THU_EACT_50_CHL(datafile=data_directory, eval=True, augmentation=False, repr=repr) 258 | 259 | index_to_test = 0 # index of the sample you want to test 260 | single_sample_reprs, single_sample_label = dataset.__getitem__(index_to_test) 261 | 262 | # Output the results 263 | print("Representation Shape:", single_sample_reprs.shape) 264 | print("Label:", single_sample_label) --------------------------------------------------------------------------------
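
The dataset classes in `dataset.py` expose `__len__` and `__getitem__`, so they can be wrapped in a PyTorch `DataLoader`. A minimal sketch (assuming PyTorch is installed; the corresponding import is left commented out in `dataset.py`, and the data path is a placeholder):

```python
from torch.utils.data import DataLoader
from dataset import THU_EACT_50_CHL

dataset = THU_EACT_50_CHL(datafile="path/to/THU-EACT-50-CHL", eval=False,
                          repr=['timeSurface'], time_num=9)
loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)

for reprs, labels in loader:
    # reprs: (batch, num_reprs, time_num, 260, 346) float tensor; labels: (batch,) integer tensor
    pass
```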