├── figures
│   ├── different-light.jpg
│   └── sample-sequences.jpg
├── LICENSE
├── process.py
├── README.md
└── dataset.py
/figures/different-light.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lujiaxuan0520/THU-EACT-50/HEAD/figures/different-light.jpg
--------------------------------------------------------------------------------
/figures/sample-sequences.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lujiaxuan0520/THU-EACT-50/HEAD/figures/sample-sequences.jpg
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2023 Tsinghua University
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this data and associated documentation files (the “Data”), to deal in the Data without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Data, and to permit persons to whom the Data is furnished to do so, subject to the following conditions:
6 |
7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Data.
8 |
9 | THE Data IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE Data OR THE USE OR OTHER DEALINGS IN THE Data.
--------------------------------------------------------------------------------
/process.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy import io
3 | import numba
4 | import time
5 | # from matplotlib import pyplot as plt
6 |
7 | @numba.jit(nopython=True)  # for acceleration
8 | def get_eventFrame(ts, x, y, p, repr_size=(260,346), time_num=1):
9 |     """
10 |     Build event frames over multiple time windows of the event stream.
11 |     :param ts: event timestamps
12 |     :param x: event x-coordinates
13 |     :param y: event y-coordinates
14 |     :param p: event polarities
15 |     :param repr_size: output size (height, width)
16 |     :param time_num: number of windows to split along the temporal dimension
17 |     :return: numpy array with shape (time_num, repr_size[0], repr_size[1])
18 |     """
19 |
20 | img = np.zeros(shape=(time_num,repr_size[0],repr_size[1]), dtype=np.float32)
21 |
22 | # process each temporal window
23 |     batch_num = int(ts.size / time_num)
24 |     for time_idx in range(time_num):
25 |         # extract the events belonging to this window
26 |         ts_part = ts[time_idx * batch_num: (time_idx + 1) * batch_num]
27 |         p_part = p[time_idx * batch_num: (time_idx + 1) * batch_num]
28 |         x_part = x[time_idx * batch_num: (time_idx + 1) * batch_num]
29 |         y_part = y[time_idx * batch_num: (time_idx + 1) * batch_num]
30 |
31 | # change polarity to (0,1) if it is (1,-1)
32 | p_part = ((p_part + 1) / 2).astype(np.int32)
33 |
34 | for i in range(len(ts_part)):
35 | img[time_idx, y_part[i], x_part[i]] = (2.0 * p_part[i] - 1)
36 |
37 | # draw image
38 | # fig = plt.figure()
39 | # fig.suptitle('Event Frame')
40 | # plt.imshow(img[time_idx], cmap='gray')
41 | # plt.xlabel("x [pixels]")
42 | # plt.ylabel("y [pixels]")
43 | # plt.colorbar()
44 | # # plt.savefig('event_frame.jpg')
45 | # plt.show()
46 | return img
47 |
48 | @numba.jit(nopython=True)  # for acceleration
49 | def get_eventCount(ts, x, y, p, repr_size=(260, 346), time_num=1):
50 |     """
51 |     Build per-pixel event count frames (normalized to (-1, 1)) over multiple time windows of the event stream.
52 |     :param ts: event timestamps
53 |     :param x: event x-coordinates
54 |     :param y: event y-coordinates
55 |     :param p: event polarities
56 |     :param repr_size: output size (height, width)
57 |     :param time_num: number of windows to split along the temporal dimension
58 |     :return: numpy array with shape (time_num, repr_size[0], repr_size[1])
59 |     """
60 |
61 | img = np.zeros(shape=(time_num, repr_size[0], repr_size[1]), dtype=np.float32)
62 |
63 | # process each temporal window
64 |     batch_num = int(ts.size / time_num)
65 |     for time_idx in range(time_num):
66 |         # extract the events belonging to this window
67 |         ts_part = ts[time_idx * batch_num: (time_idx + 1) * batch_num]
68 |         p_part = p[time_idx * batch_num: (time_idx + 1) * batch_num]
69 |         x_part = x[time_idx * batch_num: (time_idx + 1) * batch_num]
70 |         y_part = y[time_idx * batch_num: (time_idx + 1) * batch_num]
71 |
72 | # change polarity to (0,1) if it is (1,-1)
73 | p_part = ((p_part + 1) / 2).astype(np.int32)
74 |
75 | # count number of points on each pixel
76 | pixel_counts = np.zeros(shape=(repr_size[0], repr_size[1]), dtype=np.int32)
77 | for i in range(len(ts_part)):
78 | pixel_counts[y_part[i], x_part[i]] += 1
79 |
80 | img[time_idx, :, :] = pixel_counts
81 |
82 | # normalize to (-1,1)
83 | img = 2 * ((img - img.min()) / (img.max() - img.min())) - 1
84 | return img
85 |
86 |
87 | @numba.jit(nopython=True)  # for acceleration
88 | def get_eventAccuFrame(ts, x, y, p, repr_size=(260,346), time_num=1):
89 |     """
90 |     Build accumulated event frames (signed polarities summed per pixel) over multiple time windows of the event stream.
91 |     :param ts: event timestamps
92 |     :param x: event x-coordinates
93 |     :param y: event y-coordinates
94 |     :param p: event polarities
95 |     :param repr_size: output size (height, width)
96 |     :param time_num: number of windows to split along the temporal dimension
97 |     :return: numpy array with shape (time_num, repr_size[0], repr_size[1])
98 |     """
99 |
100 | img = np.zeros(shape=(time_num,repr_size[0],repr_size[1]), dtype=np.float32)
101 |
102 | # process each temporal window
103 |     batch_num = int(ts.size / time_num)
104 |     for time_idx in range(time_num):
105 |         # extract the events belonging to this window
106 |         ts_part = ts[time_idx * batch_num: (time_idx + 1) * batch_num]
107 |         p_part = p[time_idx * batch_num: (time_idx + 1) * batch_num]
108 |         x_part = x[time_idx * batch_num: (time_idx + 1) * batch_num]
109 |         y_part = y[time_idx * batch_num: (time_idx + 1) * batch_num]
110 |
111 | # change polarity to (0,1) if it is (1,-1)
112 | p_part = ((p_part + 1) / 2).astype(np.int32)
113 |
114 | for i in range(len(ts_part)):
115 | img[time_idx, y_part[i], x_part[i]] += (2.0 * p_part[i] - 1)
116 |
117 | # draw image
118 | # fig = plt.figure()
119 | # fig.suptitle('Event Frame')
120 | # plt.imshow(img[time_idx], cmap='gray')
121 | # plt.xlabel("x [pixels]")
122 | # plt.ylabel("y [pixels]")
123 | # plt.colorbar()
124 | # # plt.savefig('event_frame.jpg')
125 | # plt.show()
126 | # normalize to (-1,1)
127 | # img = 2 * ((img - img.min()) / (img.max() - img.min())) - 1
128 | return img
129 |
130 |
131 | @numba.jit(nopython=True)  # for acceleration
132 | def get_timeSurface(ts, x, y, p, repr_size=(260,346), time_num=1):
133 |     """
134 |     Build polarity-signed time surfaces over multiple time windows of the event stream.
135 |     :param ts: event timestamps
136 |     :param x: event x-coordinates
137 |     :param y: event y-coordinates
138 |     :param p: event polarities
139 |     :param repr_size: output size (height, width)
140 |     :param time_num: number of windows to split along the temporal dimension
141 |     :return: numpy array with shape (time_num, repr_size[0], repr_size[1])
142 |     """
143 |
144 | # parameters for Time Surface
145 | tau = 50e-3 # 50ms
146 |
147 | # sae = np.zeros(repr_size, np.float32)
148 | sae = np.zeros((time_num,repr_size[0],repr_size[1]), np.float32)
149 |
150 | # process each temporal window
151 |     batch_num = int(ts.size / time_num)
152 |     for time_idx in range(time_num):
153 |         # extract the events belonging to this window
154 |         ts_part = ts[time_idx * batch_num: (time_idx + 1) * batch_num]
155 |         p_part = p[time_idx * batch_num: (time_idx + 1) * batch_num]
156 |         x_part = x[time_idx * batch_num: (time_idx + 1) * batch_num]
157 |         y_part = y[time_idx * batch_num: (time_idx + 1) * batch_num]
158 |
159 |         # compute the time surface using exponential decay
160 | t_ref = ts_part[-1] # 'current' time
161 | for i in range(len(ts_part)):
162 | if (p_part[i] > 0):
163 | sae[time_idx, y_part[i], x_part[i]] = np.exp(-(t_ref - ts_part[i]) / tau)
164 | else:
165 | sae[time_idx, y_part[i], x_part[i]] = -np.exp(-(t_ref - ts_part[i]) / tau)
166 |
167 |             ## non-polarity time surface
168 |             # sae[y[i], x[i]] = np.exp(-(t_ref - ts[i]) / tau)
169 |
170 | # fig = plt.figure()
171 | # fig.suptitle('Time surface')
172 | # plt.imshow(sae[time_idx], cmap='gray')
173 | # plt.xlabel("x [pixels]")
174 | # plt.ylabel("y [pixels]")
175 | # plt.colorbar()
176 | # # plt.savefig('time_surface.jpg')
177 | # plt.show()
178 | return sae
179 |
180 |
181 | if __name__ == '__main__':
182 | file_name = "/home/Event_camera_action/DHP19/h5_dataset_7500_events/346x260/S10_session1_mov6_7500events.mat"
183 | whole_events = io.loadmat(file_name)['events'].astype(np.float32)
184 |
185 |     # Important for DHP19:
186 |     # choose the camera_id for training and testing (the last column is the camera id)
187 |     events = whole_events[whole_events[:, -1] == 0][:, :-1]
188 |
189 | # normalize the timestamps
190 | _min = events[:, 2].min()
191 | _max = events[:, 2].max()
192 | events[:, 2] = (events[:, 2] - _min) / (_max - _min)
193 |
194 |     # shift the original (x, y) ranges ([1,346],[1,260]) to ([0,345],[0,259])
195 | events[:, 0] = events[:, 0] - 1
196 | events[:, 1] = events[:, 1] - 1
197 |
198 |     # randomly subsample the events to avoid out-of-memory errors on very long recordings
199 | row_total = events.shape[0]
200 | row_needed = int(1.0 * row_total)
201 | row_needed = min(row_needed, 1000000)
202 | row_sequence = np.random.choice(row_total, row_needed, replace=False, p=None)
203 | row_sequence.sort()
204 | events = events[row_sequence, :]
205 |
206 | start_time = time.time()
207 | # img = get_timeSurface(events[:,2], events[:,0].astype(np.int32), events[:,1].astype(np.int32), events[:,3],
208 | # repr_size=(260, 346), time_num=9)
209 | img = get_eventFrame(events[:, 2], events[:, 0].astype(np.int32), events[:, 1].astype(np.int32), events[:, 3],
210 | repr_size=(260, 346), time_num=9)
211 | # img = get_eventCount(events[:, 2], events[:, 0].astype(np.int32), events[:, 1].astype(np.int32), events[:, 3],
212 | # repr_size=(260, 346), time_num=9)
213 | # img = get_eventAccuFrame(events[:, 2], events[:, 0].astype(np.int32), events[:, 1].astype(np.int32), events[:, 3],
214 | # repr_size=(260, 346), time_num=9)
215 | elapsed_time = time.time() - start_time
216 | print(f"Function execution time: {elapsed_time:.4f} seconds")
217 |
218 | # save_dir = '../vis/event_accu_frame/'
219 | # for time_idx in range(len(img)):
220 | # fig = plt.figure()
221 | # fig.suptitle('event_accu_frame')
222 | # plt.imshow(img[time_idx], cmap='gray')
223 | # plt.xlabel("x [pixels]")
224 | # plt.ylabel("y [pixels]")
225 | # plt.colorbar()
226 | # plt.savefig(save_dir + str(time_idx) + '.jpg')
227 | # print("Finish.")
228 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # THUE-ACT-50: A Real-World Event-Based Action Recognition Benchmark
2 |
3 | > **📢 Update:** We are excited to announce the release of a larger and more comprehensive dataset, **THUMV-EACT-50**, which extends the THUE-ACT-50 to include multi-view action recognition. For more details, please visit [THU-MV-EACT-50](https://github.com/lujiaxuan0520/THU-MV-EACT-50).
4 |
5 | Introduced in the paper "[Action Recognition and Benchmark Using Event Cameras](https://ieeexplore.ieee.org/abstract/document/10198747)" (TPAMI 2023), **THUE-ACT-50** is a large-scale, real-world event-specific action recognition dataset, more than 4 times the size of the previously largest event-based action recognition dataset. It contains 50 action categories and is primarily designed for whole-body motions and indoor healthcare applications. This repository provides access to the dataset, along with detailed information about its contents and structure.
6 |
7 |
8 |
9 | ## Dataset Overview
10 |
11 | **THUE-ACT-50** is designed to address the limitations of existing event-based action recognition datasets, which are often too small and limited in the range of actions they cover. The dataset consists of two parts: the standard **THUE-ACT-50** and a more challenging version, **THUE-ACT-50 CHL**, designed to test the robustness of algorithms under challenging recording conditions.
12 |
13 | The dataset comprises a diverse set of action categories, including whole-body motions, indoor healthcare applications, detail-oriented actions, confusing actions, human-object interactions, and two-player interactive movements. With a total of 10,500 video recordings for the standard **THUE-ACT-50** and 2,330 recordings for the challenging **THUE-ACT-50 CHL**, this dataset provides an extensive and varied collection of action sequences for researchers to explore and evaluate their models.
14 |
15 | ## Dataset Description
16 |
17 | ### Standard THUE-ACT-50
18 |
19 | - 50 event-specific action categories
20 | - 105 socially recruited subjects
21 | - 10,500 video recordings
22 | - CeleX-V event camera with a spatial resolution of 1280x800
23 | - Two oblique front views of the actor
24 |
25 | ### Challenging THUE-ACT-50 CHL
26 |
27 | - Challenging scenarios with different illumination conditions and action magnitudes
28 | - 50 event-specific action categories
29 | - 18 on-campus students as subjects
30 | - 2,330 video recordings
31 | - DAVIS346 event camera with a spatial resolution of 346x260
32 | - Front, left, right, and back views
33 | - Two different scenarios: long corridor and open hall
34 | - Challenging conditions, including different illumination conditions and action magnitudes
35 |
36 |
37 | ## List of Actions
38 |
39 | | ID | Action | ID | Action | ID | Action | ID | Action | ID | Action |
40 | |-----------|-------------------------|-----------|-------------------------|-----------|-------------------------|-----------|-------------------------|-----------|---------------------------------------|
41 | | A0 | Walking | A10 | Cross arms | A20 | Calling with phone | A30 | Fan | A40 | Check time |
42 | | A1 | Running | A11 | Salute | A21 | Reading | A31 | Open umbrella | A41 | Drink water |
43 | | A2 | Jump up | A12 | Squat down | A22 | Tai chi | A32 | Close umbrella | A42 | Wipe face |
44 | | A3 | Running in circles | A13 | Sit down | A23 | Swing objects | A33 | Put on glasses | A43 | Long jump |
45 | | A4 | Falling down | A14 | Stand up | A24 | Throw | A34 | Take off glasses | A44 | Push up |
46 | | A5 | Waving one hand | A15 | Sit and stand | A25 | Staggering | A35 | Pick up | A45 | Sit up |
47 | | A6 | Waving two hands | A16 | Knead face | A26 | Headache | A36 | Put on bag | A46 | Shake hands (two-players) |
48 | | A7 | Clap | A17 | Nod head | A27 | Stomachache | A37 | Take off bag | A47 | Fighting (two-players) |
49 | | A8 | Rub hands | A18 | Shake head | A28 | Back pain | A38 | Put object into bag | A48 | Handing objects (two-players) |
50 | | A9 | Punch | A19 | Thumb up | A29 | Vomit | A39 | Take object out of bag | A49 | Lifting chairs (two-players) |
51 |
52 | ## Evaluation Criteria
53 |
54 | To evaluate the performance of event-based action recognition methods on the **THUE-ACT-50** and **THUE-ACT-50 CHL** datasets, we divided the subjects in a ratio of 8:2 to create disjoint identity sets for training and testing. The training and test sets of the **THUE-ACT-50** dataset contain 85 and 20 persons, respectively, while the training and test sets of the **THUE-ACT-50 CHL** dataset contain 14 and 4 persons, respectively.
55 |
56 | We report the following evaluation metrics for each dataset:
57 |
58 | - **Top-1 Accuracy:** The percentage of test videos for which the model correctly predicts the action category with the highest confidence.
59 | - **Top-N Accuracy:** The percentage of test videos for which the correct action category is within the top N predictions made by the model.
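
For reference, a minimal sketch of how Top-N accuracy can be computed from per-class scores; the function and array names below are illustrative and not part of this repository:

```python
import numpy as np

def top_n_accuracy(scores, labels, n=1):
    """scores: (num_samples, num_classes) class scores; labels: (num_samples,) ground-truth indices."""
    # indices of the n highest-scoring classes for every sample
    top_n = np.argsort(scores, axis=1)[:, -n:]
    # a sample counts as correct if its true label appears among the top n predictions
    hits = np.any(top_n == labels[:, None], axis=1)
    return hits.mean()
```

Calling `top_n_accuracy(scores, labels, n=1)` yields Top-1 accuracy.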
60 |
61 | ## Dataset Download
62 |
63 | We're pleased to announce the release of the **THUE-ACT-50** and **THUE-ACT-50 CHL** datasets.
64 |
65 | ### **THUE-ACT-50**
66 |
67 | + **OneDrive:** [Download Here](https://mailstsinghuaeducn-my.sharepoint.com/:u:/g/personal/lujx20_mails_tsinghua_edu_cn/EVAfzCmMfH9KtQhHh37hCFIBXrszDqLXtOfjBir2__GTjg?e=rnUht0)
68 | + **BaiduYun:** [Download Here](https://pan.baidu.com/s/1ohCswORXFMyEho3A6nKnSg) (Access Code: `4csp`)
69 |
70 | *Note*: After decompression, the dataset will require about 332GB of storage space.
71 |
72 | ### **THUE-ACT-50 CHL**
73 |
74 | + **Google Drive:** [Download Here](https://drive.google.com/file/d/1a5r6cw0nVX0Xe-ZzVLAhEwa9oMm4MUbS/view?usp=sharing)
75 | + **BaiduYun:** [Download Here](https://pan.baidu.com/s/1R6Q2U5By_h16S_TdkCRM4A) (Access Code: `fdnd`)
76 |
77 | *Note*: After decompression, the dataset will occupy approximately 4.6GB of storage space.
78 |
79 | ## Dataset Format
80 |
81 | In both datasets, the division into training and test sets can be found in the `train.txt` and `test.txt` files, respectively. Each line consists of a **File Name** and an **Action ID** (see `dataset.py` for the exact parsing).
82 |
83 | The preprocessing operations for the 2 datasets can be found in `dataset.py`.
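
For example, the dataset classes can be instantiated as in the `__main__` block of `dataset.py`; the paths below are placeholders for your local copies:

```python
from dataset import THU_EACT_50, THU_EACT_50_CHL

# standard THU-EACT-50 (CSV events), front views, test split
dataset = THU_EACT_50(datafile="path/to/THU-EACT-50", mode="front", eval=True,
                      repr=['timeSurface'], time_num=9)

# challenging THU-EACT-50-CHL (NPY events), test split
# dataset = THU_EACT_50_CHL(datafile="path/to/THU-EACT-50-CHL", eval=True,
#                           repr=['timeSurface'], time_num=9)

reprs, label = dataset[0]
print(reprs.shape, label)
```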
84 |
85 | ### **THUE-ACT-50**
86 |
87 | In the THU-EACT-50 dataset, each recording is provided as a .csv file in which every event is a row with 5 columns:
88 |
89 | + y: Represents the y-coordinate of the event.
90 | + x: Represents the x-coordinate of the event.
91 | + b: This is an additional brightness value provided by the CeleX-V camera. It's worth noting that for our method, this value is not utilized.
92 | + p: The polarity value. It contains three categories: 1, -1, and 0. In our experiments, we ignore the 0 values and consider 1 as positive polarity and -1 as negative polarity.
93 | + t: Represents the timestamp of the event.
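
As a minimal sketch, a single recording with this column layout can be read as follows (mirroring the loading code in `dataset.py`; the file path is a placeholder):

```python
import numpy as np
import pandas as pd

# columns of each .csv recording: y, x, b, p, t
raw = pd.read_csv("path/to/recording.csv", header=None).values
# rearrange into (x, y, t, p) events, dropping the unused brightness column
events = np.vstack((raw[:, 1], raw[:, 0], raw[:, 4], raw[:, 3])).T.astype(np.float32)
events = events[events[:, 3] != 0.]  # ignore events with polarity 0
```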
94 |
95 | ### **THUE-ACT-50 CHL**
96 |
97 | For the THU-EACT-50-CHL dataset, each recording is provided as a .npy file in which every event is a row with 4 elements:
98 |
99 | + x: Represents the x-coordinate of the event.
100 | + y: Represents the y-coordinate of the event.
101 | + t: Represents the timestamp of the event.
102 | + p: The polarity value. In this dataset, the polarity only includes standard values of 1 and 0. Here, 1 represents positive polarity, and 0 represents negative polarity.
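
A minimal loading sketch for this format (the file path is a placeholder):

```python
import numpy as np

# each row of a .npy recording is (x, y, t, p)
events = np.load("path/to/recording.npy").astype(np.float32)
x, y, t, p = events[:, 0], events[:, 1], events[:, 2], events[:, 3]
```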
103 |
104 | ## Acknowledgements
105 |
106 | We would like to express our sincere gratitude to Tsinghua University, partner companies, and organizations for their invaluable support and collaboration in making this dataset possible. Additionally, we extend our thanks to all the volunteers who participated in the data collection process. Their contributions have been instrumental in the development and evaluation of this benchmark.
107 |
108 | ## License
109 |
110 | This dataset is licensed under the MIT License.
111 |
112 |
113 | ## Citing Our Work
114 |
115 | If you find this dataset beneficial for your research, please cite our works:
116 |
117 | ```bibtex
118 | @article{gao2023action,
119 | title={Action Recognition and Benchmark Using Event Cameras},
120 | author={Gao, Yue and Lu, Jiaxuan and Li, Siqi and Ma, Nan and Du, Shaoyi and Li, Yipeng and Dai, Qionghai},
121 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
122 | year={2023},
123 | volume={45},
124 | number={12},
125 | pages={14081-14097},
126 | publisher={IEEE}
127 | }
128 |
129 | @article{gao2024hypergraph,
130 | title={Hypergraph-Based Multi-View Action Recognition Using Event Cameras},
131 | author={Gao, Yue and Lu, Jiaxuan and Li, Siqi and Li, Yipeng and Du, Shaoyi},
132 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
133 | year={2024},
134 | publisher={IEEE}
135 | }
136 | ```
137 |
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import re
4 | from os import listdir
5 | from os.path import join
6 | from scipy import io
7 | import pandas as pd
8 | # from torch.utils.data import DataLoader, Dataset
9 | from process import *
10 |
11 | repr_map = {'eventFrame':get_eventFrame,
12 | 'eventAccuFrame':get_eventAccuFrame,
13 | 'timeSurface':get_timeSurface,
14 | 'eventCount':get_eventCount}
15 |
16 | # randomly shift all event locations in x and y, dropping events that fall outside the frame
17 | def random_shift_events(events, max_shift=20, resolution=(180, 240)):
18 | H, W = resolution
19 | x_shift, y_shift = np.random.randint(-max_shift, max_shift+1, size=(2,))
20 | events[:,0] += x_shift
21 | events[:,1] += y_shift
22 |
23 | valid_events = (events[:,0] >= 0) & (events[:,0] < W) & (events[:,1] >= 0) & (events[:,1] < H)
24 | events = events[valid_events]
25 |
26 | return events
27 |
28 | # horizontally flip the events along the x dimension with probability p
29 | def random_flip_events_along_x(events, resolution=(180, 240), p=0.5):
30 | H, W = resolution
31 | if np.random.random() < p:
32 | events[:,0] = W - 1 - events[:,0]
33 | return events
34 |
35 |
36 |
37 | class DHP19:
38 | def __init__(self, datafile="../DHP19", eval=False, augmentation=False, camera_id=3,
39 | repr=['timeSurface'], time_num=9):
40 | list_file_name = join(datafile,"test.txt") if eval else join(datafile,"train.txt")
41 |
42 | self.files = []
43 | self.labels = []
44 | self.augmentation = augmentation
45 | self.camera_id = camera_id
46 |
47 | self.repr = repr
48 | self.time_num = time_num
49 |
50 | list_file = open(list_file_name, "r")
51 | for line in list_file:
52 | file, label = line.split(" ")
53 | self.files.append(file)
54 | self.labels.append(int(label))
55 | list_file.close()
56 |
57 | self.classes = np.unique(self.labels)
58 |
59 | def __len__(self):
60 | return len(self.files)
61 |
62 | def __getitem__(self, idx):
63 | """
64 | returns events and label, loading events from aedat
65 | :param idx:
66 | :return: x,y,t,p, label
67 | """
68 | label = self.labels[idx]
69 | f = self.files[idx]
70 | whole_events = io.loadmat(f)['events'].astype(np.float32)
71 |
72 | # Important for DHP19
73 | # choose the camera_id for training and testing
74 | events = whole_events[whole_events[:, -1] == self.camera_id][:,:-1]
75 |
76 | # normalize the timestamps
77 | _min = events[:,2].min()
78 | _max = events[:,2].max()
79 | events[:,2] = (events[:,2] - _min) / (_max - _min)
80 |
81 |         # shift the original (x, y) ranges ([1,346],[1,260]) to ([0,345],[0,259])
82 | events[:, 0] = events[:, 0] - 1
83 | events[:, 1] = events[:, 1] - 1
84 |
85 | if self.augmentation:
86 | events = random_shift_events(events)
87 | events = random_flip_events_along_x(events)
88 |
89 | # return events, label
90 |
91 | reprs = []
92 | for repr_name in self.repr:
93 | repr_array = repr_map[repr_name](events[:, 2], events[:, 0].astype(np.int32), events[:, 1].astype(np.int32), events[:, 3],
94 | repr_size=(260, 346), time_num=self.time_num)
95 |
96 | # standardization
97 | # mu = np.mean(repr_array)
98 | # sigma = np.std(repr_array)
99 | # repr_array = (repr_array - mu) / sigma
100 |
101 | reprs.append(repr_array)
102 |
103 | reprs = np.array(reprs)
104 | return reprs, label
105 |
106 |
107 | class THU_EACT_50_CHL:
108 | def __init__(self, datafile="../THU-EACT-50-CHL", eval=False, augmentation=False,
109 | repr=['timeSurface'], time_num=9, ret_file_name=False, demo=False):
110 | list_file_name = join(datafile,"test.txt") if eval else join(datafile,"train.txt")
111 | if demo:
112 | list_file_name = join(datafile, "test-demo.txt") if eval else join(datafile, "train-demo.txt")
113 |
114 | self.files = []
115 | self.labels = []
116 | self.augmentation = augmentation
117 | self.datafile = datafile
118 |
119 | self.repr = repr
120 | self.time_num = time_num
121 | self.ret_file_name = ret_file_name
122 |
123 | list_file = open(list_file_name, "r")
124 | for line in list_file:
125 | file, label = line.split(" ")
126 | self.files.append(file)
127 | self.labels.append(int(label))
128 | list_file.close()
129 |
130 | self.classes = np.unique(self.labels)
131 |
132 | def __len__(self):
133 | return len(self.files)
134 |
135 | def __getitem__(self, idx):
136 | """
137 | returns events and label, loading events from aedat
138 | :param idx:
139 | :return: x,y,t,p, label
140 | """
141 | label = self.labels[idx]
142 | f = self.files[idx]
143 | f = f.split('DVS-action-data-npy/')[-1]
144 | f = os.path.join(self.datafile, f)
145 |
146 | events = np.load(f).astype(np.float32)
147 |
148 | # normalize the timestamps
149 | _min = events[:,2].min()
150 | _max = events[:,2].max()
151 | events[:,2] = (events[:,2] - _min) / (_max - _min)
152 |
153 | if self.augmentation:
154 | events = random_shift_events(events)
155 | events = random_flip_events_along_x(events)
156 |
157 | reprs = []
158 | for repr_name in self.repr:
159 | repr_array = repr_map[repr_name](events[:, 2], events[:, 0].astype(np.int32), events[:, 1].astype(np.int32),
160 | events[:, 3],
161 | repr_size=(260, 346), time_num=self.time_num)
162 |
163 | # standardization
164 | # mu = np.mean(repr_array)
165 | # sigma = np.std(repr_array)
166 | # repr_array = (repr_array - mu) / sigma
167 |
168 | reprs.append(repr_array)
169 |
170 | reprs = np.array(reprs)
171 | if self.ret_file_name:
172 | # file_name = re.findall(r'A[\w-]+', f)[0]
173 | file_name = f.split('/')[-1].split('.')[0]
174 | return reprs, label, file_name
175 | else:
176 | return reprs, label
177 |
178 |
179 | class THU_EACT_50:
180 | def __init__(self, datafile="../THU_EACT_50", mode="front", eval=False, augmentation=False, max_points=1000000,
181 | repr=['timeSurface'], time_num=9):
182 | list_file_name = None
183 | if mode == "front": # front views (C1-C2)
184 | list_file_name = join(datafile,"test.txt") if eval else join(datafile,"train.txt")
185 | elif mode.startswith("view_"): # just a single view
186 | list_file_name = join(datafile, "test_" + mode + ".txt") if eval else join(datafile, "train_" + mode + ".txt")
187 |
188 | self.files = []
189 | self.labels = []
190 | self.augmentation = augmentation
191 | self.max_points = max_points
192 | self.datafile = datafile
193 |
194 | self.repr = repr
195 | self.time_num = time_num
196 |
197 | list_file = open(list_file_name, "r")
198 | for line in list_file:
199 | file, label = line.split(",")
200 | self.files.append(file)
201 | self.labels.append(int(label))
202 | list_file.close()
203 |
204 | self.classes = np.unique(self.labels)
205 |
206 | def __len__(self):
207 | return len(self.files)
208 |
209 | def __getitem__(self, idx):
210 | """
211 | returns events and label, loading events from aedat
212 | :param idx:
213 | :return: x,y,t,p, label
214 | """
215 | label = self.labels[idx]
216 | f = os.path.join(self.datafile, self.files[idx])
217 |
218 |
219 |         # read the raw csv data (columns: y, x, b, p, t) and rearrange it into (x, y, t, p) events
220 |         pd_reader = pd.read_csv(f, header=None).values
221 |         events = np.vstack((pd_reader[:, 1], pd_reader[:, 0], pd_reader[:, 4], pd_reader[:, 3])).T.astype(np.float32)
222 |         events = events[events[:, 3] != 0.]  # drop all events with polarity 0
223 |
224 | # normalize the timestamps
225 | _min = events[:,2].min()
226 | _max = events[:,2].max()
227 | events[:,2] = (events[:,2] - _min) / (_max - _min)
228 |
229 |
230 | if self.augmentation:
231 | events = random_shift_events(events)
232 | events = random_flip_events_along_x(events)
233 |
234 | reprs = []
235 | for repr_name in self.repr:
236 | repr_array = repr_map[repr_name](events[:, 2], events[:, 0].astype(np.int32), events[:, 1].astype(np.int32),
237 | events[:, 3], repr_size=(800, 1280), time_num=self.time_num)
238 | # standardization
239 | # mu = np.mean(repr_array)
240 | # sigma = np.std(repr_array)
241 | # repr_array = (repr_array - mu) / sigma
242 |
243 | reprs.append(repr_array)
244 | reprs = np.array(reprs)
245 | return reprs, label
246 |
247 |
248 | if __name__ == '__main__':
249 | # for THU-EACT-50
250 | data_directory = "H:/Event_camera_action/THU-EACT-50"
251 | repr = ['timeSurface']
252 | dataset = THU_EACT_50(datafile=data_directory, mode="front", eval=True, augmentation=False, repr=repr)
253 |
254 | # for THU-EACT-50-CHL
255 | # data_directory = "H:/Event_camera_action/THU-EACT-50-CHL"
256 | # repr = ['timeSurface']
257 | # dataset = THU_EACT_50_CHL(datafile=data_directory, eval=True, augmentation=False, repr=repr)
258 |
259 | index_to_test = 0 # index of the sample you want to test
260 |     single_sample_reprs, single_sample_label = dataset[index_to_test]
261 |
262 | # Output the results
263 | print("Representation Shape:", single_sample_reprs.shape)
264 | print("Label:", single_sample_label)
--------------------------------------------------------------------------------