├── .github
│   └── FUNDING.yml
├── .gitignore
├── Drivedata.md
├── LICENSE
├── README.md
├── assets
│   ├── Affiliation.png
│   ├── Drivedata_overview.jpg
│   ├── Drivedata_timeline.jpg
│   ├── cvpr24_genad_poster.png
│   ├── opendv_examples.png
│   ├── overview.png
│   └── vista-teaser.gif
└── opendv
    ├── .gitignore
    ├── README.md
    ├── configs
    │   ├── download.json
    │   └── video2img.json
    ├── requirements.txt
    ├── scripts
    │   ├── meta_preprocess.py
    │   ├── video2img.py
    │   └── youtube_download.py
    └── utils
        ├── cmd2caption.py
        ├── download.py
        ├── easydict.py
        └── frame_extraction.py
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: [OpenDriveLab] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/Drivedata.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future
4 |
5 | > **This repo is all you need for Open-sourced Data Ecosystem in Autonomous Driving.**
6 |
7 | We present comprehensive paper collections, leaderboards, and challenges.
8 |
9 |
11 |
12 | ## Table of Contents
13 |
14 | - [Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future](#open-sourced-data-ecosystem-in-autonomous-driving-the-present-and-future)
15 | - [Table of Contents](#table-of-contents)
16 | - [Citation](#citation)
17 | - [Challenges and Leaderboards](#challenges-and-leaderboards)
18 | - [Dataset Collection](#dataset-collection)
19 | - [Perception](#perception)
20 | - [Mapping](#mapping)
21 | - [Prediction and Planning](#prediction-and-planning)
22 |
25 | - [License](#license)
26 |
27 | ## Citation
28 | If you find this project useful in your research, please consider citing:
29 | ```BibTeX
30 | @misc{li2023opensourced,
31 | title={Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future},
32 | author={Hongyang Li and Yang Li and Huijie Wang and Jia Zeng and Pinlong Cai and Huilin Xu and Dahua Lin and Junchi Yan and Feng Xu and Lu Xiong and Jingdong Wang and Futang Zhu and Kai Yan and Chunjing Xu and Tiancai Wang and Beipeng Mu and Shaoqing Ren and Zhihui Peng and Yu Qiao},
33 | year={2023},
34 | eprint={2312.03408},
35 | archivePrefix={arXiv},
36 | primaryClass={cs.CV}
37 | }
38 | ```
39 |
40 |
(back to top)
41 |
## Challenges and Leaderboards

| Title | Host | Year | Task | Entry |
|:---|:---|:---|:---|:---|
| Autonomous Driving Challenge | OpenDriveLab | CVPR2023 | Perception / OpenLane Topology | 111 |
| | | | Perception / Online HD Map Construction | |
| | | | Perception / 3D Occupancy Prediction | |
| | | | Prediction & Planning / nuPlan Planning | |
| Waymo Open Dataset Challenges | Waymo | CVPR2023 | Perception / 2D Video Panoptic Segmentation | 35 |
| | | | Perception / Pose Estimation | |
| | | | Prediction / Motion Prediction | |
| | | | Prediction / Sim Agents | |
| | | CVPR2022 | Prediction / Motion Prediction | 128 |
| | | | Prediction / Occupancy and Flow Prediction | |
| | | | Perception / 3D Semantic Segmentation | |
| | | | Perception / 3D Camera-only Detection | |
| | | CVPR2021 | Prediction / Motion Prediction | 115 |
| | | | Prediction / Interaction Prediction | |
| | | | Perception / Real-time 3D Detection | |
| | | | Perception / Real-time 2D Detection | |
| Argoverse Challenges | Argoverse | CVPR2023 | Prediction / Multi-agent Forecasting | 81 |
| | | | Perception & Prediction / Unified Sensor-based Detection, Tracking, and Forecasting | |
| | | | Perception / LiDAR Scene Flow | |
| | | | Prediction / 3D Occupancy Forecasting | |
| | | CVPR2022 | Perception / 3D Object Detection | 81 |
| | | | Prediction / Motion Forecasting | |
| | | | Perception / Stereo Depth Estimation | |
| | | CVPR2021 | Perception / Stereo Depth Estimation | 368 |
| | | | Prediction / Motion Forecasting | |
| | | | Perception / Streaming 2D Detection | |
| CARLA Autonomous Driving Challenge | CARLA Team, Intel | 2023 | Planning / CARLA AD Challenge 2.0 | - |
| | | NeurIPS2022 | Planning / CARLA AD Challenge 1.0 | 19 |
| | | NeurIPS2021 | Planning / CARLA AD Challenge 1.0 | - |
| Guangdong-Hong Kong-Macao Greater Bay Area (Huangpu) International Algorithm Case Competition | Pazhou Lab | 2023 | Perception / Cross-scene Monocular Depth Estimation | - |
| | | | Perception / Roadside mmWave Radar Calibration and Object Tracking | - |
| | | 2022 | Perception / Roadside 3D Perception Algorithms | - |
| | | | Perception / Storefront Sign Text Recognition in Street-view Images | - |
| AI Driving Olympics | ETH Zurich, University of Montreal, Motional | NeurIPS2021 | Perception / nuScenes Panoptic | 11 |
| | | ICRA2021 | Perception / nuScenes Detection | 456 |
| | | | Perception / nuScenes Tracking | |
| | | | Prediction / nuScenes Prediction | |
| | | | Perception / nuScenes LiDAR Segmentation | |
| Jittor AI Algorithm Challenge | Department of Information Sciences, NSFC | 2021 | Perception / Traffic Sign Detection | 37 |
| KITTI Vision Benchmark Suite | University of Tübingen | 2012 | Perception / Stereo, Flow, Scene Flow, Depth, Odometry, Object, Tracking, Road, Semantics | 5,610 |

(back to top)
271 |
## Dataset Collection

### Perception

| Dataset | Year | Scenes | Hours | Region | Camera | Lidar | Other Sensors | Annotation | Paper |
|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
| KITTI | 2012 | 50 | 6 | EU | Front-view | ✗ | GPS & IMU | 2D BBox & 3D BBox | Link |
| Cityscapes | 2016 | - | - | EU | Front-view | ✗ | | 2D Seg | Link |
| Lost and Found | 2016 | 112 | - | - | Front-view | ✗ | | 2D Seg | Link |
| Mapillary | 2016 | - | - | Global | Street-view | ✗ | | 2D Seg | Link |
| DDD17 | 2017 | 36 | 12 | EU | Front-view | ✗ | GPS & CAN-bus & Event Camera | - | Link |
| Apolloscape | 2016 | 103 | 2.5 | AS | Front-view | ✗ | GPS & IMU | 3D BBox & 2D Seg | Link |
| BDD-X | 2018 | 6984 | 77 | NA | Front-view | ✗ | | Language | Link |
| HDD | 2018 | - | 104 | NA | Front-view | ✓ | GPS & IMU & CAN-bus | 2D BBox | Link |
| IDD | 2018 | 182 | - | AS | Front-view | ✗ | | 2D Seg | Link |
| SemanticKITTI | 2019 | 50 | 6 | EU | ✗ | ✓ | | 3D Seg | Link |
| Woodscape | 2019 | - | - | Global | 360° | ✓ | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| DrivingStereo | 2019 | 42 | - | AS | Front-view | ✓ | | - | Link |
| Brno-Urban | 2019 | 67 | 10 | EU | Front-view | ✓ | GPS & IMU & Infrared Camera | - | Link |
| A*3D | 2019 | - | 55 | AS | Front-view | ✓ | | 3D BBox | Link |
| Talk2Car | 2019 | 850 | 283.3 | NA | Front-view | ✓ | | Language & 3D BBox | Link |
| Talk2Nav | 2019 | 10714 | - | Sim | 360° | ✗ | | Language | Link |
| PIE | 2019 | - | 6 | NA | Front-view | ✗ | | 2D BBox | Link |
| UrbanLoco | 2019 | 13 | - | AS & NA | 360° | ✓ | IMU | - | Link |
| TITAN | 2019 | 700 | - | AS | Front-view | ✗ | | 2D BBox | Link |
| H3D | 2019 | 160 | 0.77 | NA | Front-view | ✓ | GPS & IMU | - | Link |
| A2D2 | 2020 | - | 5.6 | EU | 360° | ✓ | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| CARRADA | 2020 | 30 | 0.3 | NA | Front-view | ✗ | Radar | 3D BBox | Link |
| DAWN | 2019 | - | - | Global | Front-view | ✗ | | 2D BBox | Link |
| 4Seasons | 2019 | - | - | - | Front-view | ✗ | GPS & IMU | - | Link |
| UNDD | 2019 | - | - | - | Front-view | ✗ | | 2D Seg | Link |
| SemanticPOSS | 2020 | - | - | AS | ✗ | ✓ | GPS & IMU | 3D Seg | Link |
| Toronto-3D | 2020 | 4 | - | NA | ✗ | ✓ | | 3D Seg | Link |
| ROAD | 2021 | 22 | - | EU | Front-view | ✗ | | 2D BBox & Topology | Link |
| Reasonable Crowd | 2021 | - | - | Sim | Front-view | ✗ | | Language | Link |
| METEOR | 2021 | 1250 | 20.9 | AS | Front-view | ✗ | GPS | Language | Link |
| PandaSet | 2021 | 179 | - | NA | 360° | ✓ | GPS & IMU | 3D BBox | Link |
| MUAD | 2022 | - | - | Sim | 360° | ✓ | | 2D Seg & 2D BBox | Link |
| TAS-NIR | 2022 | - | - | - | Front-view | ✗ | Infrared Camera | 2D Seg | Link |
| LiDAR-CS | 2022 | 6 | - | Sim | ✗ | ✓ | | 3D BBox | Link |
| WildDash | 2022 | - | - | - | Front-view | ✗ | | 2D Seg | Link |
| OpenScene | 2023 | 1000 | 5.5 | AS & NA | 360° | ✗ | | 3D Occ | Link |
| ZOD | 2023 | 1473 | 8.2 | EU | 360° | ✓ | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| nuScenes | 2019 | 1000 | 5.5 | AS & NA | 360° | ✓ | GPS & CAN-bus & Radar & HDMap | 3D BBox & 3D Seg | Link |
| Argoverse V1 | 2019 | 324k | 320 | NA | 360° | ✓ | HDMap | 3D BBox & 3D Seg | Link |
| Waymo | 2019 | 1000 | 6.4 | NA | 360° | ✓ | | 2D BBox & 3D BBox | Link |
| KITTI-360 | 2020 | 366 | 2.5 | EU | 360° | ✓ | | 3D BBox & 3D Seg | Link |
| ONCE | 2021 | - | 144 | AS | 360° | ✓ | | 3D BBox | Link |
| nuPlan | 2021 | - | 120 | AS & NA | 360° | ✓ | | 3D BBox | Link |
| Argoverse V2 | 2022 | 1000 | 4 | NA | 360° | ✓ | HDMap | 3D BBox | Link |
| DriveLM | 2023 | 1000 | 5.5 | AS & NA | 360° | ✗ | | Language | Link |

(back to top)
959 |
960 |
961 |
### Mapping

| Dataset | Year | Scenes | Frames | Camera | Lidar | Annotation Type | Space | Inst. | Track | Paper |
|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
| Caltech Lanes | 2008 | 4 | 1224/1224 | | ✗ | | PV | ✓ | ✗ | Link |
| VPG | 2017 | - | 20K/20K | | ✗ | | PV | ✗ | - | Link |
| TUsimple | 2017 | 6.4K | 6.4K/128K | | ✗ | | PV | ✓ | ✗ | Link |
| CULane | 2018 | - | 133K/133K | | ✗ | | PV | ✓ | - | Link |
| ApolloScape | 2018 | 235 | 115K/115K | | ✓ | | PV | ✗ | ✗ | Link |
| LLAMAS | 2019 | 14 | 79K/100K | Front-view Image | ✗ | Laneline | PV | ✓ | ✗ | Link |
| 3D Synthetic | 2020 | - | 10K/10K | | ✗ | | PV | ✓ | - | Link |
| CurveLanes | 2020 | - | 150K/150K | | ✗ | | PV | ✓ | - | Link |
| VIL-100 | 2021 | 100 | 10K/10K | | ✗ | | PV | ✓ | ✗ | Link |
| OpenLane-V1 | 2022 | 1K | 200K/200K | | ✗ | | 3D | ✓ | ✓ | Link |
| ONCE-3DLane | 2022 | - | 211K/211K | | ✗ | | 3D | ✓ | - | Link |
| OpenLane-V2 | 2023 | 2K | 72K/72K | Multi-view Image | ✗ | Lane Centerline, Lane Segment | 3D | ✓ | ✓ | Link |

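The `Space` column above distinguishes lanes annotated in perspective view (PV) from lanes annotated in 3D. As a rough reference for how the two spaces relate, below is a minimal sketch that projects a 3D lane centerline onto the image plane with a pinhole camera model; the intrinsic matrix `K`, the `project_lane` helper, and the example lane are illustrative assumptions, not values or APIs from any dataset in the table.

```python
# A minimal sketch relating the 3D and PV lane spaces above: project 3D lane
# points (camera coordinates: x right, y down, z forward) onto the image plane.
# The intrinsic matrix K is an illustrative assumption, not from any dataset.
import numpy as np

K = np.array([[1000.0,    0.0, 960.0],   # fx,  0, cx
              [   0.0, 1000.0, 540.0],   #  0, fy, cy
              [   0.0,    0.0,   1.0]])

def project_lane(points_3d: np.ndarray, K: np.ndarray) -> np.ndarray:
    """points_3d: (N, 3) in camera coordinates -> (N, 2) pixel coordinates."""
    uvw = points_3d @ K.T             # homogeneous pixel coordinates
    return uvw[:, :2] / uvw[:, 2:3]   # perspective division by depth

# A straight centerline 1.8 m below the camera, receding from 5 m to 50 m ahead.
lane_3d = np.stack([np.zeros(10), np.full(10, 1.8), np.linspace(5, 50, 10)], axis=1)
print(project_lane(lane_3d, K))  # nearer points land lower in the image
```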
(back to top)
1162 |
### Prediction and Planning

| Subtask | Input | Output | Evaluation | Dataset |
|:---|:---|:---|:---|:---|
| Motion Prediction | Surrounding Traffic States | Spatiotemporal Trajectories of Single/Multiple Vehicle(s) | Displacement Error | Argoverse, nuScenes, Waymo, Interaction, MONA |
| Trajectory Planning | Motion States for Ego Vehicles, Scenario Cognition and Prediction | Trajectories for Ego Vehicles | Displacement Error, Safety, Compliance, Comfort | nuPlan, CARLA, MetaDrive, Apollo |
| Path Planning | Maps for Road Network | Routes Connecting Nodes and Links | Efficiency, Energy Conservation | OpenStreetMap, Transportation Networks, DTAlite, PeMS, New York City Taxi Data |

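Motion prediction in the table above is scored by displacement error. For concreteness, here is a minimal sketch of the two common variants, average displacement error (ADE) and final displacement error (FDE), assuming trajectories are `(T, 2)` arrays of positions; `displacement_errors` is a hypothetical helper, not an official implementation of any benchmark listed.

```python
# A minimal sketch of the displacement errors used to evaluate motion
# prediction: ADE averages the per-timestep error, FDE takes the final step.
import numpy as np

def displacement_errors(pred: np.ndarray, gt: np.ndarray) -> tuple[float, float]:
    """pred, gt: (T, 2) trajectories in meters. Returns (ADE, FDE)."""
    dists = np.linalg.norm(pred - gt, axis=-1)  # per-timestep Euclidean error
    return float(dists.mean()), float(dists[-1])

# Example: a 3-step prediction that slowly drifts off the ground truth.
pred = np.array([[0.0, 0.0], [1.0, 0.1], [2.0, 0.4]])
gt   = np.array([[0.0, 0.0], [1.0, 0.0], [2.0, 0.0]])
print(displacement_errors(pred, gt))  # -> (0.1666..., 0.4)
```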
(back to top)
1258 |
1259 |
1260 |
1433 |
1434 |
1435 |
1436 | ## License
1437 | Open-sourced Data Ecosystem in Autonomous Driving is released under the [Apache 2.0 license](./LICENSE).
1438 |
1439 |
1440 |
(back to top)
1441 |
1442 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DriveAGI
2 | This is **"The One"** project that [**`OpenDriveLab`**](https://opendrivelab.com/) is committed to contributing to the community, providing our thoughts and a general picture of how to embrace `foundation models` in autonomous driving.
3 |
4 | ## Table of Contents
5 | - [NEWS](#news)
6 | - [At A Glance](#at-a-glance)
7 | - 🚀 [Vista](#vista) (NeurIPS 2024)
8 | - ⭐ [GenAD: OpenDV Dataset](#opendv) (CVPR 2024 Highlight)
9 | - ⭐ [DriveLM](#drivelm) (ECCV 2024 Oral)
10 | - [DriveData Survey](#drivedata-survey)
11 |
13 | - [OpenScene](#openscene)
14 | - [OpenLane-V2 Update](#openlane-v2-update)
15 |
16 |
17 |
18 | ## NEWS
19 |
20 |
21 | **[ NEW❗️] `2024/09/08`** We released a mini version of `OpenDV-YouTube`, containing **25 hours** of driving videos. Feel free to try the mini subset by following the instructions at [OpenDV-mini](https://github.com/OpenDriveLab/DriveAGI/blob/main/opendv/README.md)!
22 |
23 | **`2024/05/28`** We released our latest research, [Vista](#vista), a generalizable driving world model. It's capable of predicting high-fidelity and long-horizon futures, executing multi-modal actions, and serving as a generalizable reward function to assess driving behaviors.
24 |
25 |
26 | **`2024/03/24`** `OpenDV-YouTube Update:` **Full suite of toolkits for OpenDV-YouTube** is now available, including data downloading and processing scripts, as well as language annotations. Please refer to [OpenDV-YouTube](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv).
27 |
28 | **`2024/03/15`** We released the complete video list of `OpenDV-YouTube`, a large-scale driving video dataset, for the [GenAD](https://arxiv.org/abs/2403.09630) project. Data downloading and processing scripts, as well as language annotations, will be released next week. Stay tuned.
29 |
30 | **`2024/01/24`**
31 | We are excited to announce some updates to [our survey](#drivedata-survey) and would like to thank John Lambert and Klemens Esterle from the community for their advice on improving the manuscript.
32 |
33 |
34 |
35 | ## At A Glance
36 |
37 |
38 | Here are some key components to construct a large foundation model curated for an autonomous system.
39 |
40 | 
41 |
42 |
43 | Below we would like to share the latest updates from our team on the **`DriveData`** side. We will release details of the **`DriveEngine`** and **`DriveAGI`** in the future.
44 |
45 |
46 | ## Vista
47 |
48 |
49 |
50 |
51 |
52 |
53 | > Simulated futures in a wide range of driving scenarios by [Vista](https://arxiv.org/abs/2405.17398). Best viewed on [demo page](https://vista-demo.github.io/).
54 |
55 | ### [🌏 **A Generalizable Driving World Model with High Fidelity and Versatile Controllability**](https://arxiv.org/abs/2405.17398) (NeurIPS 2024)
56 |
57 | **Quick facts:**
58 | - Introducing the world's first **generalizable driving world model**.
59 | - Task: High-fidelity, action-conditioned, and long-horizon future prediction for driving scenes in the wild.
60 | - Dataset: [`OpenDV-YouTube`](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv), `nuScenes`
61 | - Code and model: https://github.com/OpenDriveLab/Vista
62 | - Video Demo: https://vista-demo.github.io
63 | - Related work: [Vista](https://arxiv.org/abs/2405.17398), [GenAD](https://arxiv.org/abs/2403.09630)
64 |
65 | ```bibtex
66 | @inproceedings{gao2024vista,
67 | title={Vista: A Generalizable Driving World Model with High Fidelity and Versatile Controllability},
68 | author={Shenyuan Gao and Jiazhi Yang and Li Chen and Kashyap Chitta and Yihang Qiu and Andreas Geiger and Jun Zhang and Hongyang Li},
69 | booktitle={Advances in Neural Information Processing Systems (NeurIPS)},
70 | year={2024}
71 | }
72 |
73 | @inproceedings{yang2024genad,
74 | title={{Generalized Predictive Model for Autonomous Driving}},
75 | author={Jiazhi Yang and Shenyuan Gao and Yihang Qiu and Li Chen and Tianyu Li and Bo Dai and Kashyap Chitta and Penghao Wu and Jia Zeng and Ping Luo and Jun Zhang and Andreas Geiger and Yu Qiao and Hongyang Li},
76 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
77 | year={2024}
78 | }
79 | ```
80 |
81 | ## GenAD: OpenDV Dataset
82 | 
83 | > Examples of **real-world** driving scenarios in the OpenDV dataset, including urban, highway, rural scenes, etc.
84 |
85 | ### [⭐ **Generalized Predictive Model for Autonomous Driving**](https://arxiv.org/abs/2403.09630) (**CVPR 2024, Highlight**)
86 |
87 | ### [Paper](https://arxiv.org/abs/2403.09630) | [Video](https://www.youtube.com/watch?v=a4H6Jj-7IC0) | [Poster](assets/cvpr24_genad_poster.png) | [Slides](https://opendrivelab.github.io/content/GenAD_slides_with_vista.pdf)
88 |
89 | 🎦 The **largest driving video dataset** to date, containing more than **1700 hours** of real-world driving videos, 300 times larger than the widely used nuScenes dataset.
90 |
91 |
92 | - **Complete video list** (under YouTube license): [OpenDV Videos](https://docs.google.com/spreadsheets/d/1bHWWP_VXeEe5UzIG-QgKFBdH7mNlSC4GFSJkEhFnt2I).
93 |   - The downloaded raw videos (`mostly 1080P`) consume about `3 TB` of storage space. However, these hour-long videos cannot be used directly for model training, as loading them is extremely memory-consuming.
94 |   - Therefore, we preprocess them into consecutive images, which are far more flexible and efficient to load during training (a minimal sketch of this step is given below). The processed images consume about `24 TB` of storage space in total.
95 |   - It's recommended to set up your experiments on a small subset, say **1/20** of the whole dataset. An official mini subset is also provided; refer to [**OpenDV-mini**](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv#about-opendv-youtube-and-opendv-mini) for details. After stabilizing the training, you can then apply your method to the whole dataset and hope for the best 🤞.
96 | - **[ New❗️] Mini subset**: [OpenDV-mini](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv).
97 |   - A mini version of `OpenDV-YouTube`. The raw videos consume about `44 GB` of storage space and the processed images about `390 GB`.
98 | - **Step-by-step instructions for data preparation**: [OpenDV-YouTube](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv/README.md).
99 | - **Language annotations for OpenDV-YouTube**: [OpenDV-YouTube-Language](https://huggingface.co/datasets/OpenDriveLab/OpenDV-YouTube-Language).
100 |
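For orientation, the sketch below shows what the video-to-image preprocessing amounts to: subsampling each video to a fixed frame rate and dumping JPEGs. It is a minimal illustration assuming OpenCV is available; the function name and the 10 FPS target are our assumptions, and the official `opendv` scripts (`scripts/video2img.py` with `configs/video2img.json`) should be used for actual data preparation.

```python
# A minimal sketch (not the official pipeline) of turning hour-long driving
# videos into consecutive images that are cheap to load during training.
import os
import cv2  # assumes opencv-python is installed

def extract_frames(video_path: str, out_dir: str, target_fps: float = 10.0) -> int:
    """Subsample `video_path` to roughly `target_fps` and dump frames as JPEGs."""
    os.makedirs(out_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    src_fps = cap.get(cv2.CAP_PROP_FPS) or target_fps  # fall back if FPS unknown
    step = max(1, round(src_fps / target_fps))         # keep every `step`-th frame
    idx = saved = 0
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        if idx % step == 0:
            cv2.imwrite(os.path.join(out_dir, f"{saved:07d}.jpg"), frame)
            saved += 1
        idx += 1
    cap.release()
    return saved
```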
101 |
102 | **Quick facts:**
103 | - Task: large-scale video prediction for driving scenes.
104 | - Data source: `YouTube`, with a careful collection and filtering process.
105 | - Diversity Highlights: 1700 hours of driving videos, covering more than 244 cities in 40 countries.
106 | - Related work: [GenAD](https://arxiv.org/abs/2403.09630) **`Accepted at CVPR 2024, Highlight`**
107 | - `Note`: Annotations for other public datasets in OpenDV-2K will not be released, since we randomly sampled a subset of them for training; these samples are incomplete and hard to trace back to their original files. Nevertheless, it's easy to reproduce the collection and annotation process on your own by following [our paper](https://arxiv.org/abs/2403.09630).
108 |
109 | ```bibtex
110 | @inproceedings{yang2024genad,
111 | title={Generalized Predictive Model for Autonomous Driving},
112 | author={Jiazhi Yang and Shenyuan Gao and Yihang Qiu and Li Chen and Tianyu Li and Bo Dai and Kashyap Chitta and Penghao Wu and Jia Zeng and Ping Luo and Jun Zhang and Andreas Geiger and Yu Qiao and Hongyang Li},
113 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
114 | year={2024}
115 | }
116 | ```
117 |
118 | ## DriveLM
119 | Introducing the first benchmark on **Language Prompt for Driving**.
120 |
121 | **Quick facts:**
122 | - Task: given the language prompts as input, predict the trajectory in the scene
123 | - Origin dataset: `nuScenes`, `CARLA (To be released)`
124 | - Repo: https://github.com/OpenDriveLab/DriveLM, https://github.com/OpenDriveLab/ELM
125 | - Related work: [DriveLM](https://arxiv.org/abs/2312.14150), [ELM](https://arxiv.org/abs/2403.04593)
126 | - Related challenge: [Driving with Language AGC Challenge 2024](https://opendrivelab.com/challenge2024/#driving_with_language)
127 |
128 |
129 | ## DriveData Survey
130 |
131 |
132 | ### Abstract
133 | With the continuous maturation and application of autonomous driving technology, a systematic examination of open-source autonomous driving datasets becomes instrumental in fostering the robust evolution of the industry ecosystem. In this survey, we provide a comprehensive analysis of more than 70 papers covering the timeline, impact, challenges, and future trends of autonomous driving datasets.
134 |
135 | > **Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future**
136 | > - [English Version](https://arxiv.org/abs/2312.03408)
137 | > - [Chinese Version](https://www.sciengine.com/SSI/doi/10.1360/SSI-2023-0313) **`Accepted at SCIENTIA SINICA Informationis (中文版)`**
138 |
139 | ```bibtex
140 | @article{li2024_driving_dataset_survey,
141 | title = {Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future},
142 | author = {Hongyang Li and Yang Li and Huijie Wang and Jia Zeng and Huilin Xu and Pinlong Cai and Li Chen and Junchi Yan and Feng Xu and Lu Xiong and Jingdong Wang and Futang Zhu and Chunjing Xu and Tiancai Wang and Fei Xia and Beipeng Mu and Zhihui Peng and Dahua Lin and Yu Qiao},
143 | journal = {SCIENTIA SINICA Informationis},
144 | year = {2024},
145 | doi = {10.1360/SSI-2023-0313}
146 | }
147 | ```
148 |
149 |
153 |
154 | 
155 | > Current autonomous driving datasets can broadly be categorized into two generations since the 2010s. We define the Impact (y-axis) of a dataset based on sensor configuration, input modality, task category, data scale, ecosystem, etc.
156 |
157 | 
158 |
159 | ### Related Work Collection
160 |
161 | We present comprehensive paper collections, leaderboards, and challenges. (Click to expand each section below.)
162 |
163 |
<details>
<summary><b>Challenges and Leaderboards</b></summary>

| Title | Host | Year | Task | Entry |
|:---|:---|:---|:---|:---|
| Autonomous Driving Challenge | OpenDriveLab | CVPR2023 | Perception / OpenLane Topology | 111 |
| | | | Perception / Online HD Map Construction | |
| | | | Perception / 3D Occupancy Prediction | |
| | | | Prediction & Planning / nuPlan Planning | |
| Waymo Open Dataset Challenges | Waymo | CVPR2023 | Perception / 2D Video Panoptic Segmentation | 35 |
| | | | Perception / Pose Estimation | |
| | | | Prediction / Motion Prediction | |
| | | | Prediction / Sim Agents | |
| | | CVPR2022 | Prediction / Motion Prediction | 128 |
| | | | Prediction / Occupancy and Flow Prediction | |
| | | | Perception / 3D Semantic Segmentation | |
| | | | Perception / 3D Camera-only Detection | |
| | | CVPR2021 | Prediction / Motion Prediction | 115 |
| | | | Prediction / Interaction Prediction | |
| | | | Perception / Real-time 3D Detection | |
| | | | Perception / Real-time 2D Detection | |
| Argoverse Challenges | Argoverse | CVPR2023 | Prediction / Multi-agent Forecasting | 81 |
| | | | Perception & Prediction / Unified Sensor-based Detection, Tracking, and Forecasting | |
| | | | Perception / LiDAR Scene Flow | |
| | | | Prediction / 3D Occupancy Forecasting | |
| | | CVPR2022 | Perception / 3D Object Detection | 81 |
| | | | Prediction / Motion Forecasting | |
| | | | Perception / Stereo Depth Estimation | |
| | | CVPR2021 | Perception / Stereo Depth Estimation | 368 |
| | | | Prediction / Motion Forecasting | |
| | | | Perception / Streaming 2D Detection | |
| CARLA Autonomous Driving Challenge | CARLA Team, Intel | 2023 | Planning / CARLA AD Challenge 2.0 | - |
| | | NeurIPS2022 | Planning / CARLA AD Challenge 1.0 | 19 |
| | | NeurIPS2021 | Planning / CARLA AD Challenge 1.0 | - |
| Guangdong-Hong Kong-Macao Greater Bay Area (Huangpu) International Algorithm Case Competition | Pazhou Lab | 2023 | Perception / Cross-scene Monocular Depth Estimation | - |
| | | | Perception / Roadside mmWave Radar Calibration and Object Tracking | - |
| | | 2022 | Perception / Roadside 3D Perception Algorithms | - |
| | | | Perception / Storefront Sign Text Recognition in Street-view Images | - |
| AI Driving Olympics | ETH Zurich, University of Montreal, Motional | NeurIPS2021 | Perception / nuScenes Panoptic | 11 |
| | | ICRA2021 | Perception / nuScenes Detection | 456 |
| | | | Perception / nuScenes Tracking | |
| | | | Prediction / nuScenes Prediction | |
| | | | Perception / nuScenes LiDAR Segmentation | |
| Jittor AI Algorithm Challenge | Department of Information Sciences, NSFC | 2021 | Perception / Traffic Sign Detection | 37 |
| KITTI Vision Benchmark Suite | University of Tübingen | 2012 | Perception / Stereo, Flow, Scene Flow, Depth, Odometry, Object, Tracking, Road, Semantics | 5,610 |

</details>
391 |
392 | (back to top)
393 |
394 |
395 |
396 |
<details>
<summary><b>Perception Datasets</b></summary>

| Dataset | Year | Scenes | Hours | Region | Camera | Lidar | Other Sensors | Annotation | Paper |
|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
| KITTI | 2012 | 50 | 6 | EU | Front-view | ✗ | GPS & IMU | 2D BBox & 3D BBox | Link |
| Cityscapes | 2016 | - | - | EU | Front-view | ✗ | | 2D Seg | Link |
| Lost and Found | 2016 | 112 | - | - | Front-view | ✗ | | 2D Seg | Link |
| Mapillary | 2016 | - | - | Global | Street-view | ✗ | | 2D Seg | Link |
| DDD17 | 2017 | 36 | 12 | EU | Front-view | ✗ | GPS & CAN-bus & Event Camera | - | Link |
| Apolloscape | 2016 | 103 | 2.5 | AS | Front-view | ✗ | GPS & IMU | 3D BBox & 2D Seg | Link |
| BDD-X | 2018 | 6984 | 77 | NA | Front-view | ✗ | | Language | Link |
| HDD | 2018 | - | 104 | NA | Front-view | ✓ | GPS & IMU & CAN-bus | 2D BBox | Link |
| IDD | 2018 | 182 | - | AS | Front-view | ✗ | | 2D Seg | Link |
| SemanticKITTI | 2019 | 50 | 6 | EU | ✗ | ✓ | | 3D Seg | Link |
| Woodscape | 2019 | - | - | Global | 360° | ✓ | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| DrivingStereo | 2019 | 42 | - | AS | Front-view | ✓ | | - | Link |
| Brno-Urban | 2019 | 67 | 10 | EU | Front-view | ✓ | GPS & IMU & Infrared Camera | - | Link |
| A*3D | 2019 | - | 55 | AS | Front-view | ✓ | | 3D BBox | Link |
| Talk2Car | 2019 | 850 | 283.3 | NA | Front-view | ✓ | | Language & 3D BBox | Link |
| Talk2Nav | 2019 | 10714 | - | Sim | 360° | ✗ | | Language | Link |
| PIE | 2019 | - | 6 | NA | Front-view | ✗ | | 2D BBox | Link |
| UrbanLoco | 2019 | 13 | - | AS & NA | 360° | ✓ | IMU | - | Link |
| TITAN | 2019 | 700 | - | AS | Front-view | ✗ | | 2D BBox | Link |
| H3D | 2019 | 160 | 0.77 | NA | Front-view | ✓ | GPS & IMU | - | Link |
| A2D2 | 2020 | - | 5.6 | EU | 360° | ✓ | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| CARRADA | 2020 | 30 | 0.3 | NA | Front-view | ✗ | Radar | 3D BBox | Link |
| DAWN | 2019 | - | - | Global | Front-view | ✗ | | 2D BBox | Link |
| 4Seasons | 2019 | - | - | - | Front-view | ✗ | GPS & IMU | - | Link |
| UNDD | 2019 | - | - | - | Front-view | ✗ | | 2D Seg | Link |
| SemanticPOSS | 2020 | - | - | AS | ✗ | ✓ | GPS & IMU | 3D Seg | Link |
| Toronto-3D | 2020 | 4 | - | NA | ✗ | ✓ | | 3D Seg | Link |
| ROAD | 2021 | 22 | - | EU | Front-view | ✗ | | 2D BBox & Topology | Link |
| Reasonable Crowd | 2021 | - | - | Sim | Front-view | ✗ | | Language | Link |
| METEOR | 2021 | 1250 | 20.9 | AS | Front-view | ✗ | GPS | Language | Link |
| PandaSet | 2021 | 179 | - | NA | 360° | ✓ | GPS & IMU | 3D BBox | Link |
| MUAD | 2022 | - | - | Sim | 360° | ✓ | | 2D Seg & 2D BBox | Link |
| TAS-NIR | 2022 | - | - | - | Front-view | ✗ | Infrared Camera | 2D Seg | Link |
| LiDAR-CS | 2022 | 6 | - | Sim | ✗ | ✓ | | 3D BBox | Link |
| WildDash | 2022 | - | - | - | Front-view | ✗ | | 2D Seg | Link |
| OpenScene | 2023 | 1000 | 5.5 | AS & NA | 360° | ✗ | | 3D Occ | Link |
| ZOD | 2023 | 1473 | 8.2 | EU | 360° | ✓ | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| nuScenes | 2019 | 1000 | 5.5 | AS & NA | 360° | ✓ | GPS & CAN-bus & Radar & HDMap | 3D BBox & 3D Seg | Link |
| Argoverse V1 | 2019 | 324k | 320 | NA | 360° | ✓ | HDMap | 3D BBox & 3D Seg | Link |
| Waymo | 2019 | 1000 | 6.4 | NA | 360° | ✓ | | 2D BBox & 3D BBox | Link |
| KITTI-360 | 2020 | 366 | 2.5 | EU | 360° | ✓ | | 3D BBox & 3D Seg | Link |
| ONCE | 2021 | - | 144 | AS | 360° | ✓ | | 3D BBox | Link |
| nuPlan | 2021 | - | 120 | AS & NA | 360° | ✓ | | 3D BBox | Link |
| Argoverse V2 | 2022 | 1000 | 4 | NA | 360° | ✓ | HDMap | 3D BBox | Link |
| DriveLM | 2023 | 1000 | 5.5 | AS & NA | 360° | ✗ | | Language | Link |

</details>
1050 |
1051 | (back to top)
1052 |
1053 |
1054 |
<details>
<summary><b>Mapping Datasets</b></summary>

| Dataset | Year | Scenes | Frames | Camera | Lidar | Annotation Type | Space | Inst. | Track | Paper |
|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
| Caltech Lanes | 2008 | 4 | 1224/1224 | | ✗ | | PV | ✓ | ✗ | Link |
| VPG | 2017 | - | 20K/20K | | ✗ | | PV | ✗ | - | Link |
| TUsimple | 2017 | 6.4K | 6.4K/128K | | ✗ | | PV | ✓ | ✗ | Link |
| CULane | 2018 | - | 133K/133K | | ✗ | | PV | ✓ | - | Link |
| ApolloScape | 2018 | 235 | 115K/115K | | ✓ | | PV | ✗ | ✗ | Link |
| LLAMAS | 2019 | 14 | 79K/100K | Front-view Image | ✗ | Laneline | PV | ✓ | ✗ | Link |
| 3D Synthetic | 2020 | - | 10K/10K | | ✗ | | PV | ✓ | - | Link |
| CurveLanes | 2020 | - | 150K/150K | | ✗ | | PV | ✓ | - | Link |
| VIL-100 | 2021 | 100 | 10K/10K | | ✗ | | PV | ✓ | ✗ | Link |
| OpenLane-V1 | 2022 | 1K | 200K/200K | | ✗ | | 3D | ✓ | ✓ | Link |
| ONCE-3DLane | 2022 | - | 211K/211K | | ✗ | | 3D | ✓ | - | Link |
| OpenLane-V2 | 2023 | 2K | 72K/72K | Multi-view Image | ✗ | Lane Centerline, Lane Segment | 3D | ✓ | ✓ | Link |

</details>
1254 |
1255 | ### Prediction and Planning Datasets
1256 |
1261 | | Subtask | Input | Output | Evaluation | Dataset |
1262 | |---|---|---|---|---|
1269 | | Motion Prediction | Surrounding Traffic States | Spatiotemporal Trajectories of Single/Multiple Vehicle(s) | Displacement Error | Argoverse, nuScenes, Waymo, Interaction, MONA |
1297 | | Trajectory Planning | Motion States for Ego Vehicles, Scenario Cognition and Prediction | Trajectories for Ego Vehicles | Displacement Error, Safety, Compliance, Comfort | nuPlan, CARLA, MetaDrive, Apollo |
1319 | | Path Planning | Maps for Road Network | Routes Connecting to Nodes and Links | Efficiency, Energy Conservation | OpenStreetMap, Transportation Networks, DTAlite, PeMS, New York City Taxi Data |
1347 |
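Here "Displacement Error" is commonly instantiated as the Average / Final Displacement Error (ADE / FDE). As a general sketch (exact horizons and multi-modal variants differ across the benchmarks above), for a predicted trajectory $\hat{y}_{1:T}$ and its ground truth $y_{1:T}$:

$$\mathrm{ADE} = \frac{1}{T}\sum_{t=1}^{T}\lVert \hat{y}_t - y_t \rVert_2, \qquad \mathrm{FDE} = \lVert \hat{y}_T - y_T \rVert_2$$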
1348 |
1349 |
1350 |
1351 |
1352 |
1353 | ## OpenScene
1354 |
1355 |
1356 | The largest up-to-date **3D Occupancy Forecasting** dataset for visual pre-training.
1357 |
1358 | **Quick facts:**
1359 | - Task: given large-scale driving data, predict the 3D occupancy of the environment.
1360 | - Origin dataset: `nuPlan`
1361 | - Repo: https://github.com/OpenDriveLab/OpenScene
1362 | - Related work: [OccNet](https://github.com/OpenDriveLab/OccNet)
1363 | - Related challenge: [3D Occupancy Prediction Challenge 2023](https://opendrivelab.com/AD23Challenge.html#Track3), [Occupancy and Flow AGC Challenge 2024](https://opendrivelab.com/challenge2024/#occupancy_and_flow), [Predictive World Model AGC Challenge 2024](https://opendrivelab.com/challenge2024/#predictive_world_model)
1364 |
1365 |
1366 | ## OpenLane-V2 Update
1367 |
1368 |
1369 | Enriching [OpenLane-V2](https://github.com/OpenDriveLab/OpenLane-V2) with **Standard Definition (SD) Map and Map Elements**.
1370 |
1371 | **Quick facts:**
1372 | - Task: given multi-view images and an SD map (also known as an ADAS map) as input, build the driving scene on the fly _without_ the aid of an HD map.
1373 | - Repo: https://github.com/OpenDriveLab/OpenLane-V2
1374 | - Related work: [OpenLane-V2](https://openreview.net/forum?id=OMOOO3ls6g), [TopoNet](https://github.com/OpenDriveLab/TopoNet), [LaneSegNet](https://github.com/OpenDriveLab/LaneSegNet)
1375 | - Related challenge: [Lane Topology Challenge 2023](https://opendrivelab.com/AD23Challenge.html#openlane_topology), [Mapless Driving AGC Challenge 2024](https://opendrivelab.com/challenge2024/#mapless_driving)
1376 |
1377 |
1378 |
1379 |
--------------------------------------------------------------------------------
/assets/Affiliation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/Affiliation.png
--------------------------------------------------------------------------------
/assets/Drivedata_overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/Drivedata_overview.jpg
--------------------------------------------------------------------------------
/assets/Drivedata_timeline.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/Drivedata_timeline.jpg
--------------------------------------------------------------------------------
/assets/cvpr24_genad_poster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/cvpr24_genad_poster.png
--------------------------------------------------------------------------------
/assets/opendv_examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/opendv_examples.png
--------------------------------------------------------------------------------
/assets/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/overview.png
--------------------------------------------------------------------------------
/assets/vista-teaser.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/vista-teaser.gif
--------------------------------------------------------------------------------
/opendv/.gitignore:
--------------------------------------------------------------------------------
1 | # full OpenDV-YouTube dataset
2 | *meta/
3 | OpenDV-YouTube/
4 | annos
5 |
6 | # logs
7 | *exceptions.txt
8 | *output.txt
9 | *finished.txt
10 |
11 | # Byte-compiled / optimized / DLL files
12 | __pycache__/
13 | *.py[cod]
14 | *$py.class
15 |
16 | # C extensions
17 | *.so
18 |
19 | # Distribution / packaging
20 | .Python
21 | build/
22 | develop-eggs/
23 | dist/
24 | downloads/
25 | eggs/
26 | .eggs/
27 | lib/
28 | lib64/
29 | parts/
30 | sdist/
31 | var/
32 | wheels/
33 | pip-wheel-metadata/
34 | share/python-wheels/
35 | *.egg-info/
36 | .installed.cfg
37 | *.egg
38 | MANIFEST
39 |
40 | # PyInstaller
41 | # Usually these files are written by a python script from a template
42 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
43 | *.manifest
44 | *.spec
45 |
46 | # Installer logs
47 | pip-log.txt
48 | pip-delete-this-directory.txt
49 |
50 | # Unit test / coverage reports
51 | htmlcov/
52 | .tox/
53 | .nox/
54 | .coverage
55 | .coverage.*
56 | .cache
57 | nosetests.xml
58 | coverage.xml
59 | *.cover
60 | *.py,cover
61 | .hypothesis/
62 | .pytest_cache/
63 |
64 | # Translations
65 | *.mo
66 | *.pot
67 |
68 | # Django stuff:
69 | *.log
70 | local_settings.py
71 | db.sqlite3
72 | db.sqlite3-journal
73 |
74 | # Flask stuff:
75 | instance/
76 | .webassets-cache
77 |
78 | # Scrapy stuff:
79 | .scrapy
80 |
81 | # Sphinx documentation
82 | docs/_build/
83 |
84 | # PyBuilder
85 | target/
86 |
87 | # Jupyter Notebook
88 | .ipynb_checkpoints
89 |
90 | # IPython
91 | profile_default/
92 | ipython_config.py
93 |
94 | # pyenv
95 | .python-version
96 |
97 | # pipenv
98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
101 | # install all needed dependencies.
102 | #Pipfile.lock
103 |
104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105 | __pypackages__/
106 |
107 | # Celery stuff
108 | celerybeat-schedule
109 | celerybeat.pid
110 |
111 | # SageMath parsed files
112 | *.sage.py
113 |
114 | # Environments
115 | .env
116 | .venv
117 | env/
118 | venv/
119 | ENV/
120 | env.bak/
121 | venv.bak/
122 |
123 | # Spyder project settings
124 | .spyderproject
125 | .spyproject
126 |
127 | # Rope project settings
128 | .ropeproject
129 |
130 | # mkdocs documentation
131 | /site
132 |
133 | # mypy
134 | .mypy_cache/
135 | .dmypy.json
136 | dmypy.json
137 |
138 | # Pyre type checker
139 | .pyre/
140 |
141 | .gitconfig
142 | .local
143 | .jupyter
144 | .DS_Store
145 | .python_history
146 |
147 | data/OpenLane-V2/*
148 | !data/OpenLane-V2/data_dict_sample.json
149 | !data/OpenLane-V2/data_dict_example.json
150 | !data/OpenLane-V2/openlanev2.md5
151 | !data/OpenLane-V2/preprocess*
152 | !data/OpenLane-V2/data_dict_subset_A.json
153 |
154 | RoadData/vis
155 | RoadData/gt_results.json
156 | RoadDataTool/vis
157 | RoadDataTool/gt_result.json
158 | RoadDataTool/pred_case1_no_turbulence.json
159 |
--------------------------------------------------------------------------------
/opendv/README.md:
--------------------------------------------------------------------------------
1 | # OpenDV-YouTube
2 | Due to the YouTube license, we cannot directly offer our processed data. However, you can follow the steps below to download the raw data and process it yourself.
3 |
4 | **
[ NEW❗️]**: We just released the **
OpenDV-mini** subset!
5 | Please feel free to try the mini subset by following steps. Necessary information is also contained in our
OpenDV-YouTube Google Sheet (marked as `Mini` in the column `Mini / Full Set`).
6 |
7 | ## About OpenDV-YouTube and OpenDV-mini
8 |
9 | - The complete dataset, OpenDV-YouTube, is the **largest driving video dataset** to date, containing more than **1700 hours** of real-world driving videos, 300 times larger than the widely used nuScenes dataset.
10 |
11 | - The mini subset, OpenDV-mini, contains about **28 hours** of videos, with diverse geographical distribution and various camera settings. Among these videos, **25 hours** are used as `mini-train` and the other **3 hours** are used as `mini-val`.
12 |
13 | ## Environment Setup
14 |
15 | **We recommend processing the dataset in a `Linux` environment, since `Windows` may have issues with file paths.**
16 |
17 | Install the required packages by running the following command.
18 |
19 | ```cmd
20 | conda create -n opendv python=3.10 -y
21 | conda activate opendv
22 | pip install -r requirements.txt
23 | ```
24 |
25 | In case the metadata of the downloaded videos is fragmented, we recommend installing `ffmpeg<=3.4.9`. Instead of using the following commands, you can also clone and build it directly from [the official repository](https://github.com/FFmpeg/FFmpeg/tree/release/3.4).
26 |
27 | ```cmd
28 | # 1. prepare yasm for ffmpeg. If it is already satisfied by your machine, skip to the next step.
29 | wget https://tortall.net/projects/yasm/releases/yasm-1.3.0.tar.gz
30 | tar -xzvf yasm-1.3.0.tar.gz
31 | cd yasm-1.3.0
32 | ./configure
33 | make
34 | make install
35 |
36 | # 2. install ffmpeg<=3.4.9.
37 | wget https://ffmpeg.org/releases/ffmpeg-3.4.9.tar.gz
38 | tar -xzvf ffmpeg-3.4.9.tar.gz
39 | cd ffmpeg-3.4.9
40 | ./configure
41 | make
42 | make install
43 |
44 | # 3. check the installation. Sometimes you may need to reactivate the conda environment to see it working.
45 | ffprobe
46 | ```
47 |
48 | ## Meta Data Preparation
49 | First, download the OpenDV-YouTube Google Sheet as a `csv` file. By default, you should save the file as `meta/OpenDV-YouTube.csv`. You can change it to any path you want, as long as you also change the `csv_path` in the command in the next step.
50 |
51 | Then, run the following command to preprocess the meta data. The default values for `--csv_path` (or `-i`) and `--json_path` (or `-o`) are `meta/OpenDV-YouTube.csv` and `meta/OpenDV-YouTube.json` respectively.
52 |
53 | ```cmd
54 | python scripts/meta_preprocess.py -i CSV_PATH -o JSON_PATH
55 | ```
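For orientation, each entry of the resulting `json` file is one video record. The sketch below is an illustration only (all values are placeholders); the exact key set follows the Google Sheet columns after the renaming in `scripts/meta_preprocess.py` (e.g. `split`, `subset`, `start_discard`, `end_discard`), plus the derived `length` in seconds:

```json
[
    {
        "videoid": "xxxxxxxxxxx",
        "youtuber": "Some Channel",
        "link": "https://www.youtube.com/watch?v=xxxxxxxxxxx",
        "duration": "1:02:03",
        "split": "Train",
        "subset": "Mini",
        "start_discard": 90,
        "end_discard": 60,
        "length": 3723
    }
]
```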
56 |
57 | ## Raw Data Download (Raw videos)
58 |
59 | To download the raw data from YouTube, you should first adjust the configuration in `configs/download.json`.
60 |
61 | Note that the script **supports parallel downloading with multiple workers**, so please set `num_workers` to a value that suits your hardware and network conditions.
62 |
63 | Also, the `format` key in the config file **should strictly obey** the format selection rules of the `youtube-dl` package. We do not recommend changing it unless you are familiar with the package.
64 |
65 | Now you can run the following command to download the raw video data.
66 |
67 | ```cmd
68 | python scripts/youtube_download.py >> download_output.txt
69 | ```
70 |
71 | The download will take about $2000/\mathrm{NUM_{WORKERS}}$ hours, which may vary with your network condition.
72 | The default $\mathrm{NUM_{WORKERS}} = 90$ (roughly 22 hours in total), and you can adjust it in [config](configs/download.json#L7).
73 | The data will take about **3TB** of disk space.
74 |
75 | If you wish to **use the mini subset**, simply add the `--mini` option to your command, i.e. run the following command.
76 |
77 | ```cmd
78 | python scripts/youtube_download.py --mini >> download_output.txt
79 | ```
80 |
81 | You may refer to `download_exceptions.txt` to check whether the download was successful. The file is automatically generated by the script in the root of the `opendv` codebase.
82 |
83 | If downloading with `youtube-dl` is not successful, you can change the `method` in [config](configs/download.json#L4) from `youtube-dl` to `yt-dlp`.
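For orientation, each worker simply shells out to the selected downloader (see `scripts/youtube_download.py`); with the `yt-dlp` method and the default config, the per-video command is equivalent to something like the following, where the youtuber name, video id, and URL are placeholders:

```cmd
yt-dlp -f 'bv*[height<=?1080][height>=720]/b*[height<=?1080][height>=720]' -o 'OpenDV-YouTube/videos/Some_Youtuber/<videoid>.%(ext)s' <url>
```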
84 |
85 | ## Data Preprocessing (Converting videos to images)
86 |
87 | When the download is finished, first adjust the configuration in `configs/video2img.json` as you expect. The script also **supports parallel processing with multiple workers**, so you can set `num_workers` to a value that suits your hardware condition.
88 |
89 | Note that if you want to align with the annotations we provide, `frame_rate` **should not be changed.**
90 |
91 | Then, you can run the following command to preprocess the raw video data.
92 |
93 | ```cmd
94 | python scripts/video2img.py >> vid2img_output.txt
95 | ```
96 |
97 | The preprocessing will take about $8000/\mathrm{NUM_{WORKERS}}$ hours, which may vary with your hardware condition.
98 | The default $\mathrm{NUM_{WORKERS}} = 90$ (roughly 89 hours in total), and you can adjust it in [config](configs/video2img.json#L6).
99 | Resulting images will take about **25TB** of disk space.
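For reference, the processed data lands in a layout like the sketch below (derived from the path logic in the scripts; the youtuber name and video id are placeholders, youtuber names have spaces replaced by underscores, and frames are zero-padded 9-digit `jpg` files):

```
OpenDV-YouTube/
├── videos/                      # raw downloads
│   └── Some_Youtuber/
│       └── <videoid>.mp4
├── full_images/                 # train split
│   └── Some_Youtuber/
│       └── <videoid>/
│           ├── 000000000.jpg
│           ├── 000000001.jpg
│           └── ...
└── val_images/                  # val split
```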
100 |
101 | If you wish to **use the mini subset**, simply add the `--mini` option to your command, i.e. run the following command.
102 |
103 | ```cmd
104 | python scripts/video2img.py --mini >> vid2img_output.txt
105 | ```
106 |
107 | You may refer to `vid2img_exceptions.txt` to check the processing status.
108 |
109 | ## Language Annotations
110 |
111 | The full annotation data, including **commands** and **contexts** of video clips, is available at OpenDV-YouTube-Language. The files are in `json` format, with a total size of about **14GB**.
112 |
113 | The annotation data is aligned with the structure of the preprocessed data. You can use the following code to load the annotations for the train and val splits respectively.
114 |
115 | ```python
116 | import json
117 |
118 | # for train
119 | full_annos = []
120 | for split_id in range(10):
121 | split = json.load(open("10hz_YouTube_train_split{}.json".format(str(split_id)), "r"))
122 | full_annos.extend(split)
123 |
124 | # for val
125 | val_annos = json.load(open("10hz_YouTube_val.json", "r"))
126 | ```
127 |
128 | Annotations are loaded into `full_annos` as a list, where each element contains the annotations for one video clip. All elements are dictionaries with the following structure.
129 |
130 | ```python
131 | {
132 | "cmd": -- command, i.e. the command of the ego vehicle in the video clip.
133 | "blip": -- context, i.e. the BLIP description of the center frame in the video clip.
134 | "folder": -- the relative path from the processed OpenDV-YouTube dataset root to the image folder of the video clip.
135 | "first_frame": -- the filename of the first frame in the clip. Note that this file is included in the video clip.
136 | "last_frame": -- the filename of the last frame in the clip. Note that this file is included in the video clip.
137 | }
138 | ```
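To go from one annotation to its image files, here is a minimal sketch (assuming a hypothetical `DATA_ROOT` that points at the processed dataset root, and the default zero-padded frame naming produced by `scripts/video2img.py`):

```python
import os

DATA_ROOT = "OpenDV-YouTube/full_images"  # assumption: root of the processed train images

def clip_frame_paths(anno, data_root=DATA_ROOT):
    """Return the ordered frame paths of one clip, inclusive of first and last frame."""
    stem_first, ext = os.path.splitext(anno["first_frame"])
    stem_last, _ = os.path.splitext(anno["last_frame"])
    width = len(stem_first)  # zero-padding width of the frame indices (9 by default)
    folder = os.path.join(data_root, anno["folder"])
    return [
        os.path.join(folder, str(i).zfill(width) + ext)
        for i in range(int(stem_first), int(stem_last) + 1)
    ]
```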
139 |
140 | The command, *i.e.* the `cmd` field, can be converted to natural language using the `map_category_to_caption` function. You may refer to [cmd2caption.py](utils/cmd2caption.py#L158) for details.
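For example, a usage sketch with the annotations loaded above (assuming `cmd` stores the category index):

```python
from utils.cmd2caption import map_category_to_caption

anno = full_annos[0]
print(map_category_to_caption(anno["cmd"]))                 # randomly sampled phrasing, e.g. "Steer left."
print(map_category_to_caption(anno["cmd"], diverse=False))  # fixed phrasing, e.g. "Turn left."
```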
141 |
142 | The context, *i.e.* the `blip` field, is the description of the **center frame** in the video clip, generated by `BLIP2`.
143 |
144 |
145 | ## Citation
146 |
147 | If you find our work helpful, please cite the following paper.
148 |
149 | ```bibtex
150 | @inproceedings{yang2024genad,
151 | title={Generalized Predictive Model for Autonomous Driving},
152 | author={Jiazhi Yang and Shenyuan Gao and Yihang Qiu and Li Chen and Tianyu Li and Bo Dai and Kashyap Chitta and Penghao Wu and Jia Zeng and Ping Luo and Jun Zhang and Andreas Geiger and Yu Qiao and Hongyang Li},
153 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
154 | year={2024}
155 | }
156 | ```
--------------------------------------------------------------------------------
/opendv/configs/download.json:
--------------------------------------------------------------------------------
1 | {
2 | "root": "OpenDV-YouTube/videos",
3 | "video_list": "meta/OpenDV-YouTube.json",
4 | "method": "yt-dlp",
5 | "format": "bestvideo[height>=720,height<=1080]/best[height>=720,height<=1080]/bestvideo[height>=720]/best[height>=720]",
6 | "format_for_ytdlp": "bv*[height<=?1080][height>=720]/b*[height<=?1080][height>=720]",
7 | "num_workers": 90,
8 | "exception_file": "download_exceptions.txt"
9 | }
--------------------------------------------------------------------------------
/opendv/configs/video2img.json:
--------------------------------------------------------------------------------
1 | {
2 | "video_root": "OpenDV-YouTube/videos",
3 | "train_img_root": "OpenDV-YouTube/full_images",
4 | "val_img_root": "OpenDV-YouTube/val_images",
5 | "meta_info": "meta/OpenDV-YouTube.json",
6 | "num_workers": 90,
7 | "frame_rate": 10,
8 | "exception_file": "vid2img_exceptions.txt",
9 | "finish_log": "vid2img_finished.txt"
10 | }
--------------------------------------------------------------------------------
/opendv/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/ytdl-org/youtube-dl
2 | git+https://github.com/yt-dlp/yt-dlp@a065086640e888e8d58c615d52ed2f4f4e4c9d18
3 |
4 | opencv-python
5 | decord
6 | tqdm
7 | pandas
--------------------------------------------------------------------------------
/opendv/scripts/meta_preprocess.py:
--------------------------------------------------------------------------------
1 | """
2 | This script is used for preprocessing OpenDV-YouTube meta data, from Google sheet (as csv file) to json file.
3 | The script is a part of the [`GenAD`](https://arxiv.org/abs/2403.09630) project.
4 | """
5 |
6 | import json
7 | import pandas as pd
8 | import numpy as np
9 | import argparse
10 | from tqdm import tqdm
11 |
12 | KEY_MAP = {
13 | 'train / val': 'split',
14 | 'mini / full set': 'subset',
15 | 'nation or area (inferred by gpt)': 'area',
16 | 'state, province, or city (inferred by gpt and refined by human)': 'state',
17 | 'discarded length at the begininning (second)': 'start_discard',
18 | 'discarded length at the ending (second)': 'end_discard'
19 | }
20 |
21 | SPECIFIC_TYPE_MAP = {
22 | 'state': str
23 | }
24 |
25 | def duration2length(duration):
26 | """
27 | duration: HH:MM:SS, or MM:SS
28 | length: int (seconds)
29 | """
30 | duration = duration.split(":")
31 | length = int(duration[0]) * 60 + int(duration[1])
32 | if len(duration) == 3:
33 | length = length * 60 + int(duration[2])
34 | return length
35 |
36 |
37 | def csv2json(csv_path, json_path):
38 | df = pd.read_csv(csv_path)
39 | vid_list = []
40 | keys = df.keys()
41 | for vid_id in tqdm(range(len(df["ID"]))):
42 | vid_info = dict()
43 | for key in keys:
44 | value = df[key][vid_id]
45 | assigned_key = KEY_MAP.get(key.lower(), key.lower())
46 | if assigned_key in SPECIFIC_TYPE_MAP:
47 | value = SPECIFIC_TYPE_MAP[assigned_key](value)
48 | if isinstance(value, np.int64):
49 | value = int(value)
50 | elif value == "nan":
51 | value = "N/A"
52 | vid_info[assigned_key] = value
53 |
54 | vid_info["length"] = duration2length(vid_info["duration"])
55 | vid_list.append(vid_info)
56 |
57 | with open(json_path, "w") as f:
58 | json.dump(vid_list, f, indent=4, ensure_ascii=True)
59 |
60 |
61 | if __name__ == "__main__":
62 | parser = argparse.ArgumentParser(description='Convert OpenDV-YouTube meta data from csv to json')
63 | parser.add_argument('--csv_path', '-i', type=str, default="meta/OpenDV-YouTube.csv", help='path to the csv file')
64 | parser.add_argument('--json_path', '-o', type=str, default="meta/OpenDV-YouTube.json", help='path to the json file')
65 | args = parser.parse_args()
66 |
67 | csv2json(args.csv_path, args.json_path)
--------------------------------------------------------------------------------
/opendv/scripts/video2img.py:
--------------------------------------------------------------------------------
1 | """
2 | This script is used for preprocessing OpenDV-YouTube meta data, from raw video files to image files.
3 | The script is a part of the [`GenAD`](https://arxiv.org/abs/2403.09630) project.
4 | """
5 |
6 | import json
7 | import os, sys
8 | import time
9 | import argparse
10 | from multiprocessing import Pool
11 |
12 | from tqdm import tqdm
13 |
14 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
15 | sys.path.append(root_dir)
16 |
17 | from utils.easydict import EasyDict
18 | from utils.frame_extraction import extract_frames
19 | from utils.download import POSSIBLE_EXTS, youtuber_formatize, get_mini_opendv
20 |
21 |
22 | def collect_unfinished_videos(config, mini=False):
23 | configs = EasyDict(json.load(open(config, "r")))
24 | root = {
25 | "train": configs.train_img_root,
26 | "val": configs.val_img_root
27 | }
28 |
29 | meta_infos = json.load(open(configs.meta_info, "r"))
30 | if mini:
31 | meta_infos = get_mini_opendv(meta_infos)
32 | if os.path.exists(configs.finish_log):
33 | finish_log = set(open(configs.finish_log, "r").readlines())
34 | finish_log = {x.strip() for x in finish_log}
35 | else:
36 | finish_log = set()
37 |
38 | unfinished_videos = []
39 | print("collecting unfinished videos...")
40 | for video_meta in tqdm(meta_infos):
41 | if video_meta["videoid"] in finish_log:
42 | continue
43 | video_path = os.path.join(configs.video_root, youtuber_formatize(video_meta["youtuber"]), video_meta['videoid'])
44 | for ext in POSSIBLE_EXTS:
45 | if os.path.exists(f"{video_path}.{ext}"):
46 | break
47 | if not os.path.exists(f"{video_path}.{ext}"):
48 |             raise ValueError(f"Video {video_meta['videoid']} not found. Maybe something went wrong in the download process?")
49 |
50 | video_info = {
51 | "video_id": video_meta["videoid"],
52 | "video_path": f"{video_path}.{ext}",
53 | "output_dir": os.path.join(root[video_meta["split"].lower()], youtuber_formatize(video_meta["youtuber"]), video_meta['videoid']),
54 | "freq": configs.frame_rate,
55 | "start_discard": video_meta["start_discard"],
56 | "end_discard": video_meta["end_discard"],
57 | "exception_file": configs.exception_file,
58 | "finish_log": configs.finish_log
59 | }
60 | unfinished_videos.append(video_info)
61 |
62 | return unfinished_videos, EasyDict(configs)
63 |
64 |
65 | def convert_multiprocess(video_lists, configs):
66 | video_count = len(video_lists)
67 | with Pool(configs.num_workers) as p:
68 | current_time = time.perf_counter()
69 | for _ in tqdm(p.imap(extract_frames, video_lists), total=video_count):
70 | pass
71 |
72 |
73 | if __name__ == '__main__':
74 | parser = argparse.ArgumentParser()
75 | parser.add_argument('--config', type=str, default='configs/video2img.json')
76 | parser.add_argument('--mini', action='store_true', default=False, help='Convert mini dataset only.')
77 | # parser.add_argument('--start_id', type=int, default=0)
78 | # parser.add_argument('--end_id', type=int, default=-1)
79 | # parser.add_argument('--test_video', type=str, default=None)
80 |
81 | args = parser.parse_args()
82 | video_lists, meta_configs = collect_unfinished_videos(args.config, args.mini)
83 |
84 | # if args.end_id == -1:
85 | # args.end_id = len(video_lists)
86 | # video_lists = video_lists[args.start_id:args.end_id]
87 | # if args.test_video is not None:
88 | # convert_multiprocess([{**video_lists[0], "video_path": args.test_video}], meta_config)
89 | # exit(0)
90 |
91 | convert_multiprocess(video_lists, meta_configs)
--------------------------------------------------------------------------------
/opendv/scripts/youtube_download.py:
--------------------------------------------------------------------------------
1 | """
2 | This script is used for downloading OpenDV-YouTube raw data.
3 | The script is a part of the [`GenAD`](https://arxiv.org/abs/2403.09630) project.
4 | """
5 |
6 | from multiprocessing import Pool
7 | from tqdm import tqdm
8 | import os, sys
9 | import time
10 | import json
11 | import cv2
12 |
13 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
14 | sys.path.append(root_dir)
15 |
16 | from utils.easydict import EasyDict
17 | from utils.download import youtuber_formatize, POSSIBLE_EXTS, get_video_with_meta, get_mini_opendv
18 |
19 | CONFIGS = dict()
20 |
21 | def single_download(args):
22 | vid_info, CONFIGS = args
23 |
24 | url = vid_info["link"]
25 | filename = vid_info["videoid"]
26 | folder = youtuber_formatize(vid_info["youtuber"])
27 | path = os.path.join(CONFIGS.root, folder)
28 |
29 | for ext in POSSIBLE_EXTS:
30 | if os.path.exists(f"{path}/{filename}.{ext}"):
31 | print(f"Video {filename} already exists in {path}. Skipping...")
32 | return
33 | if not os.path.exists(path):
34 | os.makedirs(path, exist_ok=True)
35 |
36 | try:
37 | ret = os.system(f"{CONFIGS.method} -f '{CONFIGS.format}' -o '{path}/{filename}.%(ext)s' {url}")
38 | if ret != 0:
39 | raise Exception("ERROR: Video unavailable or network error.")
40 | except Exception as e:
41 | with open(CONFIGS.exception_file, "a") as f:
42 | f.write("Error downloading video [{}]: {}\n".format(filename, e))
43 | return
44 |
45 |
46 | def multiple_download(video_list, configs):
47 | global CONFIGS
48 |
49 | video_count = len(video_list)
50 | CONFIGS["method"] = configs["method"]
51 | assert CONFIGS["method"] in ["youtube-dl", "yt-dlp"], "Only support `youtube-dl` and `yt-dlp`."
52 | CONFIGS["format"] = configs["format"] if configs["method"] == "youtube-dl" else configs["format_for_ytdlp"]
53 | CONFIGS["root"] = configs.root
54 | CONFIGS["exception_file"] = configs.exception_file
55 | CONFIGS = EasyDict(CONFIGS)
56 | finished = 0
57 | with Pool(configs.num_workers) as p:
58 | current_time = time.perf_counter()
59 | for _ in tqdm(p.imap(single_download, [(vid_info, CONFIGS) for vid_info in video_list]), total=video_count):
60 | finished += 1
61 | working_time = time.perf_counter() - current_time
62 | eta = working_time / finished * (video_count - finished)
63 | eta = time.strftime("%H:%M:%S", time.gmtime(eta))
64 | print("Finished {}/{} videos. ETA: {}.".format(finished, video_count, eta))
65 |
66 |
67 | def check_status(video_list, configs):
68 | if "exception_file" not in configs:
69 | print("No exception file specified. Skipping...")
70 | return
71 |
72 | print("Checking download status...")
73 | with open(configs.exception_file, "a") as f:
74 | f.write("\n\nChecking download status...\n")
75 |
76 | for vid_info in tqdm(video_list):
77 | exists = False
78 | path = os.path.join(configs.root, youtuber_formatize(vid_info["youtuber"]))
79 | for ext in POSSIBLE_EXTS:
80 | if os.path.exists("{}/{}.{}".format(path, vid_info["videoid"], ext)):
81 | exists = True
82 | break
83 | if not exists:
84 | with open(configs.exception_file, "a") as f:
85 | f.write(f"Video [{vid_info['videoid']}] not found in [{path}].\n")
86 | continue
87 |
88 | _, true_duration = get_video_with_meta("{}/{}.{}".format(path, vid_info["videoid"], ext), ["duration"])
89 |
90 | duration_in_json = vid_info["duration"]
91 | expected_duration = vid_info["length"]
92 |
93 | if abs(true_duration - expected_duration) > 5:
94 | with open(configs.exception_file, "a") as f:
95 | f.write(f"Video [{vid_info['videoid']}]: Duration mismatch. Expected: {duration_in_json} ({expected_duration} seconds), True: {true_duration} seconds.\n")
96 |
97 | with open(configs.exception_file, "a") as f:
98 | f.write("\nChecking download status finished.")
99 |
100 |
101 | if __name__ == '__main__':
102 | import argparse
103 | parser = argparse.ArgumentParser()
104 |     parser.add_argument("--config", type=str, default="configs/download.json", help="Path to the config file. Should be a `json` file.")
105 | parser.add_argument("--mini", action="store_true", default=False, help="Download mini dataset only.")
106 | args = parser.parse_args()
107 |
108 | configs = EasyDict(json.load(open(args.config, "r")))
109 | with open(configs.exception_file, "w") as f:
110 | f.write("")
111 |
112 | video_list = json.load(open(configs.pop("video_list"), "r"))
113 | if args.mini:
114 | video_list = get_mini_opendv(video_list)
115 | if not os.path.exists(configs.root):
116 | os.makedirs(configs.root, exist_ok=True)
117 |
118 | multiple_download(video_list, configs)
119 | check_status(video_list, configs)
--------------------------------------------------------------------------------
/opendv/utils/cmd2caption.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | plain_caption_dict = {
4 | 0: "Go straight.",
5 | 1: "Pass the intersection.",
6 | 2: "Turn left.",
7 | 3: "Turn right.",
8 | 4: "Change to the left lane.",
9 | 5: "Change to the right lane.",
10 | 6: "Go to the left lane branch.",
11 | 7: "Go to the right lane branch.",
12 | 8: "Pass the crosswalk.",
13 | 9: "Pass the railroad.",
14 | 10: "Merge.",
15 | 11: "Make a U-turn.",
16 | 12: "Stop.",
17 | 13: "Deviate."
18 | }
19 |
20 | diverse_caption_dict = {
21 | 0: [
22 | "Move forward.",
23 | "Move steady.",
24 | "Go forward.",
25 | "Go straight.",
26 | "Proceed.",
27 | "Drive forward.",
28 | "Drive straight.",
29 | "Drive steady.",
30 | "Keep the direction.",
31 | "Maintain the direction.",
32 | ],
33 | 1: [
34 | "Pass the intersection.",
35 | "Cross the intersection.",
36 | "Traverse the intersection.",
37 | "Drive through the intersection.",
38 | "Move past the intersection.",
39 | "Pass the junction.",
40 | "Cross the junction.",
41 | "Traverse the junction.",
42 | "Drive through the junction.",
43 | "Move past the junction.",
44 | "Pass the crossroad.",
45 | "Cross the crossroad.",
46 | "Traverse the crossroad.",
47 | "Drive through the crossroad.",
48 | "Move past the crossroad.",
49 | ],
50 | 2: [
51 | "Turn left.",
52 | "Turn to the left.",
53 | "Make a left turn.",
54 | "Take a left turn.",
55 | "Turn to the left.",
56 | "Left turn.",
57 | "Steer left.",
58 | "Steer to the left.",
59 | ],
60 | 3: [
61 | "Turn right.",
62 | "Turn to the right.",
63 | "Make a right turn.",
64 | "Take a right turn.",
65 | "Turn to the right.",
66 | "Right turn.",
67 | "Steer right.",
68 | "Steer to the right.",
69 | ],
70 | 4: [
71 | "Make a left lane change.",
72 | "Change to the left lane.",
73 | "Switch to the left lane.",
74 | "Shift to the left lane.",
75 | "Move to the left lane.",
76 | ],
77 | 5: [
78 | "Make a right lane change.",
79 | "Change to the right lane.",
80 | "Switch to the right lane.",
81 | "Shift to the right lane.",
82 | "Move to the right lane.",
83 | ],
84 | 6: [
85 | "Go to the left lane branch.",
86 | "Take the left lane branch.",
87 | "Move into the left lane branch.",
88 | "Follow the left lane branch.",
89 | "Follow the left side road.",
90 | ],
91 | 7: [
92 | "Go to the right lane branch.",
93 | "Take the right lane branch.",
94 | "Move into the right lane branch.",
95 | "Follow the right lane branch.",
96 | "Follow the right side road.",
97 | ],
98 | 8: [
99 | "Pass the crosswalk.",
100 | "Cross the crosswalk.",
101 | "Traverse the crosswalk.",
102 | "Drive through the crosswalk.",
103 | "Move past the crosswalk.",
104 | "Pass the crossing area.",
105 | "Cross the crossing area.",
106 | "Traverse the crossing area.",
107 | "Drive through the crossing area.",
108 | "Move past the crossing area.",
109 | ],
110 | 9: [
111 | "Pass the railroad.",
112 | "Cross the railroad.",
113 | "Traverse the railroad.",
114 | "Drive through the railroad.",
115 | "Move past the railroad.",
116 | "Pass the railway.",
117 | "Cross the railway.",
118 | "Traverse the railway.",
119 | "Drive through the railway.",
120 | "Move past the railway.",
121 | ],
122 | 10: [
123 | "Merge.",
124 | "Merge traffic.",
125 | "Merge into traffic.",
126 | "Merge into the traffic.",
127 | "Join the traffic.",
128 | "Merge into the traffic flow.",
129 | "Join the traffic flow.",
130 | "Merge into the traffic stream.",
131 | "Join the traffic stream.",
132 | "Merge into the lane.",
133 | ],
134 | 11: [
135 | "Make a U-turn.",
136 | "Make a 180-degree turn.",
137 | "Turn 180 degree.",
138 | "Turn around.",
139 | "Drive in a U-turn.",
140 | ],
141 | 12: [
142 | "Stop.",
143 | "Halt.",
144 | "Decelerate.",
145 | "Slow down.",
146 | "Brake.",
147 | ],
148 | 13: [
149 | "Deviate.",
150 | "Deviate from the path.",
151 | "Deviate from the lane.",
152 | "Change the direction.",
153 | "Shift the direction.",
154 | ]
155 | }
156 |
157 |
158 | def map_category_to_caption(category_index, diverse=True):
159 | if diverse:
160 | return random.choice(diverse_caption_dict[category_index])
161 | else:
162 | return plain_caption_dict[category_index]
163 |
--------------------------------------------------------------------------------
/opendv/utils/download.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 |
4 | POSSIBLE_EXTS = ["mp4", "webm", "mkv"]
5 |
6 | def youtuber_formatize(youtuber):
7 | return youtuber.replace(" ", "_")
8 |
9 |
10 | def get_video_with_meta(video_path, need_metas=["fps", "duration", "num_frames"]):
11 | if not os.path.exists(video_path):
12 | video = None
13 | fps = -1
14 | duration = -1
15 | num_frames = -1
16 | else:
17 | try:
18 | video = cv2.VideoCapture(video_path)
19 | fps = video.get(cv2.CAP_PROP_FPS)
20 | if fps == 0:
21 | cmd = "ffprobe -v error -select_streams v -of default=noprint_wrappers=1:nokey=1 -show_entries stream=r_frame_rate {}".format(video_path)
22 | precise_fps = os.popen(cmd).read().split("/")
23 | fps = float(precise_fps[0]) / float(precise_fps[1])
24 | if ("num_frames" in need_metas) or ("duration" in need_metas):
25 | cmd = "ffprobe -show_entries format=duration -v quiet -of csv=\"p=0\" {}".format(video_path)
26 | precise_duration = os.popen(cmd).read()
27 | duration = int(float(precise_duration))
28 | if "num_frames" in need_metas:
29 | num_frames = int(duration * fps)
30 | else:
31 | if "num_frames" in need_metas:
32 | num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
33 | if "duration" in need_metas:
34 | duration = video.get(cv2.CAP_PROP_FRAME_COUNT) / fps
35 |
36 | except Exception as e:
37 | print("Error: ", e)
38 | video = None
39 | fps = -1
40 | duration = -1
41 | num_frames = -1
42 |
43 | return_params = (video,)
44 | if "fps" in need_metas:
45 | return_params += (fps,)
46 | if "duration" in need_metas:
47 | return_params += (duration,)
48 | if "num_frames" in need_metas:
49 | return_params += (num_frames,)
50 |
51 | return return_params
52 |
53 | def get_mini_opendv(full_video_list):
54 | mini_list = []
55 | for vid_info in full_video_list:
56 | if vid_info["subset"] != "Mini":
57 | continue
58 | mini_list.append(vid_info)
59 |
60 | return mini_list
--------------------------------------------------------------------------------
/opendv/utils/easydict.py:
--------------------------------------------------------------------------------
1 | class EasyDict(dict):
2 | """
3 | Get attributes
4 |
5 | >>> d = EasyDict({'foo':3})
6 | >>> d['foo']
7 | 3
8 | >>> d.foo
9 | 3
10 | >>> d.bar
11 | Traceback (most recent call last):
12 | ...
13 | AttributeError: 'EasyDict' object has no attribute 'bar'
14 |
15 | Works recursively
16 |
17 | >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}})
18 | >>> isinstance(d.bar, dict)
19 | True
20 | >>> d.bar.x
21 | 1
22 |
23 | Bullet-proof
24 |
25 | >>> EasyDict({})
26 | {}
27 | >>> EasyDict(d={})
28 | {}
29 | >>> EasyDict(None)
30 | {}
31 | >>> d = {'a': 1}
32 | >>> EasyDict(**d)
33 | {'a': 1}
34 |
35 | Set attributes
36 |
37 | >>> d = EasyDict()
38 | >>> d.foo = 3
39 | >>> d.foo
40 | 3
41 | >>> d.bar = {'prop': 'value'}
42 | >>> d.bar.prop
43 | 'value'
44 | >>> d
45 | {'foo': 3, 'bar': {'prop': 'value'}}
46 | >>> d.bar.prop = 'newer'
47 | >>> d.bar.prop
48 | 'newer'
49 |
50 |
51 | Values extraction
52 |
53 | >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]})
54 | >>> isinstance(d.bar, list)
55 | True
56 | >>> from operator import attrgetter
57 | >>> map(attrgetter('x'), d.bar)
58 | [1, 3]
59 | >>> map(attrgetter('y'), d.bar)
60 | [2, 4]
61 | >>> d = EasyDict()
62 | >>> d.keys()
63 | []
64 | >>> d = EasyDict(foo=3, bar=dict(x=1, y=2))
65 | >>> d.foo
66 | 3
67 | >>> d.bar.x
68 | 1
69 |
70 | Still like a dict though
71 |
72 | >>> o = EasyDict({'clean':True})
73 | >>> o.items()
74 | [('clean', True)]
75 |
76 | And like a class
77 |
78 | >>> class Flower(EasyDict):
79 | ... power = 1
80 | ...
81 | >>> f = Flower()
82 | >>> f.power
83 | 1
84 | >>> f = Flower({'height': 12})
85 | >>> f.height
86 | 12
87 | >>> f['power']
88 | 1
89 | >>> sorted(f.keys())
90 | ['height', 'power']
91 |
92 | update and pop items
93 | >>> d = EasyDict(a=1, b='2')
94 | >>> e = EasyDict(c=3.0, a=9.0)
95 | >>> d.update(e)
96 | >>> d.c
97 | 3.0
98 | >>> d['c']
99 | 3.0
100 | >>> d.get('c')
101 | 3.0
102 | >>> d.update(a=4, b=4)
103 | >>> d.b
104 | 4
105 | >>> d.pop('a')
106 | 4
107 | >>> d.a
108 | Traceback (most recent call last):
109 | ...
110 | AttributeError: 'EasyDict' object has no attribute 'a'
111 | """
112 |
113 | def __init__(self, d=None, **kwargs):
114 | if d is None:
115 | d = {}
116 | if kwargs:
117 | d.update(**kwargs)
118 | for k, v in d.items():
119 | setattr(self, k, v)
120 | # Class attributes
121 | for k in self.__class__.__dict__.keys():
122 | if not (k.startswith("__") and k.endswith("__")) and not k in ("update", "pop"):
123 | setattr(self, k, getattr(self, k))
124 |
125 | def __setattr__(self, name, value):
126 | if isinstance(value, (list, tuple)):
127 | value = [self.__class__(x) if isinstance(x, dict) else x for x in value]
128 | elif isinstance(value, dict) and not isinstance(value, self.__class__):
129 | value = self.__class__(value)
130 | super(EasyDict, self).__setattr__(name, value)
131 | super(EasyDict, self).__setitem__(name, value)
132 |
133 | __setitem__ = __setattr__
134 |
135 | def update(self, e=None, **f):
136 | d = e or dict()
137 | d.update(f)
138 | for k in d:
139 | setattr(self, k, d[k])
140 |
141 | def pop(self, k, d=None):
142 | if hasattr(self, k):
143 | delattr(self, k)
144 | return super(EasyDict, self).pop(k, d)
145 |
146 |
147 | if __name__ == "__main__":
148 | import doctest
--------------------------------------------------------------------------------
/opendv/utils/frame_extraction.py:
--------------------------------------------------------------------------------
1 | """
2 | This script is used for preprocessing OpenDV-YouTube raw data.
3 | The script is a part of the [`GenAD`](https://arxiv.org/abs/2403.09630) project.
4 | """
5 |
6 | import os
7 | import time
8 | import traceback
9 | import json
10 |
11 | import numpy as np
12 | import decord
13 | import cv2
14 | from tqdm import tqdm
15 |
16 | from utils.download import get_video_with_meta
17 |
18 | DECORD_ACCEPTABLE_TYPES = ['mp4']
19 | FORCE_USE_CV2 = True
20 |
21 | IDX_WIDTH = 9
22 | # set [IDX_WIDTH] to [None] if you want to use the default format, i.e. zero padding to the maximal index of a video
23 | INFO_INTERVAL = 1000
24 | DEFAULT_FPS = 10
25 |
26 |
27 | def extract_frames(video_info):
28 | video_path = video_info.get("video_path", None)
29 | output_dir = video_info.get("output_dir", None)
30 | fps = video_info.get("freq", DEFAULT_FPS)
31 | discard_begin = video_info.get("start_discard", 90)
32 | discard_end = video_info.get("end_discard", 60)
33 | exception_file = video_info.get("exception_file", None)
34 | finish_log = video_info.get("finish_log", None)
35 |
36 | if video_path is None or output_dir is None:
37 | print("skipping invalid video info...")
38 | return
39 |
40 | try:
41 | if (FORCE_USE_CV2) or (video_path.split('.')[-1] not in DECORD_ACCEPTABLE_TYPES):
42 | print("[opencv] extracting frames from video [{}]...".format(video_path))
43 | cv2_extract_frames(video_path, output_dir, fps, discard_begin, discard_end, exception_file)
44 | else:
45 | print("[decord] extracting frames from video [{}]...".format(video_path))
46 | decord_extract_frames(video_path, output_dir, fps, discard_begin, discard_end, exception_file)
47 |
48 | if finish_log is not None:
49 | with open(finish_log, "a") as f:
50 | f.write(video_info.get("video_id", video_path.split("/")[-1]))
51 | f.write("\n")
52 |
53 | except Exception as e:
54 | exceptions = dict()
55 | exceptions["video_path"] = video_path
56 | exceptions["problem"] = str(e)
57 | exceptions["action"] = "skipped"
58 | exceptions["details"] = traceback.format_exc()
59 | json.dump(exceptions, open(exception_file, "a"), indent=4)
60 | with open(exception_file, "a") as f:
61 | f.write(",\n")
62 |
63 | traceback.print_exc()
64 |
65 |
66 | def count_done_frames(save_path):
67 | return len(os.listdir(save_path))
68 |
69 | def special_video_setting_log(video_path, exception_file, height=None, width=None, video_reader=None):
70 | skipped = False
71 |
72 | exception = None
73 | if video_reader is None:
74 | exception = {
75 | "video_path": video_path,
76 | "problem": "video not found or corrupted",
77 | "action": "skipped",
78 | "details": "video not found or corrupted"
79 | }
80 |         json.dump(exception, open(exception_file, "a"), indent=4)  # write the exception before skipping
81 |         open(exception_file, "a").write(",\n"); return True
82 | if (height is None) or (width is None):
83 | height = video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT)
84 | width = video_reader.get(cv2.CAP_PROP_FRAME_WIDTH)
85 |
86 | if (width < 1280) and (height < 720):
87 | exception = {
88 | "video_path": video_path,
89 | "problem": "< 720p",
90 | "action": "skipped",
91 | "details": "{} x {}".format(width, height)
92 | }
93 | skipped = True
94 |
95 | elif (width / height != 16 / 9):
96 | exception = {
97 | "video_path": video_path,
98 | "problem": "not 16:9",
99 | "action": "as normal",
100 | "details": "{} x {}".format(width, height)
101 | }
102 |
103 | if exception is not None:
104 | json.dump(exception, open(exception_file, "a"), indent=4)
105 | with open(exception_file, "a") as f:
106 | f.write(",\n")
107 |
108 | return skipped
109 |
110 |
111 | def decord_extract_frames(video_path, save_path, fps=10, discard_begin=90, discard_end=60, msg_file=None):
112 | start_index = 0
113 | if not os.path.exists(save_path):
114 | os.makedirs(save_path)
115 | else:
116 |         start_index = max(count_done_frames(save_path) - 1, 0)
117 | # so that we could rewrite the last frame, in case the last frame is corrupted
118 |
119 | video = decord.VideoReader(video_path, ctx=decord.cpu(), num_threads=1)
120 | video_fps = video.get_avg_fps()
121 | num_frames = int( fps * (len(video) // video_fps - discard_begin - discard_end) )
122 | idx_width = len(str(num_frames)) if IDX_WIDTH is None else IDX_WIDTH
123 | interval = video_fps / fps
124 |
125 | img = video[0].asnumpy()
126 | frame_height, frame_width, _ = img.shape
127 | if special_video_setting_log(video_path, msg_file, frame_height, frame_width):
128 | return
129 | del img
130 | first_log = True
131 |
132 | indices = np.array([ int(discard_begin * video_fps) + int(np.round(i * interval)) for i in range(num_frames)])
133 | start_time = time.perf_counter()
134 | ids = list(range(num_frames))
135 | for id in ids[start_index:]:
136 | frame = video[indices[id]].asnumpy()
137 | file_path = os.path.join(save_path, str(id).zfill(idx_width) + ".jpg")
138 | cv2.imwrite(file_path, cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
139 |
140 | if (first_log) or ((id+1) % INFO_INTERVAL == 0):
141 | first_log = False
142 | elapsed_time = time.perf_counter() - start_time
143 | eta = elapsed_time / (id+1) * (len(indices) - id - 1)
144 | elapsed_time = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
145 | eta = time.strftime("%H:%M:%S", time.gmtime(eta))
146 | progress_bar = "\u2588" * int((id+1) / len(indices) * 20) + " " * (20 - int((id+1) / len(indices) * 20))
147 | print("{} {}/{} Elapsed: {}\t ETA: {}".format(progress_bar, id+1, len(indices), elapsed_time, eta))
148 |
149 |
150 | def cv2_extract_frames(video_path, save_path, fps=10, discard_begin=90, discard_end=60, msg_file=None):
151 | start_index = 0
152 | if not os.path.exists(save_path):
153 | os.makedirs(save_path)
154 | else:
155 |         start_index = max(count_done_frames(save_path) - 1, 0)
156 | # so that we could rewrite the last frame, in case the last frame is corrupted
157 |
158 | video, video_fps, total_frames = get_video_with_meta(video_path, need_metas=["fps", "num_frames"])
159 | if video is not None:
160 | num_frames = int( fps * (total_frames // video_fps - discard_begin - discard_end) )
161 | idx_width = len(str(num_frames)) if IDX_WIDTH is None else IDX_WIDTH
162 | interval = video_fps / fps
163 |
164 | if special_video_setting_log(video_path, msg_file, video_reader=video):
165 | return
166 | first_log, first_frame = True, True
167 |
168 | indices = np.array([ int(discard_begin * video_fps) + int(np.round(i * interval)) for i in range(num_frames)])
169 | start_time = time.perf_counter()
170 | ids = list(range(num_frames))
171 | for id in ids[start_index:]:
172 | if first_frame:
173 | video.set(cv2.CAP_PROP_POS_FRAMES, indices[id])
174 | video.grab()
175 | first_frame = False
176 | else:
177 | for _ in range(indices[id] - indices[id-1]):
178 | video.grab()
179 |
180 | _, frame = video.retrieve()
181 | file_path = os.path.join(save_path, str(id).zfill(idx_width) + ".jpg")
182 | cv2.imwrite(file_path, frame)
183 |
184 | if (first_log) or ((id+1) % INFO_INTERVAL == 0):
185 | first_log = False
186 | elapsed_time = time.perf_counter() - start_time
187 | eta = elapsed_time / (id+1) * (len(indices) - id - 1)
188 | elapsed_time = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
189 | eta = time.strftime("%H:%M:%S", time.gmtime(eta))
190 | progress_bar = "\u2588" * int((id+1) / len(indices) * 20) + " " * (20 - int((id+1) / len(indices) * 20))
191 | print("{} {}/{} Elapsed: {}\t ETA: {}".format(progress_bar, id+1, len(indices), elapsed_time, eta))
--------------------------------------------------------------------------------