├── .github
│   └── FUNDING.yml
├── .gitignore
├── Drivedata.md
├── LICENSE
├── README.md
├── assets
│   ├── Affiliation.png
│   ├── Drivedata_overview.jpg
│   ├── Drivedata_timeline.jpg
│   ├── cvpr24_genad_poster.png
│   ├── opendv_examples.png
│   ├── overview.png
│   └── vista-teaser.gif
└── opendv
    ├── .gitignore
    ├── README.md
    ├── configs
    │   ├── download.json
    │   └── video2img.json
    ├── requirements.txt
    ├── scripts
    │   ├── meta_preprocess.py
    │   ├── video2img.py
    │   └── youtube_download.py
    └── utils
        ├── cmd2caption.py
        ├── download.py
        ├── easydict.py
        └── frame_extraction.py
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: [OpenDriveLab] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/Drivedata.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future
4 |
5 | > **This repo is all you need for Open-sourced Data Ecosystem in Autonomous Driving.**
6 |
7 | We present comprehensive paper collections, leaderboards, and challenges.
8 |
9 |
11 |
12 | ## Table of Contents
13 |
14 | - [Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future](#open-sourced-data-ecosystem-in-autonomous-driving-the-present-and-future)
15 | - [Table of Contents](#table-of-contents)
16 | - [Citation](#citation)
17 | - [Challenges and Leaderboards](#challenges-and-leaderboards)
18 | - [Dataset Collection](#dataset-collection)
19 | - [Perception](#perception)
20 | - [Mapping](#mapping)
21 | - [Prediction and Planning](#prediction-and-planning)
22 |
25 | - [License](#license)
26 |
27 | ## Citation
28 | If you find this project useful in your research, please consider citing:
29 | ```BibTeX
30 | @misc{li2023opensourced,
31 | title={Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future},
32 | author={Hongyang Li and Yang Li and Huijie Wang and Jia Zeng and Pinlong Cai and Huilin Xu and Dahua Lin and Junchi Yan and Feng Xu and Lu Xiong and Jingdong Wang and Futang Zhu and Kai Yan and Chunjing Xu and Tiancai Wang and Beipeng Mu and Shaoqing Ren and Zhihui Peng and Yu Qiao},
33 | year={2023},
34 | eprint={2312.03408},
35 | archivePrefix={arXiv},
36 | primaryClass={cs.CV}
37 | }
38 | ```
39 |
40 |
(back to top)
41 |
## Challenges and Leaderboards

| Title | Host | Year | Task | Entry |
|:---|:---|:---|:---|:---|
| Autonomous Driving Challenge | OpenDriveLab | CVPR2023 | Perception / OpenLane Topology | 111 |
| | | | Perception / Online HD Map Construction | |
| | | | Perception / 3D Occupancy Prediction | |
| | | | Prediction & Planning / nuPlan Planning | |
| Waymo Open Dataset Challenges | Waymo | CVPR2023 | Perception / 2D Video Panoptic Segmentation | 35 |
| | | | Perception / Pose Estimation | |
| | | | Prediction / Motion Prediction | |
| | | | Prediction / Sim Agents | |
| | | CVPR2022 | Prediction / Motion Prediction | 128 |
| | | | Prediction / Occupancy and Flow Prediction | |
| | | | Perception / 3D Semantic Segmentation | |
| | | | Perception / 3D Camera-only Detection | |
| | | CVPR2021 | Prediction / Motion Prediction | 115 |
| | | | Prediction / Interaction Prediction | |
| | | | Perception / Real-time 3D Detection | |
| | | | Perception / Real-time 2D Detection | |
| Argoverse Challenges | Argoverse | CVPR2023 | Prediction / Multi-agent Forecasting | 81 |
| | | | Perception & Prediction / Unified Sensor-based Detection, Tracking, and Forecasting | |
| | | | Perception / LiDAR Scene Flow | |
| | | | Prediction / 3D Occupancy Forecasting | |
| | | CVPR2022 | Perception / 3D Object Detection | 81 |
| | | | Prediction / Motion Forecasting | |
| | | | Perception / Stereo Depth Estimation | |
| | | CVPR2021 | Perception / Stereo Depth Estimation | 368 |
| | | | Prediction / Motion Forecasting | |
| | | | Perception / Streaming 2D Detection | |
| CARLA Autonomous Driving Challenge | CARLA Team, Intel | 2023 | Planning / CARLA AD Challenge 2.0 | - |
| | | NeurIPS2022 | Planning / CARLA AD Challenge 1.0 | 19 |
| | | NeurIPS2021 | Planning / CARLA AD Challenge 1.0 | - |
| Guangdong-Hong Kong-Macao Greater Bay Area (Huangpu) International Algorithm Case Competition | Pazhou Lab | 2023 | Perception / Cross-scene Monocular Depth Estimation | - |
| | | | Perception / Roadside mmWave Radar Calibration and Object Tracking | - |
| | | 2022 | Perception / Roadside 3D Perception Algorithms | - |
| | | | Perception / Storefront Sign Text Recognition in Street-view Images | - |
| AI Driving Olympics | ETH Zurich, University of Montreal, Motional | NeurIPS2021 | Perception / nuScenes Panoptic | 11 |
| | | ICRA2021 | Perception / nuScenes Detection | 456 |
| | | | Perception / nuScenes Tracking | |
| | | | Prediction / nuScenes Prediction | |
| | | | Perception / nuScenes LiDAR Segmentation | |
| Jittor AI Algorithm Challenge | Department of Information Sciences, NSFC | 2021 | Perception / Traffic Sign Detection | 37 |
| KITTI Vision Benchmark Suite | University of Tübingen | 2012 | Perception / Stereo, Flow, Scene Flow, Depth, Odometry, Object, Tracking, Road, Semantics | 5,610 |

(back to top)
271 |
## Dataset Collection

### Perception

| Dataset | Year | Scenes | Hours | Region | Camera | Lidar | Other Sensors | Annotation | Paper |
|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
| KITTI | 2012 | 50 | 6 | EU | Front-view | ✗ | GPS & IMU | 2D BBox & 3D BBox | Link |
| Cityscapes | 2016 | - | - | EU | Front-view | ✗ | | 2D Seg | Link |
| Lost and Found | 2016 | 112 | - | - | Front-view | ✗ | | 2D Seg | Link |
| Mapillary | 2016 | - | - | Global | Street-view | ✗ | | 2D Seg | Link |
| DDD17 | 2017 | 36 | 12 | EU | Front-view | ✗ | GPS & CAN-bus & Event Camera | - | Link |
| Apolloscape | 2016 | 103 | 2.5 | AS | Front-view | ✗ | GPS & IMU | 3D BBox & 2D Seg | Link |
| BDD-X | 2018 | 6984 | 77 | NA | Front-view | ✗ | | Language | Link |
| HDD | 2018 | - | 104 | NA | Front-view | ✓ | GPS & IMU & CAN-bus | 2D BBox | Link |
| IDD | 2018 | 182 | - | AS | Front-view | ✗ | | 2D Seg | Link |
| SemanticKITTI | 2019 | 50 | 6 | EU | ✗ | ✓ | | 3D Seg | Link |
| Woodscape | 2019 | - | - | Global | 360° | ✓ | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| DrivingStereo | 2019 | 42 | - | AS | Front-view | ✓ | | - | Link |
| Brno-Urban | 2019 | 67 | 10 | EU | Front-view | ✓ | GPS & IMU & Infrared Camera | - | Link |
| A*3D | 2019 | - | 55 | AS | Front-view | ✓ | | 3D BBox | Link |
| Talk2Car | 2019 | 850 | 283.3 | NA | Front-view | ✓ | | Language & 3D BBox | Link |
| Talk2Nav | 2019 | 10714 | - | Sim | 360° | ✗ | | Language | Link |
| PIE | 2019 | - | 6 | NA | Front-view | ✗ | | 2D BBox | Link |
| UrbanLoco | 2019 | 13 | - | AS & NA | 360° | ✓ | IMU | - | Link |
| TITAN | 2019 | 700 | - | AS | Front-view | ✗ | | 2D BBox | Link |
| H3D | 2019 | 160 | 0.77 | NA | Front-view | ✓ | GPS & IMU | - | Link |
| A2D2 | 2020 | - | 5.6 | EU | 360° | ✓ | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| CARRADA | 2020 | 30 | 0.3 | NA | Front-view | ✗ | Radar | 3D BBox | Link |
| DAWN | 2019 | - | - | Global | Front-view | ✗ | | 2D BBox | Link |
| 4Seasons | 2019 | - | - | - | Front-view | ✗ | GPS & IMU | - | Link |
| UNDD | 2019 | - | - | - | Front-view | ✗ | | 2D Seg | Link |
| SemanticPOSS | 2020 | - | - | AS | ✗ | ✓ | GPS & IMU | 3D Seg | Link |
| Toronto-3D | 2020 | 4 | - | NA | ✗ | ✓ | | 3D Seg | Link |
| ROAD | 2021 | 22 | - | EU | Front-view | ✗ | | 2D BBox & Topology | Link |
| Reasonable Crowd | 2021 | - | - | Sim | Front-view | ✗ | | Language | Link |
| METEOR | 2021 | 1250 | 20.9 | AS | Front-view | ✗ | GPS | Language | Link |
| PandaSet | 2021 | 179 | - | NA | 360° | ✓ | GPS & IMU | 3D BBox | Link |
| MUAD | 2022 | - | - | Sim | 360° | ✓ | | 2D Seg & 2D BBox | Link |
| TAS-NIR | 2022 | - | - | - | Front-view | ✗ | Infrared Camera | 2D Seg | Link |
| LiDAR-CS | 2022 | 6 | - | Sim | ✗ | ✓ | | 3D BBox | Link |
| WildDash | 2022 | - | - | - | Front-view | ✗ | | 2D Seg | Link |
| OpenScene | 2023 | 1000 | 5.5 | AS & NA | 360° | ✗ | | 3D Occ | Link |
| ZOD | 2023 | 1473 | 8.2 | EU | 360° | ✓ | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| nuScenes | 2019 | 1000 | 5.5 | AS & NA | 360° | ✓ | GPS & CAN-bus & Radar & HDMap | 3D BBox & 3D Seg | Link |
| Argoverse V1 | 2019 | 324k | 320 | NA | 360° | ✓ | HDMap | 3D BBox & 3D Seg | Link |
| Waymo | 2019 | 1000 | 6.4 | NA | 360° | ✓ | | 2D BBox & 3D BBox | Link |
| KITTI-360 | 2020 | 366 | 2.5 | EU | 360° | ✓ | | 3D BBox & 3D Seg | Link |
| ONCE | 2021 | - | 144 | AS | 360° | ✓ | | 3D BBox | Link |
| nuPlan | 2021 | - | 120 | AS & NA | 360° | ✓ | | 3D BBox | Link |
| Argoverse V2 | 2022 | 1000 | 4 | NA | 360° | ✓ | HDMap | 3D BBox | Link |
| DriveLM | 2023 | 1000 | 5.5 | AS & NA | 360° | ✗ | | Language | Link |

(back to top)
959 |
960 |
961 |
### Mapping

| Dataset | Year | Scenes | Frames | Camera | Lidar | Annotation Type | Space | Inst. | Track | Paper |
|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
| Caltech Lanes | 2008 | 4 | 1224/1224 | | ✗ | | PV | ✓ | ✗ | Link |
| VPG | 2017 | - | 20K/20K | | ✗ | | PV | ✗ | - | Link |
| TUsimple | 2017 | 6.4K | 6.4K/128K | | ✗ | | PV | ✓ | ✗ | Link |
| CULane | 2018 | - | 133K/133K | | ✗ | | PV | ✓ | - | Link |
| ApolloScape | 2018 | 235 | 115K/115K | | ✓ | | PV | ✗ | ✗ | Link |
| LLAMAS | 2019 | 14 | 79K/100K | Front-view Image | ✗ | Laneline | PV | ✓ | ✗ | Link |
| 3D Synthetic | 2020 | - | 10K/10K | | ✗ | | PV | ✓ | - | Link |
| CurveLanes | 2020 | - | 150K/150K | | ✗ | | PV | ✓ | - | Link |
| VIL-100 | 2021 | 100 | 10K/10K | | ✗ | | PV | ✓ | ✗ | Link |
| OpenLane-V1 | 2022 | 1K | 200K/200K | | ✗ | | 3D | ✓ | ✓ | Link |
| ONCE-3DLane | 2022 | - | 211K/211K | | ✗ | | 3D | ✓ | - | Link |
| OpenLane-V2 | 2023 | 2K | 72K/72K | Multi-view Image | ✗ | Lane Centerline, Lane Segment | 3D | ✓ | ✓ | Link |

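The `Space` column above distinguishes lanes annotated in perspective view (PV) from lanes annotated in 3D. As a rough reference for how the two spaces relate, below is a minimal sketch that projects a 3D lane centerline onto the image plane with a pinhole camera model; the intrinsic matrix `K`, the `project_lane` helper, and the example lane are illustrative assumptions, not values or APIs from any dataset in the table.

```python
# A minimal sketch relating the 3D and PV lane spaces above: project 3D lane
# points (camera coordinates: x right, y down, z forward) onto the image plane.
# The intrinsic matrix K is an illustrative assumption, not from any dataset.
import numpy as np

K = np.array([[1000.0,    0.0, 960.0],   # fx,  0, cx
              [   0.0, 1000.0, 540.0],   #  0, fy, cy
              [   0.0,    0.0,   1.0]])

def project_lane(points_3d: np.ndarray, K: np.ndarray) -> np.ndarray:
    """points_3d: (N, 3) in camera coordinates -> (N, 2) pixel coordinates."""
    uvw = points_3d @ K.T             # homogeneous pixel coordinates
    return uvw[:, :2] / uvw[:, 2:3]   # perspective division by depth

# A straight centerline 1.8 m below the camera, receding from 5 m to 50 m ahead.
lane_3d = np.stack([np.zeros(10), np.full(10, 1.8), np.linspace(5, 50, 10)], axis=1)
print(project_lane(lane_3d, K))  # nearer points land lower in the image
```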
(back to top)
1162 |
### Prediction and Planning

| Subtask | Input | Output | Evaluation | Dataset |
|:---|:---|:---|:---|:---|
| Motion Prediction | Surrounding Traffic States | Spatiotemporal Trajectories of Single/Multiple Vehicle(s) | Displacement Error | Argoverse, nuScenes, Waymo, Interaction, MONA |
| Trajectory Planning | Motion States for Ego Vehicles, Scenario Cognition and Prediction | Trajectories for Ego Vehicles | Displacement Error, Safety, Compliance, Comfort | nuPlan, CARLA, MetaDrive, Apollo |
| Path Planning | Maps for Road Network | Routes Connecting Nodes and Links | Efficiency, Energy Conservation | OpenStreetMap, Transportation Networks, DTAlite, PeMS, New York City Taxi Data |

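Motion prediction in the table above is scored by displacement error. For concreteness, here is a minimal sketch of the two common variants, average displacement error (ADE) and final displacement error (FDE), assuming trajectories are `(T, 2)` arrays of positions; `displacement_errors` is a hypothetical helper, not an official implementation of any benchmark listed.

```python
# A minimal sketch of the displacement errors used to evaluate motion
# prediction: ADE averages the per-timestep error, FDE takes the final step.
import numpy as np

def displacement_errors(pred: np.ndarray, gt: np.ndarray) -> tuple[float, float]:
    """pred, gt: (T, 2) trajectories in meters. Returns (ADE, FDE)."""
    dists = np.linalg.norm(pred - gt, axis=-1)  # per-timestep Euclidean error
    return float(dists.mean()), float(dists[-1])

# Example: a 3-step prediction that slowly drifts off the ground truth.
pred = np.array([[0.0, 0.0], [1.0, 0.1], [2.0, 0.4]])
gt   = np.array([[0.0, 0.0], [1.0, 0.0], [2.0, 0.0]])
print(displacement_errors(pred, gt))  # -> (0.1666..., 0.4)
```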
(back to top)
1258 |
1259 |
1260 |
1433 |
1434 |
1435 |
1436 | ## License
1437 | Open-sourced Data Ecosystem in Autonomous Driving is released under the [Apache 2.0 license](./LICENSE).
1438 |
1439 |
1440 |
(back to top)
1441 |
1442 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DriveAGI
2 | This is **"The One"** project that [**`OpenDriveLab`**](https://opendrivelab.com/) is committed to contributing to the community, providing our thoughts and a general picture of how to embrace `foundation models` in autonomous driving.
3 |
4 | ## Table of Contents
5 | - [NEWS](#news)
6 | - [At A Glance](#at-a-glance)
7 | - 🚀 [Vista](#vista) (NeurIPS 2024)
8 | - ⭐ [GenAD: OpenDV Dataset](#opendv) (CVPR 2024 Highlight)
9 | - ⭐ [DriveLM](#drivelm) (ECCV 2024 Oral)
10 | - [DriveData Survey](#drivedata-survey)
11 |
13 | - [OpenScene](#openscene)
14 | - [OpenLane-V2 Update](#openlane-v2-update)
15 |
16 |
17 |
18 | ## NEWS
19 |
20 |
21 | **[ NEW❗️] `2024/09/08`** We released a mini version of `OpenDV-YouTube`, containing **25 hours** of driving videos. Feel free to try the mini subset by following the instructions at [OpenDV-mini](https://github.com/OpenDriveLab/DriveAGI/blob/main/opendv/README.md)!
22 |
23 | **`2024/05/28`** We released our latest research, [Vista](#vista), a generalizable driving world model. It's capable of predicting high-fidelity and long-horizon futures, executing multi-modal actions, and serving as a generalizable reward function to assess driving behaviors.
24 |
25 |
26 | **`2024/03/24`** `OpenDV-YouTube Update:` **Full suite of toolkits for OpenDV-YouTube** is now available, including data downloading and processing scripts, as well as language annotations. Please refer to [OpenDV-YouTube](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv).
27 |
28 | **`2024/03/15`** We released the complete video list of `OpenDV-YouTube`, a large-scale driving video dataset, for the [GenAD](https://arxiv.org/abs/2403.09630) project. Data downloading and processing scripts, as well as language annotations, will be released next week. Stay tuned.
29 |
30 | **`2024/01/24`**
31 | We are excited to announce some updates to [our survey](#drivedata-survey) and would like to thank John Lambert and Klemens Esterle from the community for their advice on improving the manuscript.
32 |
33 |
34 |
35 | ## At A Glance
36 |
37 |
38 | Here are some key components to construct a large foundation model curated for an autonomous system.
39 |
40 | 
41 |
42 |
43 | Below we would like to share the latest updates from our team on the **`DriveData`** side. We will release details of the **`DriveEngine`** and **`DriveAGI`** in the future.
44 |
45 |
46 | ## Vista
47 |
48 |
49 |
50 |
51 |
52 |
53 | > Simulated futures in a wide range of driving scenarios by [Vista](https://arxiv.org/abs/2405.17398). Best viewed on [demo page](https://vista-demo.github.io/).
54 |
55 | ### [🌏 **A Generalizable Driving World Model with High Fidelity and Versatile Controllability**](https://arxiv.org/abs/2405.17398) (NeurIPS 2024)
56 |
57 | **Quick facts:**
58 | - Introducing the world's first **generalizable driving world model**.
59 | - Task: High-fidelity, action-conditioned, and long-horizon future prediction for driving scenes in the wild.
60 | - Dataset: [`OpenDV-YouTube`](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv), `nuScenes`
61 | - Code and model: https://github.com/OpenDriveLab/Vista
62 | - Video Demo: https://vista-demo.github.io
63 | - Related work: [Vista](https://arxiv.org/abs/2405.17398), [GenAD](https://arxiv.org/abs/2403.09630)
64 |
65 | ```bibtex
66 | @inproceedings{gao2024vista,
67 | title={Vista: A Generalizable Driving World Model with High Fidelity and Versatile Controllability},
68 | author={Shenyuan Gao and Jiazhi Yang and Li Chen and Kashyap Chitta and Yihang Qiu and Andreas Geiger and Jun Zhang and Hongyang Li},
69 | booktitle={Advances in Neural Information Processing Systems (NeurIPS)},
70 | year={2024}
71 | }
72 |
73 | @inproceedings{yang2024genad,
74 | title={{Generalized Predictive Model for Autonomous Driving}},
75 | author={Jiazhi Yang and Shenyuan Gao and Yihang Qiu and Li Chen and Tianyu Li and Bo Dai and Kashyap Chitta and Penghao Wu and Jia Zeng and Ping Luo and Jun Zhang and Andreas Geiger and Yu Qiao and Hongyang Li},
76 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
77 | year={2024}
78 | }
79 | ```
80 |
81 | ## GenAD: OpenDV Dataset
82 | 
83 | > Examples of **real-world** driving scenarios in the OpenDV dataset, including urban, highway, rural scenes, etc.
84 |
85 | ### [⭐ **Generalized Predictive Model for Autonomous Driving**](https://arxiv.org/abs/2403.09630) (**CVPR 2024, Highlight**)
86 |
87 | ### [Paper](https://arxiv.org/abs/2403.09630) | [Video](https://www.youtube.com/watch?v=a4H6Jj-7IC0) | [Poster](assets/cvpr24_genad_poster.png) | [Slides](https://opendrivelab.github.io/content/GenAD_slides_with_vista.pdf)
88 |
89 | 🎦 The **largest driving video dataset** to date, containing more than **1700 hours** of real-world driving videos, 300 times larger than the widely used nuScenes dataset.
90 |
91 |
92 | - **Complete video list** (under YouTube license): [OpenDV Videos](https://docs.google.com/spreadsheets/d/1bHWWP_VXeEe5UzIG-QgKFBdH7mNlSC4GFSJkEhFnt2I).
93 |   - The downloaded raw videos (`mostly 1080P`) consume about `3 TB` of storage space. However, these hour-long videos cannot be used directly for model training, as loading them is extremely memory-consuming.
94 |   - Therefore, we preprocess them into consecutive images, which are far more flexible and efficient to load during training (a minimal sketch of this step is given below). The processed images consume about `24 TB` of storage space in total.
95 |   - It's recommended to set up your experiments on a small subset, say **1/20** of the whole dataset. An official mini subset is also provided; refer to [**OpenDV-mini**](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv#about-opendv-youtube-and-opendv-mini) for details. After stabilizing the training, you can then apply your method to the whole dataset and hope for the best 🤞.
96 | - **[ New❗️] Mini subset**: [OpenDV-mini](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv).
97 |   - A mini version of `OpenDV-YouTube`. The raw videos consume about `44 GB` of storage space and the processed images about `390 GB`.
98 | - **Step-by-step instructions for data preparation**: [OpenDV-YouTube](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv/README.md).
99 | - **Language annotations for OpenDV-YouTube**: [OpenDV-YouTube-Language](https://huggingface.co/datasets/OpenDriveLab/OpenDV-YouTube-Language).
100 |
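For orientation, the sketch below shows what the video-to-image preprocessing amounts to: subsampling each video to a fixed frame rate and dumping JPEGs. It is a minimal illustration assuming OpenCV is available; the function name and the 10 FPS target are our assumptions, and the official `opendv` scripts (`scripts/video2img.py` with `configs/video2img.json`) should be used for actual data preparation.

```python
# A minimal sketch (not the official pipeline) of turning hour-long driving
# videos into consecutive images that are cheap to load during training.
import os
import cv2  # assumes opencv-python is installed

def extract_frames(video_path: str, out_dir: str, target_fps: float = 10.0) -> int:
    """Subsample `video_path` to roughly `target_fps` and dump frames as JPEGs."""
    os.makedirs(out_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    src_fps = cap.get(cv2.CAP_PROP_FPS) or target_fps  # fall back if FPS unknown
    step = max(1, round(src_fps / target_fps))         # keep every `step`-th frame
    idx = saved = 0
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        if idx % step == 0:
            cv2.imwrite(os.path.join(out_dir, f"{saved:07d}.jpg"), frame)
            saved += 1
        idx += 1
    cap.release()
    return saved
```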
101 |
102 | **Quick facts:**
103 | - Task: large-scale video prediction for driving scenes.
104 | - Data source: `YouTube`, with a careful collection and filtering process.
105 | - Diversity Highlights: 1700 hours of driving videos, covering more than 244 cities in 40 countries.
106 | - Related work: [GenAD](https://arxiv.org/abs/2403.09630) **`Accepted at CVPR 2024, Highlight`**
107 | - `Note`: Annotations for other public datasets in OpenDV-2K will not be released, since we randomly sampled a subset of them for training; these samples are incomplete and hard to trace back to their original files. Nevertheless, it's easy to reproduce the collection and annotation process on your own by following [our paper](https://arxiv.org/abs/2403.09630).
108 |
109 | ```bibtex
110 | @inproceedings{yang2024genad,
111 | title={Generalized Predictive Model for Autonomous Driving},
112 | author={Jiazhi Yang and Shenyuan Gao and Yihang Qiu and Li Chen and Tianyu Li and Bo Dai and Kashyap Chitta and Penghao Wu and Jia Zeng and Ping Luo and Jun Zhang and Andreas Geiger and Yu Qiao and Hongyang Li},
113 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
114 | year={2024}
115 | }
116 | ```
117 |
118 | ## DriveLM
119 | Introducing the first benchmark on **Language Prompt for Driving**.
120 |
121 | **Quick facts:**
122 | - Task: given the language prompts as input, predict the trajectory in the scene
123 | - Origin dataset: `nuScenes`, `CARLA (To be released)`
124 | - Repo: https://github.com/OpenDriveLab/DriveLM, https://github.com/OpenDriveLab/ELM
125 | - Related work: [DriveLM](https://arxiv.org/abs/2312.14150), [ELM](https://arxiv.org/abs/2403.04593)
126 | - Related challenge: [Driving with Language AGC Challenge 2024](https://opendrivelab.com/challenge2024/#driving_with_language)
127 |
128 |
129 | ## DriveData Survey
130 |
131 |
132 | ### Abstract
133 | With the continuous maturation and application of autonomous driving technology, a systematic examination of open-source autonomous driving datasets becomes instrumental in fostering the robust evolution of the industry ecosystem. In this survey, we provide a comprehensive analysis of more than 70 papers covering the timeline, impact, challenges, and future trends of autonomous driving datasets.
134 |
135 | > **Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future**
136 | > - [English Version](https://arxiv.org/abs/2312.03408)
137 | > - [Chinese Version](https://www.sciengine.com/SSI/doi/10.1360/SSI-2023-0313) **`Accepted at SCIENTIA SINICA Informationis (中文版)`**
138 |
139 | ```bibtex
140 | @article{li2024_driving_dataset_survey,
141 | title = {Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future},
142 | author = {Hongyang Li and Yang Li and Huijie Wang and Jia Zeng and Huilin Xu and Pinlong Cai and Li Chen and Junchi Yan and Feng Xu and Lu Xiong and Jingdong Wang and Futang Zhu and Chunjing Xu and Tiancai Wang and Fei Xia and Beipeng Mu and Zhihui Peng and Dahua Lin and Yu Qiao},
143 | journal = {SCIENTIA SINICA Informationis},
144 | year = {2024},
145 | doi = {10.1360/SSI-2023-0313}
146 | }
147 | ```
148 |
149 |
153 |
154 | 
155 | > Current autonomous driving datasets can broadly be categorized into two generations since the 2010s. We define the Impact (y-axis) of a dataset based on sensor configuration, input modality, task category, data scale, ecosystem, etc.
156 |
157 | 
158 |
159 | ### Related Work Collection
160 |
161 | We present comprehensive paper collections, leaderboards, and challenges. (Click to expand each section below.)
162 |
163 |
<details>
<summary><b>Challenges and Leaderboards</b></summary>

| Title | Host | Year | Task | Entry |
|:---|:---|:---|:---|:---|
| Autonomous Driving Challenge | OpenDriveLab | CVPR2023 | Perception / OpenLane Topology | 111 |
| | | | Perception / Online HD Map Construction | |
| | | | Perception / 3D Occupancy Prediction | |
| | | | Prediction & Planning / nuPlan Planning | |
| Waymo Open Dataset Challenges | Waymo | CVPR2023 | Perception / 2D Video Panoptic Segmentation | 35 |
| | | | Perception / Pose Estimation | |
| | | | Prediction / Motion Prediction | |
| | | | Prediction / Sim Agents | |
| | | CVPR2022 | Prediction / Motion Prediction | 128 |
| | | | Prediction / Occupancy and Flow Prediction | |
| | | | Perception / 3D Semantic Segmentation | |
| | | | Perception / 3D Camera-only Detection | |
| | | CVPR2021 | Prediction / Motion Prediction | 115 |
| | | | Prediction / Interaction Prediction | |
| | | | Perception / Real-time 3D Detection | |
| | | | Perception / Real-time 2D Detection | |
| Argoverse Challenges | Argoverse | CVPR2023 | Prediction / Multi-agent Forecasting | 81 |
| | | | Perception & Prediction / Unified Sensor-based Detection, Tracking, and Forecasting | |
| | | | Perception / LiDAR Scene Flow | |
| | | | Prediction / 3D Occupancy Forecasting | |
| | | CVPR2022 | Perception / 3D Object Detection | 81 |
| | | | Prediction / Motion Forecasting | |
| | | | Perception / Stereo Depth Estimation | |
| | | CVPR2021 | Perception / Stereo Depth Estimation | 368 |
| | | | Prediction / Motion Forecasting | |
| | | | Perception / Streaming 2D Detection | |
| CARLA Autonomous Driving Challenge | CARLA Team, Intel | 2023 | Planning / CARLA AD Challenge 2.0 | - |
| | | NeurIPS2022 | Planning / CARLA AD Challenge 1.0 | 19 |
| | | NeurIPS2021 | Planning / CARLA AD Challenge 1.0 | - |
| Guangdong-Hong Kong-Macao Greater Bay Area (Huangpu) International Algorithm Case Competition | Pazhou Lab | 2023 | Perception / Cross-scene Monocular Depth Estimation | - |
| | | | Perception / Roadside mmWave Radar Calibration and Object Tracking | - |
| | | 2022 | Perception / Roadside 3D Perception Algorithms | - |
| | | | Perception / Storefront Sign Text Recognition in Street-view Images | - |
| AI Driving Olympics | ETH Zurich, University of Montreal, Motional | NeurIPS2021 | Perception / nuScenes Panoptic | 11 |
| | | ICRA2021 | Perception / nuScenes Detection | 456 |
| | | | Perception / nuScenes Tracking | |
| | | | Prediction / nuScenes Prediction | |
| | | | Perception / nuScenes LiDAR Segmentation | |
| Jittor AI Algorithm Challenge | Department of Information Sciences, NSFC | 2021 | Perception / Traffic Sign Detection | 37 |
| KITTI Vision Benchmark Suite | University of Tübingen | 2012 | Perception / Stereo, Flow, Scene Flow, Depth, Odometry, Object, Tracking, Road, Semantics | 5,610 |

</details>
391 |
392 | (back to top)
393 |
394 |
395 |
396 |
<details>
<summary><b>Perception Datasets</b></summary>

| Dataset | Year | Scenes | Hours | Region | Camera | Lidar | Other Sensors | Annotation | Paper |
|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
| KITTI | 2012 | 50 | 6 | EU | Front-view | ✗ | GPS & IMU | 2D BBox & 3D BBox | Link |
| Cityscapes | 2016 | - | - | EU | Front-view | ✗ | | 2D Seg | Link |
| Lost and Found | 2016 | 112 | - | - | Front-view | ✗ | | 2D Seg | Link |
| Mapillary | 2016 | - | - | Global | Street-view | ✗ | | 2D Seg | Link |
| DDD17 | 2017 | 36 | 12 | EU | Front-view | ✗ | GPS & CAN-bus & Event Camera | - | Link |
| Apolloscape | 2016 | 103 | 2.5 | AS | Front-view | ✗ | GPS & IMU | 3D BBox & 2D Seg | Link |
| BDD-X | 2018 | 6984 | 77 | NA | Front-view | ✗ | | Language | Link |
| HDD | 2018 | - | 104 | NA | Front-view | ✓ | GPS & IMU & CAN-bus | 2D BBox | Link |
| IDD | 2018 | 182 | - | AS | Front-view | ✗ | | 2D Seg | Link |
| SemanticKITTI | 2019 | 50 | 6 | EU | ✗ | ✓ | | 3D Seg | Link |
| Woodscape | 2019 | - | - | Global | 360° | ✓ | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| DrivingStereo | 2019 | 42 | - | AS | Front-view | ✓ | | - | Link |
| Brno-Urban | 2019 | 67 | 10 | EU | Front-view | ✓ | GPS & IMU & Infrared Camera | - | Link |
| A*3D | 2019 | - | 55 | AS | Front-view | ✓ | | 3D BBox | Link |
| Talk2Car | 2019 | 850 | 283.3 | NA | Front-view | ✓ | | Language & 3D BBox | Link |
| Talk2Nav | 2019 | 10714 | - | Sim | 360° | ✗ | | Language | Link |
| PIE | 2019 | - | 6 | NA | Front-view | ✗ | | 2D BBox | Link |
| UrbanLoco | 2019 | 13 | - | AS & NA | 360° | ✓ | IMU | - | Link |
| TITAN | 2019 | 700 | - | AS | Front-view | ✗ | | 2D BBox | Link |
| H3D | 2019 | 160 | 0.77 | NA | Front-view | ✓ | GPS & IMU | - | Link |
| A2D2 | 2020 | - | 5.6 | EU | 360° | ✓ | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| CARRADA | 2020 | 30 | 0.3 | NA | Front-view | ✗ | Radar | 3D BBox | Link |
| DAWN | 2019 | - | - | Global | Front-view | ✗ | | 2D BBox | Link |
| 4Seasons | 2019 | - | - | - | Front-view | ✗ | GPS & IMU | - | Link |
| UNDD | 2019 | - | - | - | Front-view | ✗ | | 2D Seg | Link |
| SemanticPOSS | 2020 | - | - | AS | ✗ | ✓ | GPS & IMU | 3D Seg | Link |
| Toronto-3D | 2020 | 4 | - | NA | ✗ | ✓ | | 3D Seg | Link |
| ROAD | 2021 | 22 | - | EU | Front-view | ✗ | | 2D BBox & Topology | Link |
| Reasonable Crowd | 2021 | - | - | Sim | Front-view | ✗ | | Language | Link |
| METEOR | 2021 | 1250 | 20.9 | AS | Front-view | ✗ | GPS | Language | Link |
| PandaSet | 2021 | 179 | - | NA | 360° | ✓ | GPS & IMU | 3D BBox | Link |
| MUAD | 2022 | - | - | Sim | 360° | ✓ | | 2D Seg & 2D BBox | Link |
| TAS-NIR | 2022 | - | - | - | Front-view | ✗ | Infrared Camera | 2D Seg | Link |
| LiDAR-CS | 2022 | 6 | - | Sim | ✗ | ✓ | | 3D BBox | Link |
| WildDash | 2022 | - | - | - | Front-view | ✗ | | 2D Seg | Link |
| OpenScene | 2023 | 1000 | 5.5 | AS & NA | 360° | ✗ | | 3D Occ | Link |
| ZOD | 2023 | 1473 | 8.2 | EU | 360° | ✓ | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| nuScenes | 2019 | 1000 | 5.5 | AS & NA | 360° | ✓ | GPS & CAN-bus & Radar & HDMap | 3D BBox & 3D Seg | Link |
| Argoverse V1 | 2019 | 324k | 320 | NA | 360° | ✓ | HDMap | 3D BBox & 3D Seg | Link |
| Waymo | 2019 | 1000 | 6.4 | NA | 360° | ✓ | | 2D BBox & 3D BBox | Link |
| KITTI-360 | 2020 | 366 | 2.5 | EU | 360° | ✓ | | 3D BBox & 3D Seg | Link |
| ONCE | 2021 | - | 144 | AS | 360° | ✓ | | 3D BBox | Link |
| nuPlan | 2021 | - | 120 | AS & NA | 360° | ✓ | | 3D BBox | Link |
| Argoverse V2 | 2022 | 1000 | 4 | NA | 360° | ✓ | HDMap | 3D BBox | Link |
| DriveLM | 2023 | 1000 | 5.5 | AS & NA | 360° | ✗ | | Language | Link |

</details>
1050 |
1051 | (back to top)
1052 |
1053 |
1054 |
<details>
<summary><b>Mapping Datasets</b></summary>

| Dataset | Year | Scenes | Frames | Camera | Lidar | Annotation Type | Space | Inst. | Track | Paper |
|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
| Caltech Lanes | 2008 | 4 | 1224/1224 | | ✗ | | PV | ✓ | ✗ | Link |
| VPG | 2017 | - | 20K/20K | | ✗ | | PV | ✗ | - | Link |
| TUsimple | 2017 | 6.4K | 6.4K/128K | | ✗ | | PV | ✓ | ✗ | Link |
| CULane | 2018 | - | 133K/133K | | ✗ | | PV | ✓ | - | Link |
| ApolloScape | 2018 | 235 | 115K/115K | | ✓ | | PV | ✗ | ✗ | Link |
| LLAMAS | 2019 | 14 | 79K/100K | Front-view Image | ✗ | Laneline | PV | ✓ | ✗ | Link |
| 3D Synthetic | 2020 | - | 10K/10K | | ✗ | | PV | ✓ | - | Link |
| CurveLanes | 2020 | - | 150K/150K | | ✗ | | PV | ✓ | - | Link |
| VIL-100 | 2021 | 100 | 10K/10K | | ✗ | | PV | ✓ | ✗ | Link |
| OpenLane-V1 | 2022 | 1K | 200K/200K | | ✗ | | 3D | ✓ | ✓ | Link |
| ONCE-3DLane | 2022 | - | 211K/211K | | ✗ | | 3D | ✓ | - | Link |
| OpenLane-V2 | 2023 | 2K | 72K/72K | Multi-view Image | ✗ | Lane Centerline, Lane Segment | 3D | ✓ | ✓ | Link |

</details>
1254 |
1255 | ### Prediction and Planning Datasets
1256 |
1261 | | Subtask | Input | Output | Evaluation | Dataset |
1262 | |---|---|---|---|---|
1269 | | Motion Prediction | Surrounding Traffic States | Spatiotemporal Trajectories of Single/Multiple Vehicle(s) | Displacement Error | Argoverse, nuScenes, Waymo, Interaction, MONA |
1297 | | Trajectory Planning | Motion States for Ego Vehicles, Scenario Cognition and Prediction | Trajectories for Ego Vehicles | Displacement Error, Safety, Compliance, Comfort | nuPlan, CARLA, MetaDrive, Apollo |
1319 | | Path Planning | Maps for Road Network | Routes Connecting to Nodes and Links | Efficiency, Energy Conservation | OpenStreetMap, Transportation Networks, DTAlite, PeMS, New York City Taxi Data |
1347 |
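Here "Displacement Error" is commonly instantiated as the Average / Final Displacement Error (ADE / FDE). As a general sketch (exact horizons and multi-modal variants differ across the benchmarks above), for a predicted trajectory $\hat{y}_{1:T}$ and its ground truth $y_{1:T}$:

$$\mathrm{ADE} = \frac{1}{T}\sum_{t=1}^{T}\lVert \hat{y}_t - y_t \rVert_2, \qquad \mathrm{FDE} = \lVert \hat{y}_T - y_T \rVert_2$$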
1348 |
1349 |
1350 |
1351 |
1352 |
1353 | ## OpenScene
1354 |
1355 |
1356 | The largest up-to-date **3D Occupancy Forecasting** dataset for visual pre-training.
1357 |
1358 | **Quick facts:**
1359 | - Task: given large-scale driving data, predict the 3D occupancy of the environment.
1360 | - Origin dataset: `nuPlan`
1361 | - Repo: https://github.com/OpenDriveLab/OpenScene
1362 | - Related work: [OccNet](https://github.com/OpenDriveLab/OccNet)
1363 | - Related challenge: [3D Occupancy Prediction Challenge 2023](https://opendrivelab.com/AD23Challenge.html#Track3), [Occupancy and Flow AGC Challenge 2024](https://opendrivelab.com/challenge2024/#occupancy_and_flow), [Predictive World Model AGC Challenge 2024](https://opendrivelab.com/challenge2024/#predictive_world_model)
1364 |
1365 |
1366 | ## OpenLane-V2 Update
1367 |
1368 |
1369 | Enriching [OpenLane-V2](https://github.com/OpenDriveLab/OpenLane-V2) with **Standard Definition (SD) Map and Map Elements**.
1370 |
1371 | **Quick facts:**
1372 | - Task: given multi-view images and an SD map (also known as an ADAS map) as input, build the driving scene on the fly _without_ the aid of an HD map.
1373 | - Repo: https://github.com/OpenDriveLab/OpenLane-V2
1374 | - Related work: [OpenLane-V2](https://openreview.net/forum?id=OMOOO3ls6g), [TopoNet](https://github.com/OpenDriveLab/TopoNet), [LaneSegNet](https://github.com/OpenDriveLab/LaneSegNet)
1375 | - Related challenge: [Lane Topology Challenge 2023](https://opendrivelab.com/AD23Challenge.html#openlane_topology), [Mapless Driving AGC Challenge 2024](https://opendrivelab.com/challenge2024/#mapless_driving)
1376 |
1377 |
1378 |
1379 |
--------------------------------------------------------------------------------
/assets/Affiliation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/Affiliation.png
--------------------------------------------------------------------------------
/assets/Drivedata_overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/Drivedata_overview.jpg
--------------------------------------------------------------------------------
/assets/Drivedata_timeline.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/Drivedata_timeline.jpg
--------------------------------------------------------------------------------
/assets/cvpr24_genad_poster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/cvpr24_genad_poster.png
--------------------------------------------------------------------------------
/assets/opendv_examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/opendv_examples.png
--------------------------------------------------------------------------------
/assets/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/overview.png
--------------------------------------------------------------------------------
/assets/vista-teaser.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/vista-teaser.gif
--------------------------------------------------------------------------------
/opendv/.gitignore:
--------------------------------------------------------------------------------
1 | # full OpenDV-YouTube dataset
2 | *meta/
3 | OpenDV-YouTube/
4 | annos
5 |
6 | # logs
7 | *exceptions.txt
8 | *output.txt
9 | *finished.txt
10 |
11 | # Byte-compiled / optimized / DLL files
12 | __pycache__/
13 | *.py[cod]
14 | *$py.class
15 |
16 | # C extensions
17 | *.so
18 |
19 | # Distribution / packaging
20 | .Python
21 | build/
22 | develop-eggs/
23 | dist/
24 | downloads/
25 | eggs/
26 | .eggs/
27 | lib/
28 | lib64/
29 | parts/
30 | sdist/
31 | var/
32 | wheels/
33 | pip-wheel-metadata/
34 | share/python-wheels/
35 | *.egg-info/
36 | .installed.cfg
37 | *.egg
38 | MANIFEST
39 |
40 | # PyInstaller
41 | # Usually these files are written by a python script from a template
42 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
43 | *.manifest
44 | *.spec
45 |
46 | # Installer logs
47 | pip-log.txt
48 | pip-delete-this-directory.txt
49 |
50 | # Unit test / coverage reports
51 | htmlcov/
52 | .tox/
53 | .nox/
54 | .coverage
55 | .coverage.*
56 | .cache
57 | nosetests.xml
58 | coverage.xml
59 | *.cover
60 | *.py,cover
61 | .hypothesis/
62 | .pytest_cache/
63 |
64 | # Translations
65 | *.mo
66 | *.pot
67 |
68 | # Django stuff:
69 | *.log
70 | local_settings.py
71 | db.sqlite3
72 | db.sqlite3-journal
73 |
74 | # Flask stuff:
75 | instance/
76 | .webassets-cache
77 |
78 | # Scrapy stuff:
79 | .scrapy
80 |
81 | # Sphinx documentation
82 | docs/_build/
83 |
84 | # PyBuilder
85 | target/
86 |
87 | # Jupyter Notebook
88 | .ipynb_checkpoints
89 |
90 | # IPython
91 | profile_default/
92 | ipython_config.py
93 |
94 | # pyenv
95 | .python-version
96 |
97 | # pipenv
98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
101 | # install all needed dependencies.
102 | #Pipfile.lock
103 |
104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105 | __pypackages__/
106 |
107 | # Celery stuff
108 | celerybeat-schedule
109 | celerybeat.pid
110 |
111 | # SageMath parsed files
112 | *.sage.py
113 |
114 | # Environments
115 | .env
116 | .venv
117 | env/
118 | venv/
119 | ENV/
120 | env.bak/
121 | venv.bak/
122 |
123 | # Spyder project settings
124 | .spyderproject
125 | .spyproject
126 |
127 | # Rope project settings
128 | .ropeproject
129 |
130 | # mkdocs documentation
131 | /site
132 |
133 | # mypy
134 | .mypy_cache/
135 | .dmypy.json
136 | dmypy.json
137 |
138 | # Pyre type checker
139 | .pyre/
140 |
141 | .gitconfig
142 | .local
143 | .jupyter
144 | .DS_Store
145 | .python_history
146 |
147 | data/OpenLane-V2/*
148 | !data/OpenLane-V2/data_dict_sample.json
149 | !data/OpenLane-V2/data_dict_example.json
150 | !data/OpenLane-V2/openlanev2.md5
151 | !data/OpenLane-V2/preprocess*
152 | !data/OpenLane-V2/data_dict_subset_A.json
153 |
154 | RoadData/vis
155 | RoadData/gt_results.json
156 | RoadDataTool/vis
157 | RoadDataTool/gt_result.json
158 | RoadDataTool/pred_case1_no_turbulence.json
159 |
--------------------------------------------------------------------------------
/opendv/README.md:
--------------------------------------------------------------------------------
1 | # OpenDV-YouTube
2 | Due to the YouTube license, we cannot directly offer our processed data. However, you can follow the steps below to download the raw data and process it yourself.
3 |
4 | **
[ NEW❗️]**: We just released the **
OpenDV-mini** subset!
5 | Please feel free to try the mini subset by following steps. Necessary information is also contained in our
OpenDV-YouTube Google Sheet (marked as `Mini` in the column `Mini / Full Set`).
6 |
7 | ## About OpenDV-YouTube and OpenDV-mini
8 |
9 | - The complete dataset, OpenDV-YouTube, is the **largest driving video dataset** to date, containing more than **1700 hours** of real-world driving videos, 300 times larger than the widely used nuScenes dataset.
10 |
11 | - The mini subset, OpenDV-mini, contains about **28 hours** of videos, with diverse geographical distribution and various camera settings. Among these videos, **25 hours** are used as `mini-train` and the other **3 hours** are used as `mini-val`.
12 |
13 | ## Environment Setup
14 |
15 | **We recommend processing the dataset in a `Linux` environment, since `Windows` may have issues with file paths.**
16 |
17 | Install the required packages by running the following command.
18 |
19 | ```cmd
20 | conda create -n opendv python=3.10 -y
21 | conda activate opendv
22 | pip install -r requirements.txt
23 | ```
24 |
25 | In case the metadata of the downloaded videos is fragmented, we recommend installing `ffmpeg<=3.4.9`. Instead of using the following commands, you can also clone and build it directly from [the official repository](https://github.com/FFmpeg/FFmpeg/tree/release/3.4).
26 |
27 | ```cmd
28 | # 1. prepare yasm for ffmpeg. If it is already satisfied by your machine, skip to the next step.
29 | wget https://tortall.net/projects/yasm/releases/yasm-1.3.0.tar.gz
30 | tar -xzvf yasm-1.3.0.tar.gz
31 | cd yasm-1.3.0
32 | ./configure
33 | make
34 | make install
35 |
36 | # 2. install ffmpeg<=3.4.9.
37 | wget https://ffmpeg.org/releases/ffmpeg-3.4.9.tar.gz
38 | tar -xzvf ffmpeg-3.4.9.tar.gz
39 | cd ffmpeg-3.4.9
40 | ./configure
41 | make
42 | make install
43 |
44 | # 3. check the installation. Sometimes you may need to reactivate the conda environment to see it working.
45 | ffprobe
46 | ```
47 |
48 | ## Meta Data Preparation
49 | First, download the OpenDV-YouTube Google Sheet as a `csv` file. By default, you should save the file as `meta/OpenDV-YouTube.csv`. You can change it to any path you want, as long as you also change the `csv_path` in the command in the next step.
50 |
51 | Then, run the following command to preprocess the meta data. The default values for `--csv_path` (or `-i`) and `--json_path` (or `-o`) are `meta/OpenDV-YouTube.csv` and `meta/OpenDV-YouTube.json` respectively.
52 |
53 | ```cmd
54 | python scripts/meta_preprocess.py -i CSV_PATH -o JSON_PATH
55 | ```
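For orientation, each entry of the resulting `json` file is one video record. The sketch below is an illustration only (all values are placeholders); the exact key set follows the Google Sheet columns after the renaming in `scripts/meta_preprocess.py` (e.g. `split`, `subset`, `start_discard`, `end_discard`), plus the derived `length` in seconds:

```json
[
    {
        "videoid": "xxxxxxxxxxx",
        "youtuber": "Some Channel",
        "link": "https://www.youtube.com/watch?v=xxxxxxxxxxx",
        "duration": "1:02:03",
        "split": "Train",
        "subset": "Mini",
        "start_discard": 90,
        "end_discard": 60,
        "length": 3723
    }
]
```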
56 |
57 | ## Raw Data Download (Raw videos)
58 |
59 | To download the raw data from YouTube, you should first adjust the configuration in `configs/download.json`.
60 |
61 | Note that the script **supports parallel downloading with multiple workers**, so please set `num_workers` to a value that suits your hardware and network conditions.
62 |
63 | Also, the `format` key in the config file **should strictly obey** the format selection rules of the `youtube-dl` package. We do not recommend changing it unless you are familiar with the package.
64 |
65 | Now you can run the following command to download the raw video data.
66 |
67 | ```cmd
68 | python scripts/youtube_download.py >> download_output.txt
69 | ```
70 |
71 | The download will take about $2000/\mathrm{NUM_{WORKERS}}$ hours, which may vary with your network condition.
72 | The default $\mathrm{NUM_{WORKERS}} = 90$ (roughly 22 hours in total), and you can adjust it in [config](configs/download.json#L7).
73 | The data will take about **3TB** of disk space.
74 |
75 | If you wish to **use the mini subset**, simply add the `--mini` option to your command, i.e. run the following command.
76 |
77 | ```cmd
78 | python scripts/youtube_download.py --mini >> download_output.txt
79 | ```
80 |
81 | You may refer to `download_exceptions.txt` to check whether the download was successful. The file is automatically generated by the script in the root of the `opendv` codebase.
82 |
83 | If downloading with `youtube-dl` is not successful, you can change the `method` in [config](configs/download.json#L4) from `youtube-dl` to `yt-dlp`.
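For orientation, each worker simply shells out to the selected downloader (see `scripts/youtube_download.py`); with the `yt-dlp` method and the default config, the per-video command is equivalent to something like the following, where the youtuber name, video id, and URL are placeholders:

```cmd
yt-dlp -f 'bv*[height<=?1080][height>=720]/b*[height<=?1080][height>=720]' -o 'OpenDV-YouTube/videos/Some_Youtuber/<videoid>.%(ext)s' <url>
```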
84 |
85 | ## Data Preprocessing (Converting videos to images)
86 |
87 | When the download is finished, first adjust the configuration in `configs/video2img.json` as you expect. The script also **supports parallel processing with multiple workers**, so you can set `num_workers` to a value that suits your hardware condition.
88 |
89 | Note that if you want to align with the annotations we provide, `frame_rate` **should not be changed.**
90 |
91 | Then, you can run the following command to preprocess the raw video data.
92 |
93 | ```cmd
94 | python scripts/video2img.py >> vid2img_output.txt
95 | ```
96 |
97 | The preprocessing will take about $8000/\mathrm{NUM_{WORKERS}}$ hours, which may vary with your hardware condition.
98 | The default $\mathrm{NUM_{WORKERS}} = 90$ (roughly 89 hours in total), and you can adjust it in [config](configs/video2img.json#L6).
99 | Resulting images will take about **25TB** of disk space.
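For reference, the processed data lands in a layout like the sketch below (derived from the path logic in the scripts; the youtuber name and video id are placeholders, youtuber names have spaces replaced by underscores, and frames are zero-padded 9-digit `jpg` files):

```
OpenDV-YouTube/
├── videos/                      # raw downloads
│   └── Some_Youtuber/
│       └── <videoid>.mp4
├── full_images/                 # train split
│   └── Some_Youtuber/
│       └── <videoid>/
│           ├── 000000000.jpg
│           ├── 000000001.jpg
│           └── ...
└── val_images/                  # val split
```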
100 |
101 | If you wish to **use the mini subset**, simply add the `--mini` option to your command, i.e. run the following command.
102 |
103 | ```cmd
104 | python scripts/video2img.py --mini >> vid2img_output.txt
105 | ```
106 |
107 | You may refer to `vid2img_exceptions.txt` to check the processing status.
108 |
109 | ## Language Annotations
110 |
111 | The full annotation data, including **commands** and **contexts** of video clips, is available at OpenDV-YouTube-Language. The files are in `json` format, with a total size of about **14GB**.
112 |
113 | The annotation data is aligned with the structure of the preprocessed data. You can use the following code to load the annotations for the train and val splits respectively.
114 |
115 | ```python
116 | import json
117 |
118 | # for train
119 | full_annos = []
120 | for split_id in range(10):
121 | split = json.load(open("10hz_YouTube_train_split{}.json".format(str(split_id)), "r"))
122 | full_annos.extend(split)
123 |
124 | # for val
125 | val_annos = json.load(open("10hz_YouTube_val.json", "r"))
126 | ```
127 |
128 | Annotations are loaded into `full_annos` as a list, where each element contains the annotations for one video clip. All elements are dictionaries with the following structure.
129 |
130 | ```python
131 | {
132 | "cmd": -- command, i.e. the command of the ego vehicle in the video clip.
133 | "blip": -- context, i.e. the BLIP description of the center frame in the video clip.
134 | "folder": -- the relative path from the processed OpenDV-YouTube dataset root to the image folder of the video clip.
135 | "first_frame": -- the filename of the first frame in the clip. Note that this file is included in the video clip.
136 | "last_frame": -- the filename of the last frame in the clip. Note that this file is included in the video clip.
137 | }
138 | ```
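To go from one annotation to its image files, here is a minimal sketch (assuming a hypothetical `DATA_ROOT` that points at the processed dataset root, and the default zero-padded frame naming produced by `scripts/video2img.py`):

```python
import os

DATA_ROOT = "OpenDV-YouTube/full_images"  # assumption: root of the processed train images

def clip_frame_paths(anno, data_root=DATA_ROOT):
    """Return the ordered frame paths of one clip, inclusive of first and last frame."""
    stem_first, ext = os.path.splitext(anno["first_frame"])
    stem_last, _ = os.path.splitext(anno["last_frame"])
    width = len(stem_first)  # zero-padding width of the frame indices (9 by default)
    folder = os.path.join(data_root, anno["folder"])
    return [
        os.path.join(folder, str(i).zfill(width) + ext)
        for i in range(int(stem_first), int(stem_last) + 1)
    ]
```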
139 |
140 | The command, *i.e.* the `cmd` field, can be converted to natural language using the `map_category_to_caption` function. You may refer to [cmd2caption.py](utils/cmd2caption.py#L158) for details.
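For example, a usage sketch with the annotations loaded above (assuming `cmd` stores the category index):

```python
from utils.cmd2caption import map_category_to_caption

anno = full_annos[0]
print(map_category_to_caption(anno["cmd"]))                 # randomly sampled phrasing, e.g. "Steer left."
print(map_category_to_caption(anno["cmd"], diverse=False))  # fixed phrasing, e.g. "Turn left."
```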
141 |
142 | The context, *i.e.* the `blip` field, is the description of the **center frame** in the video clip, generated by `BLIP2`.
143 |
144 |
145 | ## Citation
146 |
147 | If you find our work helpful, please cite the following paper.
148 |
149 | ```bibtex
150 | @inproceedings{yang2024genad,
151 | title={Generalized Predictive Model for Autonomous Driving},
152 | author={Jiazhi Yang and Shenyuan Gao and Yihang Qiu and Li Chen and Tianyu Li and Bo Dai and Kashyap Chitta and Penghao Wu and Jia Zeng and Ping Luo and Jun Zhang and Andreas Geiger and Yu Qiao and Hongyang Li},
153 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
154 | year={2024}
155 | }
156 | ```
--------------------------------------------------------------------------------
/opendv/configs/download.json:
--------------------------------------------------------------------------------
1 | {
2 | "root": "OpenDV-YouTube/videos",
3 | "video_list": "meta/OpenDV-YouTube.json",
4 | "method": "yt-dlp",
5 | "format": "bestvideo[height>=720,height<=1080]/best[height>=720,height<=1080]/bestvideo[height>=720]/best[height>=720]",
6 | "format_for_ytdlp": "bv*[height<=?1080][height>=720]/b*[height<=?1080][height>=720]",
7 | "num_workers": 90,
8 | "exception_file": "download_exceptions.txt"
9 | }
--------------------------------------------------------------------------------
/opendv/configs/video2img.json:
--------------------------------------------------------------------------------
1 | {
2 | "video_root": "OpenDV-YouTube/videos",
3 | "train_img_root": "OpenDV-YouTube/full_images",
4 | "val_img_root": "OpenDV-YouTube/val_images",
5 | "meta_info": "meta/OpenDV-YouTube.json",
6 | "num_workers": 90,
7 | "frame_rate": 10,
8 | "exception_file": "vid2img_exceptions.txt",
9 | "finish_log": "vid2img_finished.txt"
10 | }
--------------------------------------------------------------------------------
/opendv/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/ytdl-org/youtube-dl
2 | git+https://github.com/yt-dlp/yt-dlp@a065086640e888e8d58c615d52ed2f4f4e4c9d18
3 |
4 | opencv-python
5 | decord
6 | tqdm
7 | pandas
--------------------------------------------------------------------------------
/opendv/scripts/meta_preprocess.py:
--------------------------------------------------------------------------------
1 | """
2 | This script is used for preprocessing OpenDV-YouTube meta data, from Google sheet (as csv file) to json file.
3 | The script is a part of the [`GenAD`](https://arxiv.org/abs/2403.09630) project.
4 | """
5 |
6 | import json
7 | import pandas as pd
8 | import numpy as np
9 | import argparse
10 | from tqdm import tqdm
11 |
12 | KEY_MAP = {
13 | 'train / val': 'split',
14 | 'mini / full set': 'subset',
15 | 'nation or area (inferred by gpt)': 'area',
16 | 'state, province, or city (inferred by gpt and refined by human)': 'state',
17 | 'discarded length at the begininning (second)': 'start_discard',
18 | 'discarded length at the ending (second)': 'end_discard'
19 | }
20 |
21 | SPECIFIC_TYPE_MAP = {
22 | 'state': str
23 | }
24 |
25 | def duration2length(duration):
26 | """
27 | duration: HH:MM:SS, or MM:SS
28 | length: int (seconds)
29 | """
30 | duration = duration.split(":")
31 | length = int(duration[0]) * 60 + int(duration[1])
32 | if len(duration) == 3:
33 | length = length * 60 + int(duration[2])
34 | return length
35 |
36 |
37 | def csv2json(csv_path, json_path):
38 | df = pd.read_csv(csv_path)
39 | vid_list = []
40 | keys = df.keys()
41 | for vid_id in tqdm(range(len(df["ID"]))):
42 | vid_info = dict()
43 | for key in keys:
44 | value = df[key][vid_id]
45 | assigned_key = KEY_MAP.get(key.lower(), key.lower())
46 | if assigned_key in SPECIFIC_TYPE_MAP:
47 | value = SPECIFIC_TYPE_MAP[assigned_key](value)
48 | if isinstance(value, np.int64):
49 | value = int(value)
50 | elif value == "nan":
51 | value = "N/A"
52 | vid_info[assigned_key] = value
53 |
54 | vid_info["length"] = duration2length(vid_info["duration"])
55 | vid_list.append(vid_info)
56 |
57 | with open(json_path, "w") as f:
58 | json.dump(vid_list, f, indent=4, ensure_ascii=True)
59 |
60 |
61 | if __name__ == "__main__":
62 | parser = argparse.ArgumentParser(description='Convert OpenDV-YouTube meta data from csv to json')
63 | parser.add_argument('--csv_path', '-i', type=str, default="meta/OpenDV-YouTube.csv", help='path to the csv file')
64 | parser.add_argument('--json_path', '-o', type=str, default="meta/OpenDV-YouTube.json", help='path to the json file')
65 | args = parser.parse_args()
66 |
67 | csv2json(args.csv_path, args.json_path)
--------------------------------------------------------------------------------
/opendv/scripts/video2img.py:
--------------------------------------------------------------------------------
1 | """
2 | This script is used for preprocessing OpenDV-YouTube meta data, from raw video files to image files.
3 | The script is a part of the [`GenAD`](https://arxiv.org/abs/2403.09630) project.
4 | """
5 |
6 | import json
7 | import os, sys
8 | import time
9 | import argparse
10 | from multiprocessing import Pool
11 |
12 | from tqdm import tqdm
13 |
14 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
15 | sys.path.append(root_dir)
16 |
17 | from utils.easydict import EasyDict
18 | from utils.frame_extraction import extract_frames
19 | from utils.download import POSSIBLE_EXTS, youtuber_formatize, get_mini_opendv
20 |
21 |
22 | def collect_unfinished_videos(config, mini=False):
23 | configs = EasyDict(json.load(open(config, "r")))
24 | root = {
25 | "train": configs.train_img_root,
26 | "val": configs.val_img_root
27 | }
28 |
29 | meta_infos = json.load(open(configs.meta_info, "r"))
30 | if mini:
31 | meta_infos = get_mini_opendv(meta_infos)
32 | if os.path.exists(configs.finish_log):
33 | finish_log = set(open(configs.finish_log, "r").readlines())
34 | finish_log = {x.strip() for x in finish_log}
35 | else:
36 | finish_log = set()
37 |
38 | unfinished_videos = []
39 | print("collecting unfinished videos...")
40 | for video_meta in tqdm(meta_infos):
41 | if video_meta["videoid"] in finish_log:
42 | continue
43 | video_path = os.path.join(configs.video_root, youtuber_formatize(video_meta["youtuber"]), video_meta['videoid'])
44 | for ext in POSSIBLE_EXTS:
45 | if os.path.exists(f"{video_path}.{ext}"):
46 | break
47 | if not os.path.exists(f"{video_path}.{ext}"):
48 |             raise ValueError(f"Video {video_meta['videoid']} not found. Maybe something went wrong in the download process?")
49 |
50 | video_info = {
51 | "video_id": video_meta["videoid"],
52 | "video_path": f"{video_path}.{ext}",
53 | "output_dir": os.path.join(root[video_meta["split"].lower()], youtuber_formatize(video_meta["youtuber"]), video_meta['videoid']),
54 | "freq": configs.frame_rate,
55 | "start_discard": video_meta["start_discard"],
56 | "end_discard": video_meta["end_discard"],
57 | "exception_file": configs.exception_file,
58 | "finish_log": configs.finish_log
59 | }
60 | unfinished_videos.append(video_info)
61 |
62 | return unfinished_videos, EasyDict(configs)
63 |
64 |
65 | def convert_multiprocess(video_lists, configs):
66 | video_count = len(video_lists)
67 | with Pool(configs.num_workers) as p:
68 | current_time = time.perf_counter()
69 | for _ in tqdm(p.imap(extract_frames, video_lists), total=video_count):
70 | pass
71 |
72 |
73 | if __name__ == '__main__':
74 | parser = argparse.ArgumentParser()
75 | parser.add_argument('--config', type=str, default='configs/video2img.json')
76 | parser.add_argument('--mini', action='store_true', default=False, help='Convert mini dataset only.')
77 | # parser.add_argument('--start_id', type=int, default=0)
78 | # parser.add_argument('--end_id', type=int, default=-1)
79 | # parser.add_argument('--test_video', type=str, default=None)
80 |
81 | args = parser.parse_args()
82 | video_lists, meta_configs = collect_unfinished_videos(args.config, args.mini)
83 |
84 | # if args.end_id == -1:
85 | # args.end_id = len(video_lists)
86 | # video_lists = video_lists[args.start_id:args.end_id]
87 | # if args.test_video is not None:
88 | # convert_multiprocess([{**video_lists[0], "video_path": args.test_video}], meta_config)
89 | # exit(0)
90 |
91 | convert_multiprocess(video_lists, meta_configs)
--------------------------------------------------------------------------------
/opendv/scripts/youtube_download.py:
--------------------------------------------------------------------------------
1 | """
2 | This script is used for downloading OpenDV-YouTube raw data.
3 | The script is a part of the [`GenAD`](https://arxiv.org/abs/2403.09630) project.
4 | """
5 |
6 | from multiprocessing import Pool
7 | from tqdm import tqdm
8 | import os, sys
9 | import time
10 | import json
11 | import cv2
12 |
13 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
14 | sys.path.append(root_dir)
15 |
16 | from utils.easydict import EasyDict
17 | from utils.download import youtuber_formatize, POSSIBLE_EXTS, get_video_with_meta, get_mini_opendv
18 |
19 | CONFIGS = dict()
20 |
21 | def single_download(args):
22 | vid_info, CONFIGS = args
23 |
24 | url = vid_info["link"]
25 | filename = vid_info["videoid"]
26 | folder = youtuber_formatize(vid_info["youtuber"])
27 | path = os.path.join(CONFIGS.root, folder)
28 |
29 | for ext in POSSIBLE_EXTS:
30 | if os.path.exists(f"{path}/{filename}.{ext}"):
31 | print(f"Video {filename} already exists in {path}. Skipping...")
32 | return
33 | if not os.path.exists(path):
34 | os.makedirs(path, exist_ok=True)
35 |
36 | try:
37 | ret = os.system(f"{CONFIGS.method} -f '{CONFIGS.format}' -o '{path}/{filename}.%(ext)s' {url}")
38 | if ret != 0:
39 | raise Exception("ERROR: Video unavailable or network error.")
40 | except Exception as e:
41 | with open(CONFIGS.exception_file, "a") as f:
42 | f.write("Error downloading video [{}]: {}\n".format(filename, e))
43 | return
44 |
45 |
46 | def multiple_download(video_list, configs):
47 | global CONFIGS
48 |
49 | video_count = len(video_list)
50 | CONFIGS["method"] = configs["method"]
51 | assert CONFIGS["method"] in ["youtube-dl", "yt-dlp"], "Only support `youtube-dl` and `yt-dlp`."
52 | CONFIGS["format"] = configs["format"] if configs["method"] == "youtube-dl" else configs["format_for_ytdlp"]
53 | CONFIGS["root"] = configs.root
54 | CONFIGS["exception_file"] = configs.exception_file
55 | CONFIGS = EasyDict(CONFIGS)
56 | finished = 0
57 | with Pool(configs.num_workers) as p:
58 | current_time = time.perf_counter()
59 | for _ in tqdm(p.imap(single_download, [(vid_info, CONFIGS) for vid_info in video_list]), total=video_count):
60 | finished += 1
61 | working_time = time.perf_counter() - current_time
62 | eta = working_time / finished * (video_count - finished)
63 | eta = time.strftime("%H:%M:%S", time.gmtime(eta))
64 | print("Finished {}/{} videos. ETA: {}.".format(finished, video_count, eta))
65 |
66 |
67 | def check_status(video_list, configs):
68 | if "exception_file" not in configs:
69 | print("No exception file specified. Skipping...")
70 | return
71 |
72 | print("Checking download status...")
73 | with open(configs.exception_file, "a") as f:
74 | f.write("\n\nChecking download status...\n")
75 |
76 | for vid_info in tqdm(video_list):
77 | exists = False
78 | path = os.path.join(configs.root, youtuber_formatize(vid_info["youtuber"]))
79 | for ext in POSSIBLE_EXTS:
80 | if os.path.exists("{}/{}.{}".format(path, vid_info["videoid"], ext)):
81 | exists = True
82 | break
83 | if not exists:
84 | with open(configs.exception_file, "a") as f:
85 | f.write(f"Video [{vid_info['videoid']}] not found in [{path}].\n")
86 | continue
87 |
88 | _, true_duration = get_video_with_meta("{}/{}.{}".format(path, vid_info["videoid"], ext), ["duration"])
89 |
90 | duration_in_json = vid_info["duration"]
91 | expected_duration = vid_info["length"]
92 |
93 | if abs(true_duration - expected_duration) > 5:
94 | with open(configs.exception_file, "a") as f:
95 | f.write(f"Video [{vid_info['videoid']}]: Duration mismatch. Expected: {duration_in_json} ({expected_duration} seconds), True: {true_duration} seconds.\n")
96 |
97 | with open(configs.exception_file, "a") as f:
98 | f.write("\nChecking download status finished.")
99 |
100 |
101 | if __name__ == '__main__':
102 | import argparse
103 | parser = argparse.ArgumentParser()
104 |     parser.add_argument("--config", type=str, default="configs/download.json", help="Path to the config file. Should be a `json` file.")
105 | parser.add_argument("--mini", action="store_true", default=False, help="Download mini dataset only.")
106 | args = parser.parse_args()
107 |
108 | configs = EasyDict(json.load(open(args.config, "r")))
109 | with open(configs.exception_file, "w") as f:
110 | f.write("")
111 |
112 | video_list = json.load(open(configs.pop("video_list"), "r"))
113 | if args.mini:
114 | video_list = get_mini_opendv(video_list)
115 | if not os.path.exists(configs.root):
116 | os.makedirs(configs.root, exist_ok=True)
117 |
118 | multiple_download(video_list, configs)
119 | check_status(video_list, configs)
--------------------------------------------------------------------------------
/opendv/utils/cmd2caption.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | plain_caption_dict = {
4 | 0: "Go straight.",
5 | 1: "Pass the intersection.",
6 | 2: "Turn left.",
7 | 3: "Turn right.",
8 | 4: "Change to the left lane.",
9 | 5: "Change to the right lane.",
10 | 6: "Go to the left lane branch.",
11 | 7: "Go to the right lane branch.",
12 | 8: "Pass the crosswalk.",
13 | 9: "Pass the railroad.",
14 | 10: "Merge.",
15 | 11: "Make a U-turn.",
16 | 12: "Stop.",
17 | 13: "Deviate."
18 | }
19 |
20 | diverse_caption_dict = {
21 | 0: [
22 | "Move forward.",
23 | "Move steady.",
24 | "Go forward.",
25 | "Go straight.",
26 | "Proceed.",
27 | "Drive forward.",
28 | "Drive straight.",
29 | "Drive steady.",
30 | "Keep the direction.",
31 | "Maintain the direction.",
32 | ],
33 | 1: [
34 | "Pass the intersection.",
35 | "Cross the intersection.",
36 | "Traverse the intersection.",
37 | "Drive through the intersection.",
38 | "Move past the intersection.",
39 | "Pass the junction.",
40 | "Cross the junction.",
41 | "Traverse the junction.",
42 | "Drive through the junction.",
43 | "Move past the junction.",
44 | "Pass the crossroad.",
45 | "Cross the crossroad.",
46 | "Traverse the crossroad.",
47 | "Drive through the crossroad.",
48 | "Move past the crossroad.",
49 | ],
50 | 2: [
51 | "Turn left.",
52 | "Turn to the left.",
53 | "Make a left turn.",
54 | "Take a left turn.",
55 | "Turn to the left.",
56 | "Left turn.",
57 | "Steer left.",
58 | "Steer to the left.",
59 | ],
60 | 3: [
61 | "Turn right.",
62 | "Turn to the right.",
63 | "Make a right turn.",
64 | "Take a right turn.",
65 | "Turn to the right.",
66 | "Right turn.",
67 | "Steer right.",
68 | "Steer to the right.",
69 | ],
70 | 4: [
71 | "Make a left lane change.",
72 | "Change to the left lane.",
73 | "Switch to the left lane.",
74 | "Shift to the left lane.",
75 | "Move to the left lane.",
76 | ],
77 | 5: [
78 | "Make a right lane change.",
79 | "Change to the right lane.",
80 | "Switch to the right lane.",
81 | "Shift to the right lane.",
82 | "Move to the right lane.",
83 | ],
84 | 6: [
85 | "Go to the left lane branch.",
86 | "Take the left lane branch.",
87 | "Move into the left lane branch.",
88 | "Follow the left lane branch.",
89 | "Follow the left side road.",
90 | ],
91 | 7: [
92 | "Go to the right lane branch.",
93 | "Take the right lane branch.",
94 | "Move into the right lane branch.",
95 | "Follow the right lane branch.",
96 | "Follow the right side road.",
97 | ],
98 | 8: [
99 | "Pass the crosswalk.",
100 | "Cross the crosswalk.",
101 | "Traverse the crosswalk.",
102 | "Drive through the crosswalk.",
103 | "Move past the crosswalk.",
104 | "Pass the crossing area.",
105 | "Cross the crossing area.",
106 | "Traverse the crossing area.",
107 | "Drive through the crossing area.",
108 | "Move past the crossing area.",
109 | ],
110 | 9: [
111 | "Pass the railroad.",
112 | "Cross the railroad.",
113 | "Traverse the railroad.",
114 | "Drive through the railroad.",
115 | "Move past the railroad.",
116 | "Pass the railway.",
117 | "Cross the railway.",
118 | "Traverse the railway.",
119 | "Drive through the railway.",
120 | "Move past the railway.",
121 | ],
122 | 10: [
123 | "Merge.",
124 | "Merge traffic.",
125 | "Merge into traffic.",
126 | "Merge into the traffic.",
127 | "Join the traffic.",
128 | "Merge into the traffic flow.",
129 | "Join the traffic flow.",
130 | "Merge into the traffic stream.",
131 | "Join the traffic stream.",
132 | "Merge into the lane.",
133 | ],
134 | 11: [
135 | "Make a U-turn.",
136 | "Make a 180-degree turn.",
137 | "Turn 180 degree.",
138 | "Turn around.",
139 | "Drive in a U-turn.",
140 | ],
141 | 12: [
142 | "Stop.",
143 | "Halt.",
144 | "Decelerate.",
145 | "Slow down.",
146 | "Brake.",
147 | ],
148 | 13: [
149 | "Deviate.",
150 | "Deviate from the path.",
151 | "Deviate from the lane.",
152 | "Change the direction.",
153 | "Shift the direction.",
154 | ]
155 | }
156 |
157 |
158 | def map_category_to_caption(category_index, diverse=True):
159 | if diverse:
160 | return random.choice(diverse_caption_dict[category_index])
161 | else:
162 | return plain_caption_dict[category_index]
163 |
--------------------------------------------------------------------------------
/opendv/utils/download.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 |
4 | POSSIBLE_EXTS = ["mp4", "webm", "mkv"]
5 |
6 | def youtuber_formatize(youtuber):
7 | return youtuber.replace(" ", "_")
8 |
9 |
10 | def get_video_with_meta(video_path, need_metas=["fps", "duration", "num_frames"]):
11 | if not os.path.exists(video_path):
12 | video = None
13 | fps = -1
14 | duration = -1
15 | num_frames = -1
16 | else:
17 | try:
18 | video = cv2.VideoCapture(video_path)
19 | fps = video.get(cv2.CAP_PROP_FPS)
20 | if fps == 0:
21 | cmd = "ffprobe -v error -select_streams v -of default=noprint_wrappers=1:nokey=1 -show_entries stream=r_frame_rate {}".format(video_path)
22 | precise_fps = os.popen(cmd).read().split("/")
23 | fps = float(precise_fps[0]) / float(precise_fps[1])
24 | if ("num_frames" in need_metas) or ("duration" in need_metas):
25 | cmd = "ffprobe -show_entries format=duration -v quiet -of csv=\"p=0\" {}".format(video_path)
26 | precise_duration = os.popen(cmd).read()
27 | duration = int(float(precise_duration))
28 | if "num_frames" in need_metas:
29 | num_frames = int(duration * fps)
30 | else:
31 | if "num_frames" in need_metas:
32 | num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
33 | if "duration" in need_metas:
34 | duration = video.get(cv2.CAP_PROP_FRAME_COUNT) / fps
35 |
36 | except Exception as e:
37 | print("Error: ", e)
38 | video = None
39 | fps = -1
40 | duration = -1
41 | num_frames = -1
42 |
43 | return_params = (video,)
44 | if "fps" in need_metas:
45 | return_params += (fps,)
46 | if "duration" in need_metas:
47 | return_params += (duration,)
48 | if "num_frames" in need_metas:
49 | return_params += (num_frames,)
50 |
51 | return return_params
52 |
53 | def get_mini_opendv(full_video_list):
54 | mini_list = []
55 | for vid_info in full_video_list:
56 | if vid_info["subset"] != "Mini":
57 | continue
58 | mini_list.append(vid_info)
59 |
60 | return mini_list
--------------------------------------------------------------------------------
/opendv/utils/easydict.py:
--------------------------------------------------------------------------------
1 | class EasyDict(dict):
2 | """
3 | Get attributes
4 |
5 | >>> d = EasyDict({'foo':3})
6 | >>> d['foo']
7 | 3
8 | >>> d.foo
9 | 3
10 | >>> d.bar
11 | Traceback (most recent call last):
12 | ...
13 | AttributeError: 'EasyDict' object has no attribute 'bar'
14 |
15 | Works recursively
16 |
17 | >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}})
18 | >>> isinstance(d.bar, dict)
19 | True
20 | >>> d.bar.x
21 | 1
22 |
23 | Bullet-proof
24 |
25 | >>> EasyDict({})
26 | {}
27 | >>> EasyDict(d={})
28 | {}
29 | >>> EasyDict(None)
30 | {}
31 | >>> d = {'a': 1}
32 | >>> EasyDict(**d)
33 | {'a': 1}
34 |
35 | Set attributes
36 |
37 | >>> d = EasyDict()
38 | >>> d.foo = 3
39 | >>> d.foo
40 | 3
41 | >>> d.bar = {'prop': 'value'}
42 | >>> d.bar.prop
43 | 'value'
44 | >>> d
45 | {'foo': 3, 'bar': {'prop': 'value'}}
46 | >>> d.bar.prop = 'newer'
47 | >>> d.bar.prop
48 | 'newer'
49 |
50 |
51 | Values extraction
52 |
53 | >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]})
54 | >>> isinstance(d.bar, list)
55 | True
56 | >>> from operator import attrgetter
57 | >>> map(attrgetter('x'), d.bar)
58 | [1, 3]
59 | >>> map(attrgetter('y'), d.bar)
60 | [2, 4]
61 | >>> d = EasyDict()
62 | >>> d.keys()
63 | []
64 | >>> d = EasyDict(foo=3, bar=dict(x=1, y=2))
65 | >>> d.foo
66 | 3
67 | >>> d.bar.x
68 | 1
69 |
70 | Still like a dict though
71 |
72 | >>> o = EasyDict({'clean':True})
73 | >>> o.items()
74 | [('clean', True)]
75 |
76 | And like a class
77 |
78 | >>> class Flower(EasyDict):
79 | ... power = 1
80 | ...
81 | >>> f = Flower()
82 | >>> f.power
83 | 1
84 | >>> f = Flower({'height': 12})
85 | >>> f.height
86 | 12
87 | >>> f['power']
88 | 1
89 | >>> sorted(f.keys())
90 | ['height', 'power']
91 |
92 | update and pop items
93 | >>> d = EasyDict(a=1, b='2')
94 | >>> e = EasyDict(c=3.0, a=9.0)
95 | >>> d.update(e)
96 | >>> d.c
97 | 3.0
98 | >>> d['c']
99 | 3.0
100 | >>> d.get('c')
101 | 3.0
102 | >>> d.update(a=4, b=4)
103 | >>> d.b
104 | 4
105 | >>> d.pop('a')
106 | 4
107 | >>> d.a
108 | Traceback (most recent call last):
109 | ...
110 | AttributeError: 'EasyDict' object has no attribute 'a'
111 | """
112 |
113 | def __init__(self, d=None, **kwargs):
114 | if d is None:
115 | d = {}
116 | if kwargs:
117 | d.update(**kwargs)
118 | for k, v in d.items():
119 | setattr(self, k, v)
120 | # Class attributes
121 | for k in self.__class__.__dict__.keys():
122 | if not (k.startswith("__") and k.endswith("__")) and not k in ("update", "pop"):
123 | setattr(self, k, getattr(self, k))
124 |
125 | def __setattr__(self, name, value):
126 | if isinstance(value, (list, tuple)):
127 | value = [self.__class__(x) if isinstance(x, dict) else x for x in value]
128 | elif isinstance(value, dict) and not isinstance(value, self.__class__):
129 | value = self.__class__(value)
130 | super(EasyDict, self).__setattr__(name, value)
131 | super(EasyDict, self).__setitem__(name, value)
132 |
133 | __setitem__ = __setattr__
134 |
135 | def update(self, e=None, **f):
136 | d = e or dict()
137 | d.update(f)
138 | for k in d:
139 | setattr(self, k, d[k])
140 |
141 | def pop(self, k, d=None):
142 | if hasattr(self, k):
143 | delattr(self, k)
144 | return super(EasyDict, self).pop(k, d)
145 |
146 |
147 | if __name__ == "__main__":
148 | import doctest
--------------------------------------------------------------------------------
/opendv/utils/frame_extraction.py:
--------------------------------------------------------------------------------
1 | """
2 | This script is used for preprocessing OpenDV-YouTube raw data.
3 | The script is a part of the [`GenAD`](https://arxiv.org/abs/2403.09630) project.
4 | """
5 |
6 | import os
7 | import time
8 | import traceback
9 | import json
10 |
11 | import numpy as np
12 | import decord
13 | import cv2
14 | from tqdm import tqdm
15 |
16 | from utils.download import get_video_with_meta
17 |
18 | DECORD_ACCEPTABLE_TYPES = ['mp4']
19 | FORCE_USE_CV2 = True
20 |
21 | IDX_WIDTH = 9
22 | # set [IDX_WIDTH] to [None] if you want to use the default format, i.e. zero padding to the maximal index of a video
23 | INFO_INTERVAL = 1000
24 | DEFAULT_FPS = 10
25 |
26 |
27 | def extract_frames(video_info):
28 | video_path = video_info.get("video_path", None)
29 | output_dir = video_info.get("output_dir", None)
30 | fps = video_info.get("freq", DEFAULT_FPS)
31 | discard_begin = video_info.get("start_discard", 90)
32 | discard_end = video_info.get("end_discard", 60)
33 | exception_file = video_info.get("exception_file", None)
34 | finish_log = video_info.get("finish_log", None)
35 |
36 | if video_path is None or output_dir is None:
37 | print("skipping invalid video info...")
38 | return
39 |
40 | try:
41 | if (FORCE_USE_CV2) or (video_path.split('.')[-1] not in DECORD_ACCEPTABLE_TYPES):
42 | print("[opencv] extracting frames from video [{}]...".format(video_path))
43 | cv2_extract_frames(video_path, output_dir, fps, discard_begin, discard_end, exception_file)
44 | else:
45 | print("[decord] extracting frames from video [{}]...".format(video_path))
46 | decord_extract_frames(video_path, output_dir, fps, discard_begin, discard_end, exception_file)
47 |
48 | if finish_log is not None:
49 | with open(finish_log, "a") as f:
50 | f.write(video_info.get("video_id", video_path.split("/")[-1]))
51 | f.write("\n")
52 |
53 | except Exception as e:
54 | exceptions = dict()
55 | exceptions["video_path"] = video_path
56 | exceptions["problem"] = str(e)
57 | exceptions["action"] = "skipped"
58 | exceptions["details"] = traceback.format_exc()
59 | json.dump(exceptions, open(exception_file, "a"), indent=4)
60 | with open(exception_file, "a") as f:
61 | f.write(",\n")
62 |
63 | traceback.print_exc()
64 |
65 |
66 | def count_done_frames(save_path):
67 | return len(os.listdir(save_path))
68 |
69 | def special_video_setting_log(video_path, exception_file, height=None, width=None, video_reader=None):
70 | skipped = False
71 |
72 | exception = None
73 | if video_reader is None:
74 | exception = {
75 | "video_path": video_path,
76 | "problem": "video not found or corrupted",
77 | "action": "skipped",
78 | "details": "video not found or corrupted"
79 | }
80 |         json.dump(exception, open(exception_file, "a"), indent=4)  # write the exception before skipping
81 |         open(exception_file, "a").write(",\n"); return True
82 | if (height is None) or (width is None):
83 | height = video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT)
84 | width = video_reader.get(cv2.CAP_PROP_FRAME_WIDTH)
85 |
86 | if (width < 1280) and (height < 720):
87 | exception = {
88 | "video_path": video_path,
89 | "problem": "< 720p",
90 | "action": "skipped",
91 | "details": "{} x {}".format(width, height)
92 | }
93 | skipped = True
94 |
95 | elif (width / height != 16 / 9):
96 | exception = {
97 | "video_path": video_path,
98 | "problem": "not 16:9",
99 | "action": "as normal",
100 | "details": "{} x {}".format(width, height)
101 | }
102 |
103 | if exception is not None:
104 | json.dump(exception, open(exception_file, "a"), indent=4)
105 | with open(exception_file, "a") as f:
106 | f.write(",\n")
107 |
108 | return skipped
109 |
110 |
111 | def decord_extract_frames(video_path, save_path, fps=10, discard_begin=90, discard_end=60, msg_file=None):
112 | start_index = 0
113 | if not os.path.exists(save_path):
114 | os.makedirs(save_path)
115 | else:
116 |         start_index = max(count_done_frames(save_path) - 1, 0)
117 | # so that we could rewrite the last frame, in case the last frame is corrupted
118 |
119 | video = decord.VideoReader(video_path, ctx=decord.cpu(), num_threads=1)
120 | video_fps = video.get_avg_fps()
121 | num_frames = int( fps * (len(video) // video_fps - discard_begin - discard_end) )
122 | idx_width = len(str(num_frames)) if IDX_WIDTH is None else IDX_WIDTH
123 | interval = video_fps / fps
124 |
125 | img = video[0].asnumpy()
126 | frame_height, frame_width, _ = img.shape
127 | if special_video_setting_log(video_path, msg_file, frame_height, frame_width):
128 | return
129 | del img
130 | first_log = True
131 |
132 | indices = np.array([ int(discard_begin * video_fps) + int(np.round(i * interval)) for i in range(num_frames)])
133 | start_time = time.perf_counter()
134 | ids = list(range(num_frames))
135 | for id in ids[start_index:]:
136 | frame = video[indices[id]].asnumpy()
137 | file_path = os.path.join(save_path, str(id).zfill(idx_width) + ".jpg")
138 | cv2.imwrite(file_path, cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
139 |
140 | if (first_log) or ((id+1) % INFO_INTERVAL == 0):
141 | first_log = False
142 | elapsed_time = time.perf_counter() - start_time
143 | eta = elapsed_time / (id+1) * (len(indices) - id - 1)
144 | elapsed_time = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
145 | eta = time.strftime("%H:%M:%S", time.gmtime(eta))
146 | progress_bar = "\u2588" * int((id+1) / len(indices) * 20) + " " * (20 - int((id+1) / len(indices) * 20))
147 | print("{} {}/{} Elapsed: {}\t ETA: {}".format(progress_bar, id+1, len(indices), elapsed_time, eta))
148 |
149 |
150 | def cv2_extract_frames(video_path, save_path, fps=10, discard_begin=90, discard_end=60, msg_file=None):
151 | start_index = 0
152 | if not os.path.exists(save_path):
153 | os.makedirs(save_path)
154 | else:
155 |         start_index = max(count_done_frames(save_path) - 1, 0)
156 | # so that we could rewrite the last frame, in case the last frame is corrupted
157 |
158 | video, video_fps, total_frames = get_video_with_meta(video_path, need_metas=["fps", "num_frames"])
159 | if video is not None:
160 | num_frames = int( fps * (total_frames // video_fps - discard_begin - discard_end) )
161 | idx_width = len(str(num_frames)) if IDX_WIDTH is None else IDX_WIDTH
162 | interval = video_fps / fps
163 |
164 | if special_video_setting_log(video_path, msg_file, video_reader=video):
165 | return
166 | first_log, first_frame = True, True
167 |
168 | indices = np.array([ int(discard_begin * video_fps) + int(np.round(i * interval)) for i in range(num_frames)])
169 | start_time = time.perf_counter()
170 | ids = list(range(num_frames))
171 | for id in ids[start_index:]:
172 | if first_frame:
173 | video.set(cv2.CAP_PROP_POS_FRAMES, indices[id])
174 | video.grab()
175 | first_frame = False
176 | else:
177 | for _ in range(indices[id] - indices[id-1]):
178 | video.grab()
179 |
180 | _, frame = video.retrieve()
181 | file_path = os.path.join(save_path, str(id).zfill(idx_width) + ".jpg")
182 | cv2.imwrite(file_path, frame)
183 |
184 | if (first_log) or ((id+1) % INFO_INTERVAL == 0):
185 | first_log = False
186 | elapsed_time = time.perf_counter() - start_time
187 | eta = elapsed_time / (id+1) * (len(indices) - id - 1)
188 | elapsed_time = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
189 | eta = time.strftime("%H:%M:%S", time.gmtime(eta))
190 | progress_bar = "\u2588" * int((id+1) / len(indices) * 20) + " " * (20 - int((id+1) / len(indices) * 20))
191 | print("{} {}/{} Elapsed: {}\t ETA: {}".format(progress_bar, id+1, len(indices), elapsed_time, eta))
--------------------------------------------------------------------------------