├── .github └── FUNDING.yml ├── .gitignore ├── Drivedata.md ├── LICENSE ├── README.md ├── assets ├── Affiliation.png ├── Drivedata_overview.jpg ├── Drivedata_timeline.jpg ├── cvpr24_genad_poster.png ├── opendv_examples.png ├── overview.png └── vista-teaser.gif └── opendv ├── .gitignore ├── README.md ├── configs ├── download.json └── video2img.json ├── requirements.txt ├── scripts ├── meta_preprocess.py ├── video2img.py └── youtube_download.py └── utils ├── cmd2caption.py ├── download.py ├── easydict.py └── frame_extraction.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [OpenDriveLab] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /Drivedata.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future 4 | 5 | > **This repo is all you need for the Open-sourced Data Ecosystem in Autonomous Driving.** 6 | 7 | We present comprehensive paper collections, leaderboards, and challenges. 8 | 9 | 11 | 12 | ## Table of Contents 13 | 14 | - [Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future](#open-sourced-data-ecosystem-in-autonomous-driving-the-present-and-future) 15 | - [Table of Contents](#table-of-contents) 16 | - [Citation](#citation) 17 | - [Challenges and Leaderboards](#challenges-and-leaderboards) 18 | - [Dataset Collection](#dataset-collection) 19 | - [Perception](#perception) 20 | - [Mapping](#mapping) 21 | - [Prediction and Planning](#prediction-and-planning) 22 | 25 | - [License](#license) 26 | 27 | ## Citation 28 | If you find this project useful in your research, please consider citing: 29 | ```BibTeX 30 | @misc{li2023opensourced, 31 | title={Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future}, 32 | author={Hongyang Li and Yang Li and Huijie Wang and Jia Zeng and Pinlong Cai and Huilin Xu and Dahua Lin and Junchi Yan and Feng Xu and Lu Xiong and Jingdong Wang and Futang Zhu and Kai Yan and Chunjing Xu and Tiancai Wang and Beipeng Mu and Shaoqing Ren and Zhihui Peng and Yu Qiao}, 33 | year={2023}, 34 | eprint={2312.03408}, 35 | archivePrefix={arXiv}, 36 | primaryClass={cs.CV} 37 | } 38 | ``` 39 | 40 |

(back to top)

41 | 42 | ## Challenges and Leaderboards 43 |
| Title | Host | Year | Task | Entry |
|---|---|---|---|---|
| Autonomous Driving Challenge | OpenDriveLab | CVPR 2023 | Perception / OpenLane Topology | 111 |
| | | | Perception / Online HD Map Construction | |
| | | | Perception / 3D Occupancy Prediction | |
| | | | Prediction & Planning / nuPlan Planning | |
| Waymo Open Dataset Challenges | Waymo | CVPR 2023 | Perception / 2D Video Panoptic Segmentation | 35 |
| | | | Perception / Pose Estimation | |
| | | | Prediction / Motion Prediction | |
| | | | Prediction / Sim Agents | |
| | | CVPR 2022 | Prediction / Motion Prediction | 128 |
| | | | Prediction / Occupancy and Flow Prediction | |
| | | | Perception / 3D Semantic Segmentation | |
| | | | Perception / 3D Camera-only Detection | |
| | | CVPR 2021 | Prediction / Motion Prediction | 115 |
| | | | Prediction / Interaction Prediction | |
| | | | Perception / Real-time 3D Detection | |
| | | | Perception / Real-time 2D Detection | |
| Argoverse Challenges | Argoverse | CVPR 2023 | Prediction / Multi-agent Forecasting | 81 |
| | | | Perception & Prediction / Unified Sensor-based Detection, Tracking, and Forecasting | |
| | | | Perception / LiDAR Scene Flow | |
| | | | Prediction / 3D Occupancy Forecasting | |
| | | CVPR 2022 | Perception / 3D Object Detection | 81 |
| | | | Prediction / Motion Forecasting | |
| | | | Perception / Stereo Depth Estimation | |
| | | CVPR 2021 | Perception / Stereo Depth Estimation | 368 |
| | | | Prediction / Motion Forecasting | |
| | | | Perception / Streaming 2D Detection | |
| CARLA Autonomous Driving Challenge | CARLA Team, Intel | 2023 | Planning / CARLA AD Challenge 2.0 | - |
| | | NeurIPS 2022 | Planning / CARLA AD Challenge 1.0 | 19 |
| | | NeurIPS 2021 | Planning / CARLA AD Challenge 1.0 | - |
| Guangdong-Hong Kong-Macao Greater Bay Area (Huangpu) International Algorithm Case Competition | Pazhou Lab | 2023 | Perception / Cross-scene Monocular Depth Estimation | - |
| | | | Perception / Roadside Millimeter-Wave Radar Calibration and Object Tracking | - |
| | | 2022 | Perception / Roadside 3D Perception Algorithm | - |
| | | | Perception / Storefront Sign Text Recognition in Street-view Images | - |
| AI Driving Olympics | ETH Zurich, University of Montreal, Motional | NeurIPS 2021 | Perception / nuScenes Panoptic | 11 |
| | | ICRA 2021 | Perception / nuScenes Detection | 456 |
| | | | Perception / nuScenes Tracking | |
| | | | Prediction / nuScenes Prediction | |
| | | | Perception / nuScenes LiDAR Segmentation | |
| Jittor (计图) AI Algorithm Challenge | Department of Information Sciences, National Natural Science Foundation of China | 2021 | Perception / Traffic Sign Detection | 37 |
| KITTI Vision Benchmark Suite | University of Tübingen | 2012 | Perception / Stereo, Flow, Scene Flow, Depth, Odometry, Object, Tracking, Road, Semantics | 5,610 |
270 |

(back to top)

271 | 272 | ## Dataset Collection 273 | 274 | 275 | 276 | ### Perception 277 | 278 |
| Dataset | Year | Scenes | Hours | Region | Camera | Other Sensors | Annotation | Paper |
|---|---|---|---|---|---|---|---|---|
| KITTI | 2012 | 50 | 6 | EU | Front-view | GPS & IMU | 2D BBox & 3D BBox | Link |
| Cityscapes | 2016 | - | - | EU | Front-view | | 2D Seg | Link |
| Lost and Found | 2016 | 112 | - | - | Front-view | | 2D Seg | Link |
| Mapillary | 2016 | - | - | Global | Street-view | | 2D Seg | Link |
| DDD17 | 2017 | 36 | 12 | EU | Front-view | GPS & CAN-bus & Event Camera | - | Link |
| Apolloscape | 2016 | 103 | 2.5 | AS | Front-view | GPS & IMU | 3D BBox & 2D Seg | Link |
| BDD-X | 2018 | 6984 | 77 | NA | Front-view | | Language | Link |
| HDD | 2018 | - | 104 | NA | Front-view | GPS & IMU & CAN-bus | 2D BBox | Link |
| IDD | 2018 | 182 | - | AS | Front-view | | 2D Seg | Link |
| SemanticKITTI | 2019 | 50 | 6 | EU | | | 3D Seg | Link |
| Woodscape | 2019 | - | - | Global | 360° | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| DrivingStereo | 2019 | 42 | - | AS | Front-view | | - | Link |
| Brno-Urban | 2019 | 67 | 10 | EU | Front-view | GPS & IMU & Infrared Camera | - | Link |
| A*3D | 2019 | - | 55 | AS | Front-view | | 3D BBox | Link |
| Talk2Car | 2019 | 850 | 283.3 | NA | Front-view | | Language & 3D BBox | Link |
| Talk2Nav | 2019 | 10714 | - | Sim | 360° | | Language | Link |
| PIE | 2019 | - | 6 | NA | Front-view | | 2D BBox | Link |
| UrbanLoco | 2019 | 13 | - | AS & NA | 360° | IMU | - | Link |
| TITAN | 2019 | 700 | - | AS | Front-view | | 2D BBox | Link |
| H3D | 2019 | 160 | 0.77 | NA | Front-view | GPS & IMU | - | Link |
| A2D2 | 2020 | - | 5.6 | EU | 360° | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| CARRADA | 2020 | 30 | 0.3 | NA | Front-view | Radar | 3D BBox | Link |
| DAWN | 2019 | - | - | Global | Front-view | | 2D BBox | Link |
| 4Seasons | 2019 | - | - | - | Front-view | GPS & IMU | - | Link |
| UNDD | 2019 | - | - | - | Front-view | | 2D Seg | Link |
| SemanticPOSS | 2020 | - | - | AS | | GPS & IMU | 3D Seg | Link |
| Toronto-3D | 2020 | 4 | - | NA | | | 3D Seg | Link |
| ROAD | 2021 | 22 | - | EU | Front-view | | 2D BBox & Topology | Link |
| Reasonable Crowd | 2021 | - | - | Sim | Front-view | | Language | Link |
| METEOR | 2021 | 1250 | 20.9 | AS | Front-view | GPS | Language | Link |
| PandaSet | 2021 | 179 | - | NA | 360° | GPS & IMU | 3D BBox | Link |
| MUAD | 2022 | - | - | Sim | 360° | | 2D Seg & 2D BBox | Link |
| TAS-NIR | 2022 | - | - | - | Front-view | Infrared Camera | 2D Seg | Link |
| LiDAR-CS | 2022 | 6 | - | Sim | | | 3D BBox | Link |
| WildDash | 2022 | - | - | - | Front-view | | 2D Seg | Link |
| OpenScene | 2023 | 1000 | 5.5 | AS & NA | 360° | | 3D Occ | Link |
| ZOD | 2023 | 1473 | 8.2 | EU | 360° | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| nuScenes | 2019 | 1000 | 5.5 | AS & NA | 360° | GPS & CAN-bus & Radar & HDMap | 3D BBox & 3D Seg | Link |
| Argoverse V1 | 2019 | 324k | 320 | NA | 360° | HDMap | 3D BBox & 3D Seg | Link |
| Waymo | 2019 | 1000 | 6.4 | NA | 360° | | 2D BBox & 3D BBox | Link |
| KITTI-360 | 2020 | 366 | 2.5 | EU | 360° | | 3D BBox & 3D Seg | Link |
| ONCE | 2021 | - | 144 | AS | 360° | | 3D BBox | Link |
| nuPlan | 2021 | - | 120 | AS & NA | 360° | | 3D BBox | Link |
| Argoverse V2 | 2022 | 1000 | 4 | NA | 360° | HDMap | 3D BBox | Link |
| DriveLM | 2023 | 1000 | 5.5 | AS & NA | 360° | | Language | Link |
956 | 957 | 958 |

(back to top)

959 | 960 | 961 | 962 | ### Mapping 963 |
| Dataset | Year | Scenes | Frames | Sensor & Annotation | Space | Paper |
|---|---|---|---|---|---|---|
| Caltech Lanes | 2008 | 4 | 1224/1224 | | PV | Link |
| VPG | 2017 | - | 20K/20K | | PV | Link |
| TUsimple | 2017 | 6.4K | 6.4K/128K | | PV | Link |
| CULane | 2018 | - | 133K/133K | | PV | Link |
| ApolloScape | 2018 | 235 | 115K/115K | | PV | Link |
| LLAMAS | 2019 | 14 | 79K/100K | Front-view Image, Laneline | PV | Link |
| 3D Synthetic | 2020 | - | 10K/10K | | PV | Link |
| CurveLanes | 2020 | - | 150K/150K | | PV | Link |
| VIL-100 | 2021 | 100 | 10K/10K | | PV | Link |
| OpenLane-V1 | 2022 | 1K | 200K/200K | | 3D | Link |
| ONCE-3DLane | 2022 | - | 211K/211K | | 3D | Link |
| OpenLane-V2 | 2023 | 2K | 72K/72K | Multi-view Image, Lane Centerline, Lane Segment | 3D | Link |
1159 | 1160 | 1161 |

(back to top)

1162 | 1163 | ### Prediction and Planning 1164 |
| Subtask | Input | Output | Evaluation | Dataset |
|---|---|---|---|---|
| Motion Prediction | Surrounding Traffic States | Spatiotemporal Trajectories of Single/Multiple Vehicle(s) | Displacement Error | Argoverse |
| | | | | nuScenes |
| | | | | Waymo |
| | | | | Interaction |
| | | | | MONA |
| Trajectory Planning | Motion States for Ego Vehicles, Scenario Cognition and Prediction | Trajectories for Ego Vehicles | Displacement Error, Safety, Compliance, Comfort | nuPlan |
| | | | | CARLA |
| | | | | MetaDrive |
| | | | | Apollo |
| Path Planning | Maps for Road Network | Routes Connecting to Nodes and Links | Efficiency, Energy Conservation | OpenStreetMap |
| | | | | Transportation Networks |
| | | | | DTAlite |
| | | | | PeMS |
| | | | | New York City Taxi Data |
1256 | 1257 |
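The displacement errors referenced in the table above are simple to state: the average displacement error (ADE) is the mean L2 distance between predicted and ground-truth waypoints over all timesteps, and the final displacement error (FDE) is the L2 distance at the last timestep. A minimal NumPy sketch follows; it is an illustration only and is not tied to any specific benchmark's evaluation toolkit.

```python
import numpy as np


def displacement_errors(pred: np.ndarray, gt: np.ndarray):
    """ADE/FDE between one predicted and one ground-truth trajectory.

    pred, gt: arrays of shape (T, 2) holding (x, y) waypoints over T timesteps.
    Returns (ade, fde) in the same unit as the inputs (typically meters).
    """
    dists = np.linalg.norm(pred - gt, axis=-1)  # per-timestep L2 distance, shape (T,)
    return dists.mean(), dists[-1]


# Toy example: a prediction with a constant 0.5 m lateral offset
# from a straight-line ground truth.
gt = np.stack([np.arange(5, dtype=float), np.zeros(5)], axis=-1)  # shape (5, 2)
pred = gt + np.array([0.0, 0.5])
ade, fde = displacement_errors(pred, gt)
print(f"ADE: {ade:.2f} m, FDE: {fde:.2f} m")  # ADE: 0.50 m, FDE: 0.50 m
```

In practice, benchmarks such as Argoverse, nuScenes, and Waymo usually report variants like minADE/minFDE, taking the minimum of these errors over K predicted trajectory candidates per agent.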

(back to top)

1258 | 1259 | 1260 | 1433 | 1434 | 1435 | 1436 | ## License 1437 | Open-sourced Data Ecosystem in Autonomous Driving is released under the [Apache 2.0 license](./LICENSE). 1438 | 1439 | 1440 |

(back to top)

1441 | 1442 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DriveAGI 2 | This is **"The One"** project that [**`OpenDriveLab`**](https://opendrivelab.com/) is committed to contributing to the community, providing some thoughts and a general picture of how to embrace `foundation models` in autonomous driving. 3 | 4 | ## Table of Contents 5 | - [NEWS](#news) 6 | - [At A Glance](#at-a-glance) 7 | - 🚀 [Vista](#vista) (NeurIPS 2024) 8 | - ⭐ [GenAD: OpenDV Dataset](#opendv) (CVPR 2024 Highlight) 9 | - ⭐ [DriveLM](#drivelm) (ECCV 2024 Oral) 10 | - [DriveData Survey](#drivedata-survey) 11 | 13 | - [OpenScene](#openscene) 14 | - [OpenLane-V2 Update](#openlane-v2-update) 15 | 16 | 17 | 18 | ## NEWS 19 |
20 | 21 | **[ NEW❗️] `2024/09/08`** We released a mini version of `OpenDV-YouTube`, containing **25 hours** of driving videos. Feel free to try the mini subset by following the instructions at [OpenDV-mini](https://github.com/OpenDriveLab/DriveAGI/blob/main/opendv/README.md)! 22 | 23 | **`2024/05/28`** We released our latest research, [Vista](#vista), a generalizable driving world model. It's capable of predicting high-fidelity and long-horizon futures, executing multi-modal actions, and serving as a generalizable reward function to assess driving behaviors. 24 | 25 | 26 | **`2024/03/24`** `OpenDV-YouTube Update:` **The full suite of toolkits for OpenDV-YouTube** is now available, including data downloading and processing scripts, as well as language annotations. Please refer to [OpenDV-YouTube](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv). 27 | 28 | **`2024/03/15`** We released the complete video list of `OpenDV-YouTube`, a large-scale driving video dataset, for the [GenAD](https://arxiv.org/abs/2403.09630) project. Data downloading and processing scripts, as well as language annotations, will be released next week. Stay tuned. 29 | 30 | **`2024/01/24`** 31 | We are excited to announce some updates to [our survey](#drivedata-survey) and would like to thank John Lambert and Klemens Esterle from the community for their advice on improving the manuscript. 32 |
33 | 34 | 35 | ## At A Glance 36 | 37 |
38 | Here are some key components needed to construct a large foundation model curated for an autonomous system. 39 | 40 | ![overview](assets/overview.png "overview") 41 | 42 | 43 | Below we would like to share the latest updates from our team on the **`DriveData`** side. We will release the details of the **`DriveEngine`** and the **`DriveAGI`** in the future. 44 |
45 | 46 | ## Vista 47 |
48 |

49 | 50 |

51 |
52 | 53 | > Simulated futures in a wide range of driving scenarios by [Vista](https://arxiv.org/abs/2405.17398). Best viewed on [demo page](https://vista-demo.github.io/). 54 | 55 | ### [🌏 **A Generalizable Driving World Model with High Fidelity and Versatile Controllability**](https://arxiv.org/abs/2405.17398) (NeurIPS 2024) 56 | 57 | **Quick facts:** 58 | - Introducing the world's first **generalizable driving world model**. 59 | - Task: High-fidelity, action-conditioned, and long-horizon future prediction for driving scenes in the wild. 60 | - Dataset: [`OpenDV-YouTube`](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv), `nuScenes` 61 | - Code and model: https://github.com/OpenDriveLab/Vista 62 | - Video Demo: https://vista-demo.github.io 63 | - Related work: [Vista](https://arxiv.org/abs/2405.17398), [GenAD](https://arxiv.org/abs/2403.09630) 64 | 65 | ```bibtex 66 | @inproceedings{gao2024vista, 67 | title={Vista: A Generalizable Driving World Model with High Fidelity and Versatile Controllability}, 68 | author={Shenyuan Gao and Jiazhi Yang and Li Chen and Kashyap Chitta and Yihang Qiu and Andreas Geiger and Jun Zhang and Hongyang Li}, 69 | booktitle={Advances in Neural Information Processing Systems (NeurIPS)}, 70 | year={2024} 71 | } 72 | 73 | @inproceedings{yang2024genad, 74 | title={{Generalized Predictive Model for Autonomous Driving}}, 75 | author={Jiazhi Yang and Shenyuan Gao and Yihang Qiu and Li Chen and Tianyu Li and Bo Dai and Kashyap Chitta and Penghao Wu and Jia Zeng and Ping Luo and Jun Zhang and Andreas Geiger and Yu Qiao and Hongyang Li}, 76 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 77 | year={2024} 78 | } 79 | ``` 80 | 81 | ## GenAD: OpenDV Dataset 82 | ![opendv](assets/opendv_examples.png) 83 | > Examples of **real-world** driving scenarios in the OpenDV dataset, including urban, highway, rural scenes, etc. 84 | 85 | ### [⭐ **Generalized Predictive Model for Autonomous Driving**](https://arxiv.org/abs/2403.09630) (**CVPR 2024, Highlight**) 86 | 87 | ### [Paper](https://arxiv.org/abs/2403.09630) | [Video](https://www.youtube.com/watch?v=a4H6Jj-7IC0) | [Poster](assets/cvpr24_genad_poster.png) | [Slides](https://opendrivelab.github.io/content/GenAD_slides_with_vista.pdf) 88 | 89 | 🎦 The **Largest Driving Video dataset** to date, containing more than **1700 hours** of real-world driving videos, 300 times larger than the widely used nuScenes dataset. 90 | 91 | 92 | - **Complete video list** (under YouTube license): [OpenDV Videos](https://docs.google.com/spreadsheets/d/1bHWWP_VXeEe5UzIG-QgKFBdH7mNlSC4GFSJkEhFnt2I). 93 | - The downloaded raw videos (`mostly 1080P`) consume about `3 TB` of storage space. However, these hour-long videos cannot be used directly for model training, as they are extremely memory-consuming. 94 | - Therefore, we preprocess them into consecutive images, which are more flexible and efficient to load during training. The processed images consume about `24 TB` of storage space in total. 95 | - It's recommended to set up your experiments on a small subset, say **1/20** of the whole dataset. An official mini subset is also provided and you can refer to [**OpenDV-mini**](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv#about-opendv-youtube-and-opendv-mini) for details. After stabilizing the training, you can then apply your method on the whole dataset and hope for the best 🤞.
96 | - **[ New❗️]** **Mini subset**: [OpenDV-mini](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv). 97 | - A mini version of `OpenDV-YouTube`. The raw videos consume about `44 GB` of storage space and the processed images will consume about `390 GB` of storage space. 98 | - **Step-by-step instructions for data preparation**: [OpenDV-YouTube](https://github.com/OpenDriveLab/DriveAGI/tree/main/opendv/README.md). 99 | - **Language annotations for OpenDV-YouTube**: [OpenDV-YouTube-Language](https://huggingface.co/datasets/OpenDriveLab/OpenDV-YouTube-Language). 100 | 101 | 102 | **Quick facts:** 103 | - Task: large-scale video prediction for driving scenes. 104 | - Data source: `YouTube`, with a careful collection and filtering process. 105 | - Diversity Highlights: 1700 hours of driving videos, covering more than 244 cities in 40 countries. 106 | - Related work: [GenAD](https://arxiv.org/abs/2403.09630) **`Accepted at CVPR 2024, Highlight`** 107 | - `Note`: Annotations for other public datasets in OpenDV-2K will not be released since we randomly sampled a subset of them in training, which are incomplete and hard to trace back to their origins (i.e., file names). Nevertheless, it's easy to reproduce the collection and annotation process on your own following [our paper](https://arxiv.org/abs/2403.09630). 108 | 109 | ```bibtex 110 | @inproceedings{yang2024genad, 111 | title={Generalized Predictive Model for Autonomous Driving}, 112 | author={Jiazhi Yang and Shenyuan Gao and Yihang Qiu and Li Chen and Tianyu Li and Bo Dai and Kashyap Chitta and Penghao Wu and Jia Zeng and Ping Luo and Jun Zhang and Andreas Geiger and Yu Qiao and Hongyang Li}, 113 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 114 | year={2024} 115 | } 116 | ``` 117 | 118 | ## DriveLM 119 | Introducing the first benchmark on **Language Prompt for Driving**. 120 | 121 | **Quick facts:** 122 | - Task: given language prompts as input, predict the trajectory in the scene. 123 | - Origin dataset: `nuScenes`, `CARLA (To be released)` 124 | - Repo: https://github.com/OpenDriveLab/DriveLM, https://github.com/OpenDriveLab/ELM 125 | - Related work: [DriveLM](https://arxiv.org/abs/2312.14150), [ELM](https://arxiv.org/abs/2403.04593) 126 | - Related challenge: [Driving with Language AGC Challenge 2024](https://opendrivelab.com/challenge2024/#driving_with_language) 127 | 128 | 129 | ## DriveData Survey 130 |
131 | 132 | ### Abstract 133 | With the continuous maturation and application of autonomous driving technology, a systematic examination of open-source autonomous driving datasets becomes instrumental in fostering the robust evolution of the industry ecosystem. In this survey, we provide a comprehensive analysis of more than 70 papers on the timeline, impact, challenges, and future trends in autonomous driving datasets. 134 | 135 | > **Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future** 136 | > - [English Version](https://arxiv.org/abs/2312.03408) 137 | > - [Chinese Version](https://www.sciengine.com/SSI/doi/10.1360/SSI-2023-0313) **`Accepted at SCIENTIA SINICA Informationis (中文版)`** 138 | 139 | ```bib 140 | @article{li2024_driving_dataset_survey, 141 | title = {Open-sourced Data Ecosystem in Autonomous Driving: the Present and Future}, 142 | author = {Hongyang Li and Yang Li and Huijie Wang and Jia Zeng and Huilin Xu and Pinlong Cai and Li Chen and Junchi Yan and Feng Xu and Lu Xiong and Jingdong Wang and Futang Zhu and Chunjing Xu and Tiancai Wang and Fei Xia and Beipeng Mu and Zhihui Peng and Dahua Lin and Yu Qiao}, 143 | journal = {SCIENTIA SINICA Informationis}, 144 | year = {2024}, 145 | doi = {10.1360/SSI-2023-0313} 146 | } 147 | ``` 148 | 149 | 153 | 154 | ![overview](assets/Drivedata_overview.jpg "Drivedata_overview") 155 | > Current autonomous driving datasets can broadly be categorized into two generations since the 2010s. We define the Impact (y-axis) of a dataset based on sensor configuration, input modality, task category, data scale, ecosystem, etc. 156 | 157 | ![overview](assets/Drivedata_timeline.jpg "Drivedata_timeline") 158 | 159 | ### Related Work Collection 160 | 161 | We present comprehensive paper collections, leaderboards, and challenges. (Click to expand) 162 | 163 |
164 | Challenges and Leaderboards 165 |
| Title | Host | Year | Task | Entry |
|---|---|---|---|---|
| Autonomous Driving Challenge | OpenDriveLab | CVPR 2023 | Perception / OpenLane Topology | 111 |
| | | | Perception / Online HD Map Construction | |
| | | | Perception / 3D Occupancy Prediction | |
| | | | Prediction & Planning / nuPlan Planning | |
| Waymo Open Dataset Challenges | Waymo | CVPR 2023 | Perception / 2D Video Panoptic Segmentation | 35 |
| | | | Perception / Pose Estimation | |
| | | | Prediction / Motion Prediction | |
| | | | Prediction / Sim Agents | |
| | | CVPR 2022 | Prediction / Motion Prediction | 128 |
| | | | Prediction / Occupancy and Flow Prediction | |
| | | | Perception / 3D Semantic Segmentation | |
| | | | Perception / 3D Camera-only Detection | |
| | | CVPR 2021 | Prediction / Motion Prediction | 115 |
| | | | Prediction / Interaction Prediction | |
| | | | Perception / Real-time 3D Detection | |
| | | | Perception / Real-time 2D Detection | |
| Argoverse Challenges | Argoverse | CVPR 2023 | Prediction / Multi-agent Forecasting | 81 |
| | | | Perception & Prediction / Unified Sensor-based Detection, Tracking, and Forecasting | |
| | | | Perception / LiDAR Scene Flow | |
| | | | Prediction / 3D Occupancy Forecasting | |
| | | CVPR 2022 | Perception / 3D Object Detection | 81 |
| | | | Prediction / Motion Forecasting | |
| | | | Perception / Stereo Depth Estimation | |
| | | CVPR 2021 | Perception / Stereo Depth Estimation | 368 |
| | | | Prediction / Motion Forecasting | |
| | | | Perception / Streaming 2D Detection | |
| CARLA Autonomous Driving Challenge | CARLA Team, Intel | 2023 | Planning / CARLA AD Challenge 2.0 | - |
| | | NeurIPS 2022 | Planning / CARLA AD Challenge 1.0 | 19 |
| | | NeurIPS 2021 | Planning / CARLA AD Challenge 1.0 | - |
| Guangdong-Hong Kong-Macao Greater Bay Area (Huangpu) International Algorithm Case Competition | Pazhou Lab | 2023 | Perception / Cross-scene Monocular Depth Estimation | - |
| | | | Perception / Roadside Millimeter-Wave Radar Calibration and Object Tracking | - |
| | | 2022 | Perception / Roadside 3D Perception Algorithm | - |
| | | | Perception / Storefront Sign Text Recognition in Street-view Images | - |
| AI Driving Olympics | ETH Zurich, University of Montreal, Motional | NeurIPS 2021 | Perception / nuScenes Panoptic | 11 |
| | | ICRA 2021 | Perception / nuScenes Detection | 456 |
| | | | Perception / nuScenes Tracking | |
| | | | Prediction / nuScenes Prediction | |
| | | | Perception / nuScenes LiDAR Segmentation | |
| Jittor (计图) AI Algorithm Challenge | Department of Information Sciences, National Natural Science Foundation of China | 2021 | Perception / Traffic Sign Detection | 37 |
| KITTI Vision Benchmark Suite | University of Tübingen | 2012 | Perception / Stereo, Flow, Scene Flow, Depth, Odometry, Object, Tracking, Road, Semantics | 5,610 |
392 |

(back to top)

393 | 394 |
395 | 396 |
397 | Perception Datasets 398 |
| Dataset | Year | Scenes | Hours | Region | Camera | Other Sensors | Annotation | Paper |
|---|---|---|---|---|---|---|---|---|
| KITTI | 2012 | 50 | 6 | EU | Front-view | GPS & IMU | 2D BBox & 3D BBox | Link |
| Cityscapes | 2016 | - | - | EU | Front-view | | 2D Seg | Link |
| Lost and Found | 2016 | 112 | - | - | Front-view | | 2D Seg | Link |
| Mapillary | 2016 | - | - | Global | Street-view | | 2D Seg | Link |
| DDD17 | 2017 | 36 | 12 | EU | Front-view | GPS & CAN-bus & Event Camera | - | Link |
| Apolloscape | 2016 | 103 | 2.5 | AS | Front-view | GPS & IMU | 3D BBox & 2D Seg | Link |
| BDD-X | 2018 | 6984 | 77 | NA | Front-view | | Language | Link |
| HDD | 2018 | - | 104 | NA | Front-view | GPS & IMU & CAN-bus | 2D BBox | Link |
| IDD | 2018 | 182 | - | AS | Front-view | | 2D Seg | Link |
| SemanticKITTI | 2019 | 50 | 6 | EU | | | 3D Seg | Link |
| Woodscape | 2019 | - | - | Global | 360° | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| DrivingStereo | 2019 | 42 | - | AS | Front-view | | - | Link |
| Brno-Urban | 2019 | 67 | 10 | EU | Front-view | GPS & IMU & Infrared Camera | - | Link |
| A*3D | 2019 | - | 55 | AS | Front-view | | 3D BBox | Link |
| Talk2Car | 2019 | 850 | 283.3 | NA | Front-view | | Language & 3D BBox | Link |
| Talk2Nav | 2019 | 10714 | - | Sim | 360° | | Language | Link |
| PIE | 2019 | - | 6 | NA | Front-view | | 2D BBox | Link |
| UrbanLoco | 2019 | 13 | - | AS & NA | 360° | IMU | - | Link |
| TITAN | 2019 | 700 | - | AS | Front-view | | 2D BBox | Link |
| H3D | 2019 | 160 | 0.77 | NA | Front-view | GPS & IMU | - | Link |
| A2D2 | 2020 | - | 5.6 | EU | 360° | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| CARRADA | 2020 | 30 | 0.3 | NA | Front-view | Radar | 3D BBox | Link |
| DAWN | 2019 | - | - | Global | Front-view | | 2D BBox | Link |
| 4Seasons | 2019 | - | - | - | Front-view | GPS & IMU | - | Link |
| UNDD | 2019 | - | - | - | Front-view | | 2D Seg | Link |
| SemanticPOSS | 2020 | - | - | AS | | GPS & IMU | 3D Seg | Link |
| Toronto-3D | 2020 | 4 | - | NA | | | 3D Seg | Link |
| ROAD | 2021 | 22 | - | EU | Front-view | | 2D BBox & Topology | Link |
| Reasonable Crowd | 2021 | - | - | Sim | Front-view | | Language | Link |
| METEOR | 2021 | 1250 | 20.9 | AS | Front-view | GPS | Language | Link |
| PandaSet | 2021 | 179 | - | NA | 360° | GPS & IMU | 3D BBox | Link |
| MUAD | 2022 | - | - | Sim | 360° | | 2D Seg & 2D BBox | Link |
| TAS-NIR | 2022 | - | - | - | Front-view | Infrared Camera | 2D Seg | Link |
| LiDAR-CS | 2022 | 6 | - | Sim | | | 3D BBox | Link |
| WildDash | 2022 | - | - | - | Front-view | | 2D Seg | Link |
| OpenScene | 2023 | 1000 | 5.5 | AS & NA | 360° | | 3D Occ | Link |
| ZOD | 2023 | 1473 | 8.2 | EU | 360° | GPS & IMU & CAN-bus | 3D BBox & 2D Seg | Link |
| nuScenes | 2019 | 1000 | 5.5 | AS & NA | 360° | GPS & CAN-bus & Radar & HDMap | 3D BBox & 3D Seg | Link |
| Argoverse V1 | 2019 | 324k | 320 | NA | 360° | HDMap | 3D BBox & 3D Seg | Link |
| Waymo | 2019 | 1000 | 6.4 | NA | 360° | | 2D BBox & 3D BBox | Link |
| KITTI-360 | 2020 | 366 | 2.5 | EU | 360° | | 3D BBox & 3D Seg | Link |
| ONCE | 2021 | - | 144 | AS | 360° | | 3D BBox | Link |
| nuPlan | 2021 | - | 120 | AS & NA | 360° | | 3D BBox | Link |
| Argoverse V2 | 2022 | 1000 | 4 | NA | 360° | HDMap | 3D BBox | Link |
| DriveLM | 2023 | 1000 | 5.5 | AS & NA | 360° | | Language | Link |
1049 | 1050 | 1051 |

(back to top)

1052 |
1053 | 1054 |
1055 | Mapping Datasets 1056 |
| Dataset | Year | Scenes | Frames | Sensor & Annotation | Space | Paper |
|---|---|---|---|---|---|---|
| Caltech Lanes | 2008 | 4 | 1224/1224 | | PV | Link |
| VPG | 2017 | - | 20K/20K | | PV | Link |
| TUsimple | 2017 | 6.4K | 6.4K/128K | | PV | Link |
| CULane | 2018 | - | 133K/133K | | PV | Link |
| ApolloScape | 2018 | 235 | 115K/115K | | PV | Link |
| LLAMAS | 2019 | 14 | 79K/100K | Front-view Image, Laneline | PV | Link |
| 3D Synthetic | 2020 | - | 10K/10K | | PV | Link |
| CurveLanes | 2020 | - | 150K/150K | | PV | Link |
| VIL-100 | 2021 | 100 | 10K/10K | | PV | Link |
| OpenLane-V1 | 2022 | 1K | 200K/200K | | 3D | Link |
| ONCE-3DLane | 2022 | - | 211K/211K | | 3D | Link |
| OpenLane-V2 | 2023 | 2K | 72K/72K | Multi-view Image, Lane Centerline, Lane Segment | 3D | Link |
1252 | 1253 |
1254 |
1255 | Prediction and Planning Datasets 1256 |
| Subtask | Input | Output | Evaluation | Dataset |
|---|---|---|---|---|
| Motion Prediction | Surrounding Traffic States | Spatiotemporal Trajectories of Single/Multiple Vehicle(s) | Displacement Error | Argoverse |
| | | | | nuScenes |
| | | | | Waymo |
| | | | | Interaction |
| | | | | MONA |
| Trajectory Planning | Motion States for Ego Vehicles, Scenario Cognition and Prediction | Trajectories for Ego Vehicles | Displacement Error, Safety, Compliance, Comfort | nuPlan |
| | | | | CARLA |
| | | | | MetaDrive |
| | | | | Apollo |
| Path Planning | Maps for Road Network | Routes Connecting to Nodes and Links | Efficiency, Energy Conservation | OpenStreetMap |
| | | | | Transportation Networks |
| | | | | DTAlite |
| | | | | PeMS |
| | | | | New York City Taxi Data |
1348 |
1349 |
1350 | 1351 | 1352 | 1353 | ## OpenScene 1354 |
1355 | 1356 | The Largest up-to-date **3D Occupancy Forecasting** dataset for visual pre-training. 1357 | 1358 | **Quick facts:** 1359 | - Task: given the large amount of data, predict the 3D occupancy in the environment. 1360 | - Origin dataset: `nuPlan` 1361 | - Repo: https://github.com/OpenDriveLab/OpenScene 1362 | - Related work: [OccNet](https://github.com/OpenDriveLab/OccNet) 1363 | - Related challenge: [3D Occupancy Prediction Challenge 2023](https://opendrivelab.com/AD23Challenge.html#Track3), [Occupancy and Flow AGC Challenge 2024](https://opendrivelab.com/challenge2024/#occupancy_and_flow), [Predictive World Model AGC Challenge 2024](https://opendrivelab.com/challenge2024/#predictive_world_model) 1364 |
1365 | 1366 | ## OpenLane-V2 Update 1367 |
1368 | 1369 | Flourishing [OpenLane-V2](https://github.com/OpenDriveLab/OpenLane-V2) with **Standard Definition (SD) Map and Map Elements**. 1370 | 1371 | **Quick facts:** 1372 | - Task: given multi-view images and SD-map (also known as ADAS map) as input, build the driving scene on the fly _without_ the aid of HD-map. 1373 | - Repo: https://github.com/OpenDriveLab/OpenLane-V2 1374 | - Related work: [OpenLane-V2](https://openreview.net/forum?id=OMOOO3ls6g), [TopoNet](https://github.com/OpenDriveLab/TopoNet), [LaneSegNet](https://github.com/OpenDriveLab/LaneSegNet) 1375 | - Related challenge: [Lane Topology Challenge 2023](https://opendrivelab.com/AD23Challenge.html#openlane_topology), [Mapless Driving AGC Challenge 2024](https://opendrivelab.com/challenge2024/#mapless_driving) 1376 |
1377 | 1378 | 1379 | -------------------------------------------------------------------------------- /assets/Affiliation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/Affiliation.png -------------------------------------------------------------------------------- /assets/Drivedata_overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/Drivedata_overview.jpg -------------------------------------------------------------------------------- /assets/Drivedata_timeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/Drivedata_timeline.jpg -------------------------------------------------------------------------------- /assets/cvpr24_genad_poster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/cvpr24_genad_poster.png -------------------------------------------------------------------------------- /assets/opendv_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/opendv_examples.png -------------------------------------------------------------------------------- /assets/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/overview.png -------------------------------------------------------------------------------- /assets/vista-teaser.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenDriveLab/DriveAGI/890666febf54909d804243134308001a8401b5ed/assets/vista-teaser.gif -------------------------------------------------------------------------------- /opendv/.gitignore: -------------------------------------------------------------------------------- 1 | # full OpenDV-YouTube dataset 2 | *meta/ 3 | OpenDV-YouTube/ 4 | annos 5 | 6 | # logs 7 | *exceptions.txt 8 | *output.txt 9 | *finished.txt 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | pip-wheel-metadata/ 34 | share/python-wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | 40 | # PyInstaller 41 | # Usually these files are written by a python script from a template 42 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
43 | *.manifest
44 | *.spec
45 | 
46 | # Installer logs
47 | pip-log.txt
48 | pip-delete-this-directory.txt
49 | 
50 | # Unit test / coverage reports
51 | htmlcov/
52 | .tox/
53 | .nox/
54 | .coverage
55 | .coverage.*
56 | .cache
57 | nosetests.xml
58 | coverage.xml
59 | *.cover
60 | *.py,cover
61 | .hypothesis/
62 | .pytest_cache/
63 | 
64 | # Translations
65 | *.mo
66 | *.pot
67 | 
68 | # Django stuff:
69 | *.log
70 | local_settings.py
71 | db.sqlite3
72 | db.sqlite3-journal
73 | 
74 | # Flask stuff:
75 | instance/
76 | .webassets-cache
77 | 
78 | # Scrapy stuff:
79 | .scrapy
80 | 
81 | # Sphinx documentation
82 | docs/_build/
83 | 
84 | # PyBuilder
85 | target/
86 | 
87 | # Jupyter Notebook
88 | .ipynb_checkpoints
89 | 
90 | # IPython
91 | profile_default/
92 | ipython_config.py
93 | 
94 | # pyenv
95 | .python-version
96 | 
97 | # pipenv
98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
101 | # install all needed dependencies.
102 | #Pipfile.lock
103 | 
104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105 | __pypackages__/
106 | 
107 | # Celery stuff
108 | celerybeat-schedule
109 | celerybeat.pid
110 | 
111 | # SageMath parsed files
112 | *.sage.py
113 | 
114 | # Environments
115 | .env
116 | .venv
117 | env/
118 | venv/
119 | ENV/
120 | env.bak/
121 | venv.bak/
122 | 
123 | # Spyder project settings
124 | .spyderproject
125 | .spyproject
126 | 
127 | # Rope project settings
128 | .ropeproject
129 | 
130 | # mkdocs documentation
131 | /site
132 | 
133 | # mypy
134 | .mypy_cache/
135 | .dmypy.json
136 | dmypy.json
137 | 
138 | # Pyre type checker
139 | .pyre/
140 | 
141 | .gitconfig
142 | .local
143 | .jupyter
144 | .DS_Store
145 | .python_history
146 | 
147 | data/OpenLane-V2/*
148 | !data/OpenLane-V2/data_dict_sample.json
149 | !data/OpenLane-V2/data_dict_example.json
150 | !data/OpenLane-V2/openlanev2.md5
151 | !data/OpenLane-V2/preprocess*
152 | !data/OpenLane-V2/data_dict_subset_A.json
153 | 
154 | RoadData/vis
155 | RoadData/gt_results.json
156 | RoadDataTool/vis
157 | RoadDataTool/gt_result.json
158 | RoadDataTool/pred_case1_no_turbulence.json
159 | -------------------------------------------------------------------------------- /opendv/README.md: --------------------------------------------------------------------------------
1 | # OpenDV-YouTube
2 | Due to the YouTube license, we cannot directly offer our processed data. However, you can follow the steps below to download and process the raw data yourself.
3 | 
4 | **[ NEW❗️]**: We just released the **OpenDV-mini** subset!
5 | Please feel free to try the mini subset by following the steps below. Necessary information is also contained in our OpenDV-YouTube Google Sheet (marked as `Mini` in the column `Mini / Full Set`).
6 | 
7 | ## About OpenDV-YouTube and OpenDV-mini
8 | 
9 | - The complete dataset, OpenDV-YouTube, is the **largest driving video dataset** to date, containing more than **1700 hours** of real-world driving videos, about 300 times larger than the widely used nuScenes dataset.
10 | 
11 | - The mini subset, OpenDV-mini, contains about **28 hours** of videos, with diverse geographical distribution and various camera settings. Among these videos, **25 hours** are used as `mini-train` and the remaining **3 hours** are used as `mini-val`.
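Mini-subset membership is encoded in the `Mini / Full Set` column of the Google Sheet, which the meta preprocessing step below stores as a `subset` field in the meta `json` file. The download and conversion scripts filter it for you via the `--mini` flag, but if you want to inspect the subset yourself, here is a minimal sketch, assuming the default meta path `meta/OpenDV-YouTube.json` produced in the Meta Data Preparation step:

```python
import json

# load the preprocessed meta data (see "Meta Data Preparation" below)
video_list = json.load(open("meta/OpenDV-YouTube.json", "r"))

# videos marked as `Mini` in the Google Sheet belong to OpenDV-mini
mini_list = [vid_info for vid_info in video_list if vid_info["subset"] == "Mini"]
print("{} of {} videos belong to OpenDV-mini.".format(len(mini_list), len(video_list)))
```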
12 | 
13 | ## Environment Setup
14 | 
15 | **We recommend processing the dataset in a `Linux` environment, since `Windows` may have issues with file paths.**
16 | 
17 | Install the required packages by running the following commands.
18 | 
19 | ```cmd
20 | conda create -n opendv python=3.10 -y
21 | conda activate opendv
22 | pip install -r requirements.txt
23 | ```
24 | 
25 | In case the meta data of the downloaded videos is fragmented, we recommend installing `ffmpeg<=3.4.9`. Instead of using the following commands, you can also directly clone and build from [their official repository](https://github.com/FFmpeg/FFmpeg/tree/release/3.4).
26 | 
27 | ```cmd
28 | # 1. prepare yasm for ffmpeg. If it is already installed on your machine, skip to the next step.
29 | wget https://tortall.net/projects/yasm/releases/yasm-1.3.0.tar.gz
30 | tar -xzvf yasm-1.3.0.tar.gz
31 | cd yasm-1.3.0
32 | ./configure
33 | make
34 | make install
35 | 
36 | # 2. install ffmpeg<=3.4.9.
37 | wget https://ffmpeg.org/releases/ffmpeg-3.4.9.tar.gz
38 | tar -xzvf ffmpeg-3.4.9.tar.gz
39 | cd ffmpeg-3.4.9
40 | ./configure
41 | make
42 | make install
43 | 
44 | # 3. check the installation. Sometimes you may need to reactivate the conda environment for it to take effect.
45 | ffprobe
46 | ```
47 | 
48 | ## Meta Data Preparation
49 | First, download the OpenDV-YouTube Google Sheet as a `csv` file. By default, you should save the file as `meta/OpenDV-YouTube.csv`. You can change it to any path you want, as long as you also change the `csv_path` in the command in the next step.
50 | 
51 | Then, run the following command to preprocess the meta data. The default values for `--csv_path` (or `-i`) and `--json_path` (or `-o`) are `meta/OpenDV-YouTube.csv` and `meta/OpenDV-YouTube.json`, respectively.
52 | 
53 | ```cmd
54 | python scripts/meta_preprocess.py -i CSV_PATH -o JSON_PATH
55 | ```
56 | 
57 | ## Raw Data Download (Raw videos)
58 | 
59 | To download the raw data from YouTube, you should first adjust the configuration in `configs/download.json`.
60 | 
61 | Note that the script **supports multi-threaded downloading**, so please set `num_workers` to a proper value according to your hardware and network conditions.
62 | 
63 | Also, the `format` key in the config file **should strictly obey** the format selection rules of the `youtube-dl` package. We do not recommend changing it unless you are familiar with the package.
64 | 
65 | Now you can run the following command to download the raw video data.
66 | 
67 | ```cmd
68 | python scripts/youtube_download.py >> download_output.txt
69 | ```
70 | 
71 | The download will take about $2000/\mathrm{NUM_{WORKERS}}$ hours, which may vary depending on your network condition.
72 | The default is $\mathrm{NUM_{WORKERS}} = 90$, and you can adjust it in the [config](configs/download.json#L7).
73 | The data will take about **3TB** of disk space.
74 | 
75 | If you wish to **use the mini subset**, simply add the `--mini` option to your command, i.e. run the following command.
76 | 
77 | ```cmd
78 | python scripts/youtube_download.py --mini >> download_output.txt
79 | ```
80 | 
81 | You may refer to `download_exceptions.txt` to check whether the download was successful; for a quick programmatic check, see the sketch at the end of this section. The file is automatically generated by the script in the root of the `opendv` codebase.
82 | 
83 | If downloading with `youtube-dl` is not successful, you can change the `method` in the [config](configs/download.json#L4) from `youtube-dl` to `yt-dlp`.
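Besides reading `download_exceptions.txt`, you can sanity-check the downloaded files directly. The snippet below is a minimal sketch, assuming the default config keys (`root`, `video_list`) and the script's file layout `<root>/<youtuber>/<videoid>.<ext>`, where spaces in the YouTuber name are replaced by underscores; it lists videos that are still missing. (If you only downloaded OpenDV-mini, filter the video list for the mini subset first.)

```python
import json
import os

POSSIBLE_EXTS = ["mp4", "webm", "mkv"]  # container formats the downloader may produce

configs = json.load(open("configs/download.json", "r"))
video_list = json.load(open(configs["video_list"], "r"))

missing = []
for vid_info in video_list:
    folder = os.path.join(configs["root"], vid_info["youtuber"].replace(" ", "_"))
    # a video counts as downloaded once any accepted container format exists
    if not any(os.path.exists(os.path.join(folder, "{}.{}".format(vid_info["videoid"], ext)))
               for ext in POSSIBLE_EXTS):
        missing.append(vid_info["videoid"])

print("{} of {} videos are missing.".format(len(missing), len(video_list)))
```

Note that this only checks file existence; the download script's own `check_status` additionally compares each video's duration against the meta data.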
84 | 
85 | ## Data Preprocessing (Converting videos to images)
86 | 
87 | When the download is finished, you can first set the configuration in `configs/video2img.json` as you expect. The script also **supports multi-threaded processing**, so you can set `num_workers` to a proper value according to your hardware condition.
88 | 
89 | Note that if you want to align with the annotations we provide, `frame_rate` **should not be changed.**
90 | 
91 | Then, you can run the following command to preprocess the raw video data.
92 | 
93 | ```cmd
94 | python scripts/video2img.py >> vid2img_output.txt
95 | ```
96 | 
97 | The preprocessing will take about $8000/\mathrm{NUM_{WORKERS}}$ hours, which may vary depending on your hardware condition.
98 | The default is $\mathrm{NUM_{WORKERS}} = 90$, and you can adjust it in the [config](configs/video2img.json#L6).
99 | The resulting images will take about **25TB** of disk space.
100 | 
101 | If you wish to **use the mini subset**, simply add the `--mini` option to your command, i.e. run the following command.
102 | 
103 | ```cmd
104 | python scripts/video2img.py --mini >> vid2img_output.txt
105 | ```
106 | 
107 | You may refer to `vid2img_exceptions.txt` to check the status.
108 | 
109 | ## Language Annotations
110 | 
111 | The full annotation data, including **commands** and **contexts** of video clips, is available at OpenDV-YouTube-Language. The files are in `json` format, with a total size of about **14GB**.
112 | 
113 | The annotation data is aligned with the structure of the preprocessed data. You can use the following code to load the annotations.
114 | 
115 | ```python
116 | import json
117 | 
118 | # for train
119 | full_annos = []
120 | for split_id in range(10):
121 |     split = json.load(open("10hz_YouTube_train_split{}.json".format(split_id), "r"))
122 |     full_annos.extend(split)
123 | 
124 | # for val
125 | val_annos = json.load(open("10hz_YouTube_val.json", "r"))
126 | ```
127 | 
128 | Annotations will be loaded into `full_annos` as a list, where each element contains the annotations for one video clip. All elements in the list are dictionaries with the following structure.
129 | 
130 | ```python
131 | {
132 |     "cmd": ...,          # command, i.e. the command of the ego vehicle in the video clip.
133 |     "blip": ...,         # context, i.e. the BLIP description of the center frame in the video clip.
134 |     "folder": ...,       # the relative path from the processed OpenDV-YouTube dataset root to the image folder of the video clip.
135 |     "first_frame": ...,  # the filename of the first frame in the clip. Note that this frame is included in the video clip.
136 |     "last_frame": ...,   # the filename of the last frame in the clip. Note that this frame is included in the video clip.
137 | }
138 | ```
139 | 
140 | The command, *i.e.* the `cmd` field, can be converted to natural language using the `map_category_to_caption` function. You may refer to [cmd2caption.py](utils/cmd2caption.py#L158) for details.
141 | 
142 | The context, *i.e.* the `blip` field, is the description of the **center frame** in the video, generated by `BLIP2`.
143 | 
144 | 
145 | ## Citation
146 | 
147 | If you find our work helpful, please cite the following paper.
148 | 149 | ```bibtex 150 | @inproceedings{yang2024genad, 151 | title={Generalized Predictive Model for Autonomous Driving}, 152 | author={Jiazhi Yang and Shenyuan Gao and Yihang Qiu and Li Chen and Tianyu Li and Bo Dai and Kashyap Chitta and Penghao Wu and Jia Zeng and Ping Luo and Jun Zhang and Andreas Geiger and Yu Qiao and Hongyang Li}, 153 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 154 | year={2024} 155 | } 156 | ``` -------------------------------------------------------------------------------- /opendv/configs/download.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": "OpenDV-YouTube/videos", 3 | "video_list": "meta/OpenDV-YouTube.json", 4 | "method": "yt-dlp", 5 | "format": "bestvideo[height>=720,height<=1080]/best[height>=720,height<=1080]/bestvideo[height>=720]/best[height>=720]", 6 | "format_for_ytdlp": "bv*[height<=?1080][height>=720]/b*[height<=?1080][height>=720]", 7 | "num_workers": 90, 8 | "exception_file": "download_exceptions.txt" 9 | } -------------------------------------------------------------------------------- /opendv/configs/video2img.json: -------------------------------------------------------------------------------- 1 | { 2 | "video_root": "OpenDV-YouTube/videos", 3 | "train_img_root": "OpenDV-YouTube/full_images", 4 | "val_img_root": "OpenDV-YouTube/val_images", 5 | "meta_info": "meta/OpenDV-YouTube.json", 6 | "num_workers": 90, 7 | "frame_rate": 10, 8 | "exception_file": "vid2img_exceptions.txt", 9 | "finish_log": "vid2img_finished.txt" 10 | } -------------------------------------------------------------------------------- /opendv/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/ytdl-org/youtube-dl 2 | git+https://github.com/yt-dlp/yt-dlp@a065086640e888e8d58c615d52ed2f4f4e4c9d18 3 | 4 | opencv-python 5 | decord 6 | tqdm 7 | pandas -------------------------------------------------------------------------------- /opendv/scripts/meta_preprocess.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is used for preprocessing OpenDV-YouTube meta data, from Google sheet (as csv file) to json file. 3 | The script is a part of the [`GenAD`](https://arxiv.org/abs/2403.09630) project. 
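Usage (assumed from the argparse defaults in __main__ below): python scripts/meta_preprocess.py -i meta/OpenDV-YouTube.csv -o meta/OpenDV-YouTube.json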
4 | """ 5 | 6 | import json 7 | import pandas as pd 8 | import numpy as np 9 | import argparse 10 | from tqdm import tqdm 11 | 12 | KEY_MAP = { 13 | 'train / val': 'split', 14 | 'mini / full set': 'subset', 15 | 'nation or area (inferred by gpt)': 'area', 16 | 'state, province, or city (inferred by gpt and refined by human)': 'state', 17 | 'discarded length at the begininning (second)': 'start_discard', 18 | 'discarded length at the ending (second)': 'end_discard' 19 | } 20 | 21 | SPECIFIC_TYPE_MAP = { 22 | 'state': str 23 | } 24 | 25 | def duration2length(duration): 26 | """ 27 | duration: HH:MM:SS, or MM:SS 28 | length: int (seconds) 29 | """ 30 | duration = duration.split(":") 31 | length = int(duration[0]) * 60 + int(duration[1]) 32 | if len(duration) == 3: 33 | length = length * 60 + int(duration[2]) 34 | return length 35 | 36 | 37 | def csv2json(csv_path, json_path): 38 | df = pd.read_csv(csv_path) 39 | vid_list = [] 40 | keys = df.keys() 41 | for vid_id in tqdm(range(len(df["ID"]))): 42 | vid_info = dict() 43 | for key in keys: 44 | value = df[key][vid_id] 45 | assigned_key = KEY_MAP.get(key.lower(), key.lower()) 46 | if assigned_key in SPECIFIC_TYPE_MAP: 47 | value = SPECIFIC_TYPE_MAP[assigned_key](value) 48 | if isinstance(value, np.int64): 49 | value = int(value) 50 | elif value == "nan": 51 | value = "N/A" 52 | vid_info[assigned_key] = value 53 | 54 | vid_info["length"] = duration2length(vid_info["duration"]) 55 | vid_list.append(vid_info) 56 | 57 | with open(json_path, "w") as f: 58 | json.dump(vid_list, f, indent=4, ensure_ascii=True) 59 | 60 | 61 | if __name__ == "__main__": 62 | parser = argparse.ArgumentParser(description='Convert OpenDV-YouTube meta data from csv to json') 63 | parser.add_argument('--csv_path', '-i', type=str, default="meta/OpenDV-YouTube.csv", help='path to the csv file') 64 | parser.add_argument('--json_path', '-o', type=str, default="meta/OpenDV-YouTube.json", help='path to the json file') 65 | args = parser.parse_args() 66 | 67 | csv2json(args.csv_path, args.json_path) -------------------------------------------------------------------------------- /opendv/scripts/video2img.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is used for preprocessing OpenDV-YouTube meta data, from raw video files to image files. 3 | The script is a part of the [`GenAD`](https://arxiv.org/abs/2403.09630) project. 
4 | """ 5 | 6 | import json 7 | import os, sys 8 | import time 9 | import argparse 10 | from multiprocessing import Pool 11 | 12 | from tqdm import tqdm 13 | 14 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 15 | sys.path.append(root_dir) 16 | 17 | from utils.easydict import EasyDict 18 | from utils.frame_extraction import extract_frames 19 | from utils.download import POSSIBLE_EXTS, youtuber_formatize, get_mini_opendv 20 | 21 | 22 | def collect_unfinished_videos(config, mini=False): 23 | configs = EasyDict(json.load(open(config, "r"))) 24 | root = { 25 | "train": configs.train_img_root, 26 | "val": configs.val_img_root 27 | } 28 | 29 | meta_infos = json.load(open(configs.meta_info, "r")) 30 | if mini: 31 | meta_infos = get_mini_opendv(meta_infos) 32 | if os.path.exists(configs.finish_log): 33 | finish_log = set(open(configs.finish_log, "r").readlines()) 34 | finish_log = {x.strip() for x in finish_log} 35 | else: 36 | finish_log = set() 37 | 38 | unfinished_videos = [] 39 | print("collecting unfinished videos...") 40 | for video_meta in tqdm(meta_infos): 41 | if video_meta["videoid"] in finish_log: 42 | continue 43 | video_path = os.path.join(configs.video_root, youtuber_formatize(video_meta["youtuber"]), video_meta['videoid']) 44 | for ext in POSSIBLE_EXTS: 45 | if os.path.exists(f"{video_path}.{ext}"): 46 | break 47 | if not os.path.exists(f"{video_path}.{ext}"): 48 | raise ValueError(f"Video {video_meta['videoid']} not found. maybe something wrong in the download process?") 49 | 50 | video_info = { 51 | "video_id": video_meta["videoid"], 52 | "video_path": f"{video_path}.{ext}", 53 | "output_dir": os.path.join(root[video_meta["split"].lower()], youtuber_formatize(video_meta["youtuber"]), video_meta['videoid']), 54 | "freq": configs.frame_rate, 55 | "start_discard": video_meta["start_discard"], 56 | "end_discard": video_meta["end_discard"], 57 | "exception_file": configs.exception_file, 58 | "finish_log": configs.finish_log 59 | } 60 | unfinished_videos.append(video_info) 61 | 62 | return unfinished_videos, EasyDict(configs) 63 | 64 | 65 | def convert_multiprocess(video_lists, configs): 66 | video_count = len(video_lists) 67 | with Pool(configs.num_workers) as p: 68 | current_time = time.perf_counter() 69 | for _ in tqdm(p.imap(extract_frames, video_lists), total=video_count): 70 | pass 71 | 72 | 73 | if __name__ == '__main__': 74 | parser = argparse.ArgumentParser() 75 | parser.add_argument('--config', type=str, default='configs/video2img.json') 76 | parser.add_argument('--mini', action='store_true', default=False, help='Convert mini dataset only.') 77 | # parser.add_argument('--start_id', type=int, default=0) 78 | # parser.add_argument('--end_id', type=int, default=-1) 79 | # parser.add_argument('--test_video', type=str, default=None) 80 | 81 | args = parser.parse_args() 82 | video_lists, meta_configs = collect_unfinished_videos(args.config, args.mini) 83 | 84 | # if args.end_id == -1: 85 | # args.end_id = len(video_lists) 86 | # video_lists = video_lists[args.start_id:args.end_id] 87 | # if args.test_video is not None: 88 | # convert_multiprocess([{**video_lists[0], "video_path": args.test_video}], meta_config) 89 | # exit(0) 90 | 91 | convert_multiprocess(video_lists, meta_configs) -------------------------------------------------------------------------------- /opendv/scripts/youtube_download.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is used for downloading OpenDV-YouTube raw data. 
3 | The script is a part of the [`GenAD`](https://arxiv.org/abs/2403.09630) project. 4 | """ 5 | 6 | from multiprocessing import Pool 7 | from tqdm import tqdm 8 | import os, sys 9 | import time 10 | import json 11 | import cv2 12 | 13 | root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 14 | sys.path.append(root_dir) 15 | 16 | from utils.easydict import EasyDict 17 | from utils.download import youtuber_formatize, POSSIBLE_EXTS, get_video_with_meta, get_mini_opendv 18 | 19 | CONFIGS = dict() 20 | 21 | def single_download(args): 22 | vid_info, CONFIGS = args 23 | 24 | url = vid_info["link"] 25 | filename = vid_info["videoid"] 26 | folder = youtuber_formatize(vid_info["youtuber"]) 27 | path = os.path.join(CONFIGS.root, folder) 28 | 29 | for ext in POSSIBLE_EXTS: 30 | if os.path.exists(f"{path}/{filename}.{ext}"): 31 | print(f"Video {filename} already exists in {path}. Skipping...") 32 | return 33 | if not os.path.exists(path): 34 | os.makedirs(path, exist_ok=True) 35 | 36 | try: 37 | ret = os.system(f"{CONFIGS.method} -f '{CONFIGS.format}' -o '{path}/{filename}.%(ext)s' {url}") 38 | if ret != 0: 39 | raise Exception("ERROR: Video unavailable or network error.") 40 | except Exception as e: 41 | with open(CONFIGS.exception_file, "a") as f: 42 | f.write("Error downloading video [{}]: {}\n".format(filename, e)) 43 | return 44 | 45 | 46 | def multiple_download(video_list, configs): 47 | global CONFIGS 48 | 49 | video_count = len(video_list) 50 | CONFIGS["method"] = configs["method"] 51 | assert CONFIGS["method"] in ["youtube-dl", "yt-dlp"], "Only support `youtube-dl` and `yt-dlp`." 52 | CONFIGS["format"] = configs["format"] if configs["method"] == "youtube-dl" else configs["format_for_ytdlp"] 53 | CONFIGS["root"] = configs.root 54 | CONFIGS["exception_file"] = configs.exception_file 55 | CONFIGS = EasyDict(CONFIGS) 56 | finished = 0 57 | with Pool(configs.num_workers) as p: 58 | current_time = time.perf_counter() 59 | for _ in tqdm(p.imap(single_download, [(vid_info, CONFIGS) for vid_info in video_list]), total=video_count): 60 | finished += 1 61 | working_time = time.perf_counter() - current_time 62 | eta = working_time / finished * (video_count - finished) 63 | eta = time.strftime("%H:%M:%S", time.gmtime(eta)) 64 | print("Finished {}/{} videos. ETA: {}.".format(finished, video_count, eta)) 65 | 66 | 67 | def check_status(video_list, configs): 68 | if "exception_file" not in configs: 69 | print("No exception file specified. Skipping...") 70 | return 71 | 72 | print("Checking download status...") 73 | with open(configs.exception_file, "a") as f: 74 | f.write("\n\nChecking download status...\n") 75 | 76 | for vid_info in tqdm(video_list): 77 | exists = False 78 | path = os.path.join(configs.root, youtuber_formatize(vid_info["youtuber"])) 79 | for ext in POSSIBLE_EXTS: 80 | if os.path.exists("{}/{}.{}".format(path, vid_info["videoid"], ext)): 81 | exists = True 82 | break 83 | if not exists: 84 | with open(configs.exception_file, "a") as f: 85 | f.write(f"Video [{vid_info['videoid']}] not found in [{path}].\n") 86 | continue 87 | 88 | _, true_duration = get_video_with_meta("{}/{}.{}".format(path, vid_info["videoid"], ext), ["duration"]) 89 | 90 | duration_in_json = vid_info["duration"] 91 | expected_duration = vid_info["length"] 92 | 93 | if abs(true_duration - expected_duration) > 5: 94 | with open(configs.exception_file, "a") as f: 95 | f.write(f"Video [{vid_info['videoid']}]: Duration mismatch. 
Expected: {duration_in_json} ({expected_duration} seconds), True: {true_duration} seconds.\n") 96 | 97 | with open(configs.exception_file, "a") as f: 98 | f.write("\nChecking download status finished.") 99 | 100 | 101 | if __name__ == '__main__': 102 | import argparse 103 | parser = argparse.ArgumentParser() 104 | parser.add_argument("--config", type=str, default="configs/download.json", help="Path to the config file. should be a `json` file.") 105 | parser.add_argument("--mini", action="store_true", default=False, help="Download mini dataset only.") 106 | args = parser.parse_args() 107 | 108 | configs = EasyDict(json.load(open(args.config, "r"))) 109 | with open(configs.exception_file, "w") as f: 110 | f.write("") 111 | 112 | video_list = json.load(open(configs.pop("video_list"), "r")) 113 | if args.mini: 114 | video_list = get_mini_opendv(video_list) 115 | if not os.path.exists(configs.root): 116 | os.makedirs(configs.root, exist_ok=True) 117 | 118 | multiple_download(video_list, configs) 119 | check_status(video_list, configs) -------------------------------------------------------------------------------- /opendv/utils/cmd2caption.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | plain_caption_dict = { 4 | 0: "Go straight.", 5 | 1: "Pass the intersection.", 6 | 2: "Turn left.", 7 | 3: "Turn right.", 8 | 4: "Change to the left lane.", 9 | 5: "Change to the right lane.", 10 | 6: "Go to the left lane branch.", 11 | 7: "Go to the right lane branch.", 12 | 8: "Pass the crosswalk.", 13 | 9: "Pass the railroad.", 14 | 10: "Merge.", 15 | 11: "Make a U-turn.", 16 | 12: "Stop.", 17 | 13: "Deviate." 18 | } 19 | 20 | diverse_caption_dict = { 21 | 0: [ 22 | "Move forward.", 23 | "Move steady.", 24 | "Go forward.", 25 | "Go straight.", 26 | "Proceed.", 27 | "Drive forward.", 28 | "Drive straight.", 29 | "Drive steady.", 30 | "Keep the direction.", 31 | "Maintain the direction.", 32 | ], 33 | 1: [ 34 | "Pass the intersection.", 35 | "Cross the intersection.", 36 | "Traverse the intersection.", 37 | "Drive through the intersection.", 38 | "Move past the intersection.", 39 | "Pass the junction.", 40 | "Cross the junction.", 41 | "Traverse the junction.", 42 | "Drive through the junction.", 43 | "Move past the junction.", 44 | "Pass the crossroad.", 45 | "Cross the crossroad.", 46 | "Traverse the crossroad.", 47 | "Drive through the crossroad.", 48 | "Move past the crossroad.", 49 | ], 50 | 2: [ 51 | "Turn left.", 52 | "Turn to the left.", 53 | "Make a left turn.", 54 | "Take a left turn.", 55 | "Turn to the left.", 56 | "Left turn.", 57 | "Steer left.", 58 | "Steer to the left.", 59 | ], 60 | 3: [ 61 | "Turn right.", 62 | "Turn to the right.", 63 | "Make a right turn.", 64 | "Take a right turn.", 65 | "Turn to the right.", 66 | "Right turn.", 67 | "Steer right.", 68 | "Steer to the right.", 69 | ], 70 | 4: [ 71 | "Make a left lane change.", 72 | "Change to the left lane.", 73 | "Switch to the left lane.", 74 | "Shift to the left lane.", 75 | "Move to the left lane.", 76 | ], 77 | 5: [ 78 | "Make a right lane change.", 79 | "Change to the right lane.", 80 | "Switch to the right lane.", 81 | "Shift to the right lane.", 82 | "Move to the right lane.", 83 | ], 84 | 6: [ 85 | "Go to the left lane branch.", 86 | "Take the left lane branch.", 87 | "Move into the left lane branch.", 88 | "Follow the left lane branch.", 89 | "Follow the left side road.", 90 | ], 91 | 7: [ 92 | "Go to the right lane branch.", 93 | "Take the right lane branch.", 94 | 
"Move into the right lane branch.", 95 | "Follow the right lane branch.", 96 | "Follow the right side road.", 97 | ], 98 | 8: [ 99 | "Pass the crosswalk.", 100 | "Cross the crosswalk.", 101 | "Traverse the crosswalk.", 102 | "Drive through the crosswalk.", 103 | "Move past the crosswalk.", 104 | "Pass the crossing area.", 105 | "Cross the crossing area.", 106 | "Traverse the crossing area.", 107 | "Drive through the crossing area.", 108 | "Move past the crossing area.", 109 | ], 110 | 9: [ 111 | "Pass the railroad.", 112 | "Cross the railroad.", 113 | "Traverse the railroad.", 114 | "Drive through the railroad.", 115 | "Move past the railroad.", 116 | "Pass the railway.", 117 | "Cross the railway.", 118 | "Traverse the railway.", 119 | "Drive through the railway.", 120 | "Move past the railway.", 121 | ], 122 | 10: [ 123 | "Merge.", 124 | "Merge traffic.", 125 | "Merge into traffic.", 126 | "Merge into the traffic.", 127 | "Join the traffic.", 128 | "Merge into the traffic flow.", 129 | "Join the traffic flow.", 130 | "Merge into the traffic stream.", 131 | "Join the traffic stream.", 132 | "Merge into the lane.", 133 | ], 134 | 11: [ 135 | "Make a U-turn.", 136 | "Make a 180-degree turn.", 137 | "Turn 180 degree.", 138 | "Turn around.", 139 | "Drive in a U-turn.", 140 | ], 141 | 12: [ 142 | "Stop.", 143 | "Halt.", 144 | "Decelerate.", 145 | "Slow down.", 146 | "Brake.", 147 | ], 148 | 13: [ 149 | "Deviate.", 150 | "Deviate from the path.", 151 | "Deviate from the lane.", 152 | "Change the direction.", 153 | "Shift the direction.", 154 | ] 155 | } 156 | 157 | 158 | def map_category_to_caption(category_index, diverse=True): 159 | if diverse: 160 | return random.choice(diverse_caption_dict[category_index]) 161 | else: 162 | return plain_caption_dict[category_index] 163 | -------------------------------------------------------------------------------- /opendv/utils/download.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | 4 | POSSIBLE_EXTS = ["mp4", "webm", "mkv"] 5 | 6 | def youtuber_formatize(youtuber): 7 | return youtuber.replace(" ", "_") 8 | 9 | 10 | def get_video_with_meta(video_path, need_metas=["fps", "duration", "num_frames"]): 11 | if not os.path.exists(video_path): 12 | video = None 13 | fps = -1 14 | duration = -1 15 | num_frames = -1 16 | else: 17 | try: 18 | video = cv2.VideoCapture(video_path) 19 | fps = video.get(cv2.CAP_PROP_FPS) 20 | if fps == 0: 21 | cmd = "ffprobe -v error -select_streams v -of default=noprint_wrappers=1:nokey=1 -show_entries stream=r_frame_rate {}".format(video_path) 22 | precise_fps = os.popen(cmd).read().split("/") 23 | fps = float(precise_fps[0]) / float(precise_fps[1]) 24 | if ("num_frames" in need_metas) or ("duration" in need_metas): 25 | cmd = "ffprobe -show_entries format=duration -v quiet -of csv=\"p=0\" {}".format(video_path) 26 | precise_duration = os.popen(cmd).read() 27 | duration = int(float(precise_duration)) 28 | if "num_frames" in need_metas: 29 | num_frames = int(duration * fps) 30 | else: 31 | if "num_frames" in need_metas: 32 | num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) 33 | if "duration" in need_metas: 34 | duration = video.get(cv2.CAP_PROP_FRAME_COUNT) / fps 35 | 36 | except Exception as e: 37 | print("Error: ", e) 38 | video = None 39 | fps = -1 40 | duration = -1 41 | num_frames = -1 42 | 43 | return_params = (video,) 44 | if "fps" in need_metas: 45 | return_params += (fps,) 46 | if "duration" in need_metas: 47 | return_params += (duration,) 48 | if 
"num_frames" in need_metas: 49 | return_params += (num_frames,) 50 | 51 | return return_params 52 | 53 | def get_mini_opendv(full_video_list): 54 | mini_list = [] 55 | for vid_info in full_video_list: 56 | if vid_info["subset"] != "Mini": 57 | continue 58 | mini_list.append(vid_info) 59 | 60 | return mini_list -------------------------------------------------------------------------------- /opendv/utils/easydict.py: -------------------------------------------------------------------------------- 1 | class EasyDict(dict): 2 | """ 3 | Get attributes 4 | 5 | >>> d = EasyDict({'foo':3}) 6 | >>> d['foo'] 7 | 3 8 | >>> d.foo 9 | 3 10 | >>> d.bar 11 | Traceback (most recent call last): 12 | ... 13 | AttributeError: 'EasyDict' object has no attribute 'bar' 14 | 15 | Works recursively 16 | 17 | >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}}) 18 | >>> isinstance(d.bar, dict) 19 | True 20 | >>> d.bar.x 21 | 1 22 | 23 | Bullet-proof 24 | 25 | >>> EasyDict({}) 26 | {} 27 | >>> EasyDict(d={}) 28 | {} 29 | >>> EasyDict(None) 30 | {} 31 | >>> d = {'a': 1} 32 | >>> EasyDict(**d) 33 | {'a': 1} 34 | 35 | Set attributes 36 | 37 | >>> d = EasyDict() 38 | >>> d.foo = 3 39 | >>> d.foo 40 | 3 41 | >>> d.bar = {'prop': 'value'} 42 | >>> d.bar.prop 43 | 'value' 44 | >>> d 45 | {'foo': 3, 'bar': {'prop': 'value'}} 46 | >>> d.bar.prop = 'newer' 47 | >>> d.bar.prop 48 | 'newer' 49 | 50 | 51 | Values extraction 52 | 53 | >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]}) 54 | >>> isinstance(d.bar, list) 55 | True 56 | >>> from operator import attrgetter 57 | >>> map(attrgetter('x'), d.bar) 58 | [1, 3] 59 | >>> map(attrgetter('y'), d.bar) 60 | [2, 4] 61 | >>> d = EasyDict() 62 | >>> d.keys() 63 | [] 64 | >>> d = EasyDict(foo=3, bar=dict(x=1, y=2)) 65 | >>> d.foo 66 | 3 67 | >>> d.bar.x 68 | 1 69 | 70 | Still like a dict though 71 | 72 | >>> o = EasyDict({'clean':True}) 73 | >>> o.items() 74 | [('clean', True)] 75 | 76 | And like a class 77 | 78 | >>> class Flower(EasyDict): 79 | ... power = 1 80 | ... 81 | >>> f = Flower() 82 | >>> f.power 83 | 1 84 | >>> f = Flower({'height': 12}) 85 | >>> f.height 86 | 12 87 | >>> f['power'] 88 | 1 89 | >>> sorted(f.keys()) 90 | ['height', 'power'] 91 | 92 | update and pop items 93 | >>> d = EasyDict(a=1, b='2') 94 | >>> e = EasyDict(c=3.0, a=9.0) 95 | >>> d.update(e) 96 | >>> d.c 97 | 3.0 98 | >>> d['c'] 99 | 3.0 100 | >>> d.get('c') 101 | 3.0 102 | >>> d.update(a=4, b=4) 103 | >>> d.b 104 | 4 105 | >>> d.pop('a') 106 | 4 107 | >>> d.a 108 | Traceback (most recent call last): 109 | ... 
110 |     AttributeError: 'EasyDict' object has no attribute 'a'
111 |     """
112 | 
113 |     def __init__(self, d=None, **kwargs):
114 |         if d is None:
115 |             d = {}
116 |         if kwargs:
117 |             d.update(**kwargs)
118 |         for k, v in d.items():
119 |             setattr(self, k, v)
120 |         # Class attributes
121 |         for k in self.__class__.__dict__.keys():
122 |             if not (k.startswith("__") and k.endswith("__")) and not k in ("update", "pop"):
123 |                 setattr(self, k, getattr(self, k))
124 | 
125 |     def __setattr__(self, name, value):
126 |         if isinstance(value, (list, tuple)):
127 |             value = [self.__class__(x) if isinstance(x, dict) else x for x in value]
128 |         elif isinstance(value, dict) and not isinstance(value, self.__class__):
129 |             value = self.__class__(value)
130 |         super(EasyDict, self).__setattr__(name, value)
131 |         super(EasyDict, self).__setitem__(name, value)
132 | 
133 |     __setitem__ = __setattr__
134 | 
135 |     def update(self, e=None, **f):
136 |         d = e or dict()
137 |         d.update(f)
138 |         for k in d:
139 |             setattr(self, k, d[k])
140 | 
141 |     def pop(self, k, d=None):
142 |         if hasattr(self, k):
143 |             delattr(self, k)
144 |         return super(EasyDict, self).pop(k, d)
145 | 
146 | 
147 | if __name__ == "__main__":
148 |     import doctest
149 |     doctest.testmod()  # actually run the doctests above; previously doctest was imported but never invoked
-------------------------------------------------------------------------------- /opendv/utils/frame_extraction.py: --------------------------------------------------------------------------------
1 | """
2 | This script is used for preprocessing OpenDV-YouTube raw data.
3 | The script is a part of the [`GenAD`](https://arxiv.org/abs/2403.09630) project.
4 | """
5 | 
6 | import os
7 | import time
8 | import traceback
9 | import json
10 | 
11 | import numpy as np
12 | import decord
13 | import cv2
14 | from tqdm import tqdm
15 | 
16 | from utils.download import get_video_with_meta
17 | 
18 | DECORD_ACCEPTABLE_TYPES = ['mp4']
19 | FORCE_USE_CV2 = True
20 | 
21 | IDX_WIDTH = 9
22 | # set [IDX_WIDTH] to [None] if you want to use the default format, i.e. zero padding to the maximal index of a video
23 | INFO_INTERVAL = 1000
24 | DEFAULT_FPS = 10
25 | 
26 | 
27 | def extract_frames(video_info):
28 |     video_path = video_info.get("video_path", None)
29 |     output_dir = video_info.get("output_dir", None)
30 |     fps = video_info.get("freq", DEFAULT_FPS)
31 |     discard_begin = video_info.get("start_discard", 90)
32 |     discard_end = video_info.get("end_discard", 60)
33 |     exception_file = video_info.get("exception_file", None)
34 |     finish_log = video_info.get("finish_log", None)
35 | 
36 |     if video_path is None or output_dir is None:
37 |         print("skipping invalid video info...")
38 |         return
39 | 
40 |     try:
41 |         if (FORCE_USE_CV2) or (video_path.split('.')[-1] not in DECORD_ACCEPTABLE_TYPES):
42 |             print("[opencv] extracting frames from video [{}]...".format(video_path))
43 |             cv2_extract_frames(video_path, output_dir, fps, discard_begin, discard_end, exception_file)
44 |         else:
45 |             print("[decord] extracting frames from video [{}]...".format(video_path))
46 |             decord_extract_frames(video_path, output_dir, fps, discard_begin, discard_end, exception_file)
47 | 
48 |         if finish_log is not None:
49 |             with open(finish_log, "a") as f:
50 |                 f.write(video_info.get("video_id", video_path.split("/")[-1]))
51 |                 f.write("\n")
52 | 
53 |     except Exception as e:
54 |         exceptions = dict()
55 |         exceptions["video_path"] = video_path
56 |         exceptions["problem"] = str(e)
57 |         exceptions["action"] = "skipped"
58 |         exceptions["details"] = traceback.format_exc()
59 |         json.dump(exceptions, open(exception_file, "a"), indent=4)
60 |         with open(exception_file, "a") as f:
61 |             f.write(",\n")
62 | 
63 |         traceback.print_exc()
64 | 
65 | 
66 | def count_done_frames(save_path):
67 |     return len(os.listdir(save_path))
68 | 
69 | def special_video_setting_log(video_path, exception_file, height=None, width=None, video_reader=None):
70 |     skipped = False
71 | 
72 |     exception = None
73 |     if (video_reader is None) and ((height is None) or (width is None)):  # no reader and no known dimensions: the video is unreadable
74 |         exception = {
75 |             "video_path": video_path,
76 |             "problem": "video not found or corrupted",
77 |             "action": "skipped",
78 |             "details": "video not found or corrupted"
79 |         }
80 |         skipped = True  # fall through so the exception is actually written below; it used to return early without logging, and wrongly skipped videos whose dimensions were passed in without a reader
81 |     else:
82 |         if (height is None) or (width is None):
83 |             height = video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT)
84 |             width = video_reader.get(cv2.CAP_PROP_FRAME_WIDTH)
85 | 
86 |         if (width < 1280) and (height < 720):
87 |             exception = {
88 |                 "video_path": video_path,
89 |                 "problem": "< 720p",
90 |                 "action": "skipped",
91 |                 "details": "{} x {}".format(width, height)
92 |             }
93 |             skipped = True
94 | 
95 |         elif (width / height != 16 / 9):
96 |             exception = {
97 |                 "video_path": video_path,
98 |                 "problem": "not 16:9",
99 |                 "action": "as normal",
100 |                 "details": "{} x {}".format(width, height)
101 |             }
102 | 
103 |     if exception is not None:
104 |         json.dump(exception, open(exception_file, "a"), indent=4)
105 |         with open(exception_file, "a") as f:
106 |             f.write(",\n")
107 | 
108 |     return skipped
109 | 
110 | 
111 | def decord_extract_frames(video_path, save_path, fps=10, discard_begin=90, discard_end=60, msg_file=None):
112 |     start_index = 0
113 |     if not os.path.exists(save_path):
114 |         os.makedirs(save_path)
115 |     else:
116 |         start_index = count_done_frames(save_path) - 1
117 |         # so that we could rewrite the last frame, in case the last frame is corrupted
118 | 
119 |     video = decord.VideoReader(video_path, ctx=decord.cpu(), num_threads=1)
120 |     video_fps = video.get_avg_fps()
121 |     num_frames = int( fps * (len(video) // video_fps - discard_begin - discard_end) )
122 |     idx_width = len(str(num_frames)) if IDX_WIDTH is None else
IDX_WIDTH 123 | interval = video_fps / fps 124 | 125 | img = video[0].asnumpy() 126 | frame_height, frame_width, _ = img.shape 127 | if special_video_setting_log(video_path, msg_file, frame_height, frame_width): 128 | return 129 | del img 130 | first_log = True 131 | 132 | indices = np.array([ int(discard_begin * video_fps) + int(np.round(i * interval)) for i in range(num_frames)]) 133 | start_time = time.perf_counter() 134 | ids = list(range(num_frames)) 135 | for id in ids[start_index:]: 136 | frame = video[indices[id]].asnumpy() 137 | file_path = os.path.join(save_path, str(id).zfill(idx_width) + ".jpg") 138 | cv2.imwrite(file_path, cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)) 139 | 140 | if (first_log) or ((id+1) % INFO_INTERVAL == 0): 141 | first_log = False 142 | elapsed_time = time.perf_counter() - start_time 143 | eta = elapsed_time / (id+1) * (len(indices) - id - 1) 144 | elapsed_time = time.strftime("%H:%M:%S", time.gmtime(elapsed_time)) 145 | eta = time.strftime("%H:%M:%S", time.gmtime(eta)) 146 | progress_bar = "\u2588" * int((id+1) / len(indices) * 20) + " " * (20 - int((id+1) / len(indices) * 20)) 147 | print("{} {}/{} Elapsed: {}\t ETA: {}".format(progress_bar, id+1, len(indices), elapsed_time, eta)) 148 | 149 | 150 | def cv2_extract_frames(video_path, save_path, fps=10, discard_begin=90, discard_end=60, msg_file=None): 151 | start_index = 0 152 | if not os.path.exists(save_path): 153 | os.makedirs(save_path) 154 | else: 155 | start_index = count_done_frames(save_path) -1 156 | # so that we could rewrite the last frame, in case the last frame is corrupted 157 | 158 | video, video_fps, total_frames = get_video_with_meta(video_path, need_metas=["fps", "num_frames"]) 159 | if video is not None: 160 | num_frames = int( fps * (total_frames // video_fps - discard_begin - discard_end) ) 161 | idx_width = len(str(num_frames)) if IDX_WIDTH is None else IDX_WIDTH 162 | interval = video_fps / fps 163 | 164 | if special_video_setting_log(video_path, msg_file, video_reader=video): 165 | return 166 | first_log, first_frame = True, True 167 | 168 | indices = np.array([ int(discard_begin * video_fps) + int(np.round(i * interval)) for i in range(num_frames)]) 169 | start_time = time.perf_counter() 170 | ids = list(range(num_frames)) 171 | for id in ids[start_index:]: 172 | if first_frame: 173 | video.set(cv2.CAP_PROP_POS_FRAMES, indices[id]) 174 | video.grab() 175 | first_frame = False 176 | else: 177 | for _ in range(indices[id] - indices[id-1]): 178 | video.grab() 179 | 180 | _, frame = video.retrieve() 181 | file_path = os.path.join(save_path, str(id).zfill(idx_width) + ".jpg") 182 | cv2.imwrite(file_path, frame) 183 | 184 | if (first_log) or ((id+1) % INFO_INTERVAL == 0): 185 | first_log = False 186 | elapsed_time = time.perf_counter() - start_time 187 | eta = elapsed_time / (id+1) * (len(indices) - id - 1) 188 | elapsed_time = time.strftime("%H:%M:%S", time.gmtime(elapsed_time)) 189 | eta = time.strftime("%H:%M:%S", time.gmtime(eta)) 190 | progress_bar = "\u2588" * int((id+1) / len(indices) * 20) + " " * (20 - int((id+1) / len(indices) * 20)) 191 | print("{} {}/{} Elapsed: {}\t ETA: {}".format(progress_bar, id+1, len(indices), elapsed_time, eta)) --------------------------------------------------------------------------------