├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── data └── nuscenes │ └── data.md ├── nuScenesTrajectoryVisualizationTool_env.yaml ├── projects ├── __init__.py ├── configs │ ├── runtime.py │ ├── runtime_wo_wandb.py │ └── tracking │ │ └── petr │ │ └── f3_q500_800x320.py ├── mmdet3d_plugin │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-38.pyc │ ├── core │ │ └── bbox │ │ │ ├── __pycache__ │ │ │ ├── array_converter.cpython-38.pyc │ │ │ └── util.cpython-38.pyc │ │ │ ├── array_converter.py │ │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── hungarian_assigner_3d.cpython-38.pyc │ │ │ └── hungarian_assigner_3d.py │ │ │ ├── coders │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── nms_free_coder.cpython-38.pyc │ │ │ └── nms_free_coder.py │ │ │ ├── iou_calculators │ │ │ ├── __init__.py │ │ │ └── iou3d_calculator.py │ │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── match_cost.cpython-38.pyc │ │ │ └── match_cost.py │ │ │ └── util.py │ ├── datasets │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── nuscenes_dataset.cpython-38.pyc │ │ ├── nuscenes_dataset.py │ │ └── pipelines │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── loading.cpython-38.pyc │ │ │ └── transform_3d.cpython-38.pyc │ │ │ ├── loading.py │ │ │ └── transform_3d.py │ └── models │ │ ├── backbones │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── vovnet.cpython-38.pyc │ │ │ └── vovnetcp.cpython-38.pyc │ │ ├── vovnet.py │ │ └── vovnetcp.py │ │ ├── dense_heads │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── detr3d_head.cpython-38.pyc │ │ │ ├── dgcnn3d_head.cpython-38.pyc │ │ │ ├── petr_head.cpython-38.pyc │ │ │ └── petrv2_head.cpython-38.pyc │ │ ├── detr3d_head.py │ │ ├── dgcnn3d_head.py │ │ ├── petr_head.py │ │ └── petrv2_head.py │ │ ├── detectors │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── detr3d.cpython-38.pyc │ │ │ ├── obj_dgcnn.cpython-38.pyc │ │ │ └── petr3d.cpython-38.pyc │ │ ├── detr3d.py │ │ ├── obj_dgcnn.py │ │ └── petr3d.py │ │ ├── necks │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── cp_fpn.cpython-38.pyc │ │ └── cp_fpn.py │ │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── detr.cpython-38.pyc │ │ ├── detr3d_transformer.cpython-38.pyc │ │ ├── dgcnn_attn.cpython-38.pyc │ │ ├── grid_mask.cpython-38.pyc │ │ ├── petr_transformer.cpython-38.pyc │ │ └── positional_encoding.cpython-38.pyc │ │ ├── detr.py │ │ ├── detr3d_transformer.py │ │ ├── dgcnn_attn.py │ │ ├── grid_mask.py │ │ ├── petr_transformer.py │ │ └── positional_encoding.py └── tracking_plugin │ ├── __init__.py │ ├── __pycache__ │ └── __init__.cpython-38.pyc │ ├── core │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── coder.cpython-38.pyc │ │ └── instances.cpython-38.pyc │ ├── coder.py │ └── instances.py │ ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── nuscenes_forecasting_bbox.cpython-38.pyc │ │ └── nuscenes_tracking_dataset.cpython-38.pyc │ ├── nuscenes_forecasting_bbox.py │ ├── nuscenes_tracking_dataset.py │ └── pipelines │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── pipeline.cpython-38.pyc │ │ └── track_transform_3d.cpython-38.pyc │ 
│ ├── pipeline.py │ │ └── track_transform_3d.py │ ├── models │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-38.pyc │ ├── dense_heads │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── detr3d_tracking_head.cpython-38.pyc │ │ │ └── petr_tracking_head.cpython-38.pyc │ │ ├── detr3d_tracking_head.py │ │ └── petr_tracking_head.py │ ├── losses │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── tracking_loss.cpython-38.pyc │ │ │ ├── tracking_loss_base.cpython-38.pyc │ │ │ ├── tracking_loss_combo.cpython-38.pyc │ │ │ ├── tracking_loss_mem_bank.cpython-38.pyc │ │ │ └── tracking_loss_prediction.cpython-38.pyc │ │ ├── tracking_loss.py │ │ ├── tracking_loss_base.py │ │ ├── tracking_loss_combo.py │ │ ├── tracking_loss_mem_bank.py │ │ └── tracking_loss_prediction.py │ ├── trackers │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── runtime_tracker.cpython-38.pyc │ │ │ ├── spatial_temporal_reason.cpython-38.pyc │ │ │ ├── tracker.cpython-38.pyc │ │ │ └── utils.cpython-38.pyc │ │ ├── runtime_tracker.py │ │ ├── spatial_temporal_reason.py │ │ ├── tracker.py │ │ └── utils.py │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── detr3d_tracking_transformer.cpython-38.pyc │ │ ├── petr_tracker_transformer.cpython-38.pyc │ │ └── temporal_transformer.cpython-38.pyc │ │ ├── detr3d_tracking_transformer.py │ │ ├── petr_tracker_transformer.py │ │ └── temporal_transformer.py │ ├── test_track_api.py │ └── visualization │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-38.pyc │ ├── functions.cpython-38.pyc │ └── visualizer.cpython-38.pyc │ ├── functions.py │ └── visualizer.py ├── tools ├── __init__.py ├── bev_traj_visualization.py ├── camera_visualization.py ├── create_data.py ├── create_data.sh └── data_converter │ ├── __init__.py │ ├── create_gt_database.py │ ├── indoor_converter.py │ ├── kitti_converter.py │ ├── kitti_data_utils.py │ ├── lyft_converter.py │ ├── lyft_data_fixer.py │ ├── nuimage_converter.py │ ├── nuscenes_converter.py │ ├── nuscenes_prediction_tools.py │ ├── nuscenes_tracking_converter.py │ ├── s3dis_data_utils.py │ ├── scannet_data_utils.py │ ├── sunrgbd_data_utils.py │ └── waymo_converter.py └── work_dirs ├── cam_visualization └── fcbccedd61424f1b85dcbf8f897f9754 │ ├── 0_camera.png │ ├── 1_camera.png │ ├── 2_camera.png │ └── video 00_00_00-00_00_30.gif ├── jpg1.png └── tracking_visualization └── fcbccedd61424f1b85dcbf8f897f9754 ├── 0.png ├── 1.png ├── 2.png └── videobev 00_00_00-00_00_30.gif /.gitignore: -------------------------------------------------------------------------------- 1 | # http://www.gnu.org/software/automake 2 | 3 | Makefile.in 4 | /ar-lib 5 | /mdate-sh 6 | /py-compile 7 | /test-driver 8 | /ylwrap 9 | .deps/ 10 | .dirstamp 11 | 12 | # http://www.gnu.org/software/autoconf 13 | 14 | autom4te.cache 15 | /autoscan.log 16 | /autoscan-*.log 17 | /aclocal.m4 18 | /compile 19 | /config.cache 20 | /config.guess 21 | /config.h.in 22 | /config.log 23 | /config.status 24 | /config.sub 25 | /configure 26 | /configure.scan 27 | /depcomp 28 | /install-sh 29 | /missing 30 | /stamp-h1 31 | 32 | # https://www.gnu.org/software/libtool/ 33 | 34 | /ltmain.sh 35 | 36 | # http://www.gnu.org/software/texinfo 37 | 38 | /texinfo.tex 39 | 40 | # http://www.gnu.org/software/m4/ 41 | 42 | m4/libtool.m4 43 | m4/ltoptions.m4 44 | m4/ltsugar.m4 45 | m4/ltversion.m4 46 | m4/lt~obsolete.m4 47 | 48 | # Generated Makefile 49 | # (meta build system like autotools, 
50 | # can automatically generate from config.status script 51 | # (which is called by configure script)) 52 | Makefile 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nuScenes_TrajectoryVisualizationTool 2 | The visualization project for BEV model inference results on the [NuScenes Dataset](https://www.nuscenes.org/), including detection boxes (detBox) and tracking trajectories (trackingLine)! 3 | 4 | First of all, I need to thank some GitHub authors! [PF-Track](https://github.com/TRI-ML/PF-Track) | [SimpleTrack](https://github.com/tusen-ai/SimpleTrack) | [StreamPETR](https://github.com/exiawsh/StreamPETR) 5 | 6 | > We use the nuScenes v1.0-mini dataset as an example. 7 | 8 | 9 | 10 | 11 | 12 | > The green dashed line represents the GT trajectory; solid lines represent predicted objects and trajectories, and the same color represents the same predicted object. 13 | 14 | ## 🌵Necessary File Format 15 | - data/nuscenes/ 16 | - v1.0-mini/ 17 | - maps/ 18 | - samples/ 19 | - sweeps/ 20 | - v1.0-mini/ 21 | - projects/ 22 | - tools/ 23 | - SimpleTrack/ 24 | 25 | ## 🌵Build Envs 26 | 27 | You can refer to the [PF-Track](https://github.com/TRI-ML/PF-Track) environment configuration documentation. 28 | 29 | Or use the Conda environment configuration file we provide: 30 | 31 | ``` 32 | conda env create -f nuScenesTrajectoryVisualizationTool_env.yaml 33 | ``` 34 | 35 | As for the `mot_3d` package, install it from the SimpleTrack directory: 36 | 37 | ``` 38 | cd SimpleTrack 39 | pip install -e ./ 40 | ``` 41 | 42 | ## 🌵PKL Create 43 | Like other BEV models, you need to use `create_data.py` to create the `pkl` files for v1.0-mini: 44 | 45 | `tracking_forecasting-mini_infos_train.pkl` 46 | 47 | `tracking_forecasting-mini_infos_val.pkl` 48 | 49 | ``` 50 | python tools/create_data.py nuscenes-tracking --root-path data/nuscenes/v1.0-mini --out-dir data/nuscenes/v1.0-mini --extra-tag tracking_forecasting --version v1.0-mini --forecasting 51 | ``` 52 | 53 | ## 🌵Data Prepare 54 | 55 | The final `json` data structure should look like this; note that the `tracking_id` key is required (an illustrative example entry is sketched below). 56 | 57 | 58 | 59 | You can use the [code](https://github.com/PrymceQ/BEVModel_StreamPETR) here to prepare the `json` file with the `tracking_id` key from the test result `json` file. 60 | 61 | ## 🌵Camera Visualization Code 62 | 63 | ``` 64 | python tools/camera_visualization.py --result mini_track.json --show-dir work_dirs/cam_visualization/ 65 | ``` 66 | 67 | ## 🌵BEV Visualization Code 68 | 69 | ``` 70 | python tools/bev_traj_visualization.py projects/configs/tracking/petr/f3_q500_800x320.py --result mini_track.json --show-dir work_dirs/tracking_visualization/ 71 | ``` 72 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/__init__.py -------------------------------------------------------------------------------- /data/nuscenes/data.md: -------------------------------------------------------------------------------- 1 | Place the nuScenes data here!
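An illustrative sketch of one entry in the result `json` for the 🌵Data Prepare step above, assuming the standard nuScenes tracking submission format (all tokens and values below are made up):

```
{
  "meta": {"use_camera": true, "use_lidar": false},
  "results": {
    "<sample_token>": [
      {
        "sample_token": "<sample_token>",
        "translation": [600.0, 1640.0, 1.0],
        "size": [1.95, 4.60, 1.73],
        "rotation": [0.70, 0.0, 0.0, 0.71],
        "velocity": [1.2, 0.1],
        "tracking_id": "12",
        "tracking_name": "car",
        "tracking_score": 0.85
      }
    ]
  }
}
```

Whichever model produces the results, every box entry must carry a `tracking_id` so the visualizer can link boxes belonging to the same object across frames.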
-------------------------------------------------------------------------------- /nuScenesTrajectoryVisualizationTool_env.yaml: -------------------------------------------------------------------------------- 1 | name: trackvis 2 | channels: 3 | - http://mirror.tuna.tsinghua.edu.cn/anaconda/pkgs/main 4 | - http://mirror.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ 5 | - http://mirror.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ 6 | - http://mirror.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/ 7 | - defaults 8 | - http://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/ 9 | dependencies: 10 | - ca-certificates=2023.05.30=haa95532_0 11 | - libffi=3.4.4=hd77b12b_0 12 | - openssl=3.0.10=h2bbff1b_0 13 | - pip=23.2.1=py38haa95532_0 14 | - python=3.8.17=h1aa4202_0 15 | - sqlite=3.41.2=h2bbff1b_0 16 | - vc=14.2=h21ff451_1 17 | - vs2015_runtime=14.27.29016=h5e58377_2 18 | - wheel=0.38.4=py38haa95532_0 19 | - pip: 20 | - absl-py==1.4.0 21 | - addict==2.4.0 22 | - aliyun-python-sdk-core==2.13.36 23 | - aliyun-python-sdk-kms==2.16.1 24 | - ansi2html==1.8.0 25 | - anyio==3.7.1 26 | - appdirs==1.4.4 27 | - argon2-cffi==21.3.0 28 | - argon2-cffi-bindings==21.2.0 29 | - arrow==1.2.3 30 | - asttokens==2.2.1 31 | - async-lru==2.0.4 32 | - attrs==23.1.0 33 | - babel==2.12.1 34 | - backcall==0.2.0 35 | - beautifulsoup4==4.12.2 36 | - black==23.7.0 37 | - bleach==6.0.0 38 | - cachetools==5.3.1 39 | - certifi==2023.7.22 40 | - cffi==1.15.1 41 | - charset-normalizer==3.2.0 42 | - click==8.1.6 43 | - colorama==0.4.6 44 | - comm==0.1.4 45 | - configargparse==1.7 46 | - contourpy==1.1.0 47 | - crcmod==1.7 48 | - cryptography==41.0.3 49 | - cycler==0.11.0 50 | - cython==3.0.0 51 | - dash==2.11.1 52 | - dash-core-components==2.0.0 53 | - dash-html-components==2.0.0 54 | - dash-table==5.0.0 55 | - debugpy==1.6.7 56 | - decorator==5.1.1 57 | - defusedxml==0.7.1 58 | - descartes==1.1.0 59 | - docker-pycreds==0.4.0 60 | - exceptiongroup==1.1.2 61 | - executing==1.2.0 62 | - fastjsonschema==2.18.0 63 | - fire==0.5.0 64 | - flake8==6.1.0 65 | - flask==2.2.5 66 | - fonttools==4.42.0 67 | - fqdn==1.5.1 68 | - gitdb==4.0.10 69 | - gitpython==3.1.32 70 | - google-auth==2.22.0 71 | - google-auth-oauthlib==1.0.0 72 | - grpcio==1.56.2 73 | - idna==3.4 74 | - imageio==2.19.3 75 | - imageio-ffmpeg==0.4.7 76 | - importlib-metadata==6.8.0 77 | - importlib-resources==6.0.1 78 | - iniconfig==2.0.0 79 | - ipykernel==6.25.1 80 | - ipython==8.12.2 81 | - ipython-genutils==0.2.0 82 | - ipywidgets==8.1.0 83 | - isoduration==20.11.0 84 | - itsdangerous==2.1.2 85 | - jedi==0.19.0 86 | - jinja2==3.1.2 87 | - jmespath==0.10.0 88 | - joblib==1.3.1 89 | - json5==0.9.14 90 | - jsonpointer==2.4 91 | - jsonschema==4.19.0 92 | - jsonschema-specifications==2023.7.1 93 | - jupyter==1.0.0 94 | - jupyter-client==8.3.0 95 | - jupyter-console==6.6.3 96 | - jupyter-core==5.3.1 97 | - jupyter-events==0.7.0 98 | - jupyter-lsp==2.2.0 99 | - jupyter-server==2.7.0 100 | - jupyter-server-terminals==0.4.4 101 | - jupyterlab==4.0.4 102 | - jupyterlab-pygments==0.2.2 103 | - jupyterlab-server==2.24.0 104 | - jupyterlab-widgets==3.0.8 105 | - kiwisolver==1.4.4 106 | - lazy-loader==0.3 107 | - llvmlite==0.36.0 108 | - lyft-dataset-sdk==0.0.8 109 | - markdown==3.4.4 110 | - markdown-it-py==3.0.0 111 | - markupsafe==2.1.3 112 | - matplotlib==3.6.3 113 | - matplotlib-inline==0.1.6 114 | - mccabe==0.7.0 115 | - mdurl==0.1.2 116 | - mistune==2.0.5 117 | - mmcv==1.4.0 118 | - mmcv-full==1.7.1 119 | - mmdet==2.24.1 120 | - mmdet3d==1.0.0rc4 121 | - mmengine==0.8.4 122 | - 
mmsegmentation==0.20.2 123 | - model-index==0.1.11 124 | - mypy-extensions==1.0.0 125 | - nbclient==0.8.0 126 | - nbconvert==7.4.0 127 | - nbformat==5.5.0 128 | - nest-asyncio==1.5.7 129 | - networkx==2.2 130 | - notebook==7.0.2 131 | - notebook-shim==0.2.3 132 | - numba==0.53.0 133 | - numpy==1.19.5 134 | - nuscenes-devkit==1.1.7 135 | - oauthlib==3.2.2 136 | - open3d==0.16.0 137 | - opencv-python==4.8.0.74 138 | - opendatalab==0.0.10 139 | - openmim==0.3.9 140 | - openxlab==0.0.17 141 | - ordered-set==4.1.0 142 | - oss2==2.17.0 143 | - overrides==7.4.0 144 | - packaging==23.1 145 | - pandas==1.4.4 146 | - pandocfilters==1.5.0 147 | - parso==0.8.3 148 | - pathspec==0.11.2 149 | - pathtools==0.1.2 150 | - pickleshare==0.7.5 151 | - pillow==10.0.0 152 | - pkgutil-resolve-name==1.3.10 153 | - platformdirs==3.10.0 154 | - plotly==5.15.0 155 | - pluggy==1.2.0 156 | - plyfile==1.0.1 157 | - prettytable==3.8.0 158 | - prometheus-client==0.17.1 159 | - prompt-toolkit==3.0.39 160 | - protobuf==4.23.4 161 | - psutil==5.9.5 162 | - pure-eval==0.2.2 163 | - pyasn1==0.5.0 164 | - pyasn1-modules==0.3.0 165 | - pycocotools==2.0.6 166 | - pycodestyle==2.11.0 167 | - pycparser==2.21 168 | - pycryptodome==3.18.0 169 | - pyflakes==3.1.0 170 | - pygments==2.16.1 171 | - pyparsing==3.0.9 172 | - pyquaternion==0.9.9 173 | - pytest==7.4.0 174 | - python-dateutil==2.8.2 175 | - python-json-logger==2.0.7 176 | - pytz==2023.3 177 | - pywavelets==1.4.1 178 | - pywin32==306 179 | - pywinpty==2.0.11 180 | - pyyaml==6.0.1 181 | - pyzmq==25.1.0 182 | - qtconsole==5.4.3 183 | - qtpy==2.3.1 184 | - referencing==0.30.2 185 | - regex==2023.6.3 186 | - requests==2.28.2 187 | - requests-oauthlib==1.3.1 188 | - retrying==1.3.4 189 | - rfc3339-validator==0.1.4 190 | - rfc3986-validator==0.1.1 191 | - rich==13.4.2 192 | - rpds-py==0.9.2 193 | - rsa==4.9 194 | - scikit-image==0.19.3 195 | - scikit-learn==1.3.0 196 | - scipy==1.10.1 197 | - send2trash==1.8.2 198 | - sentry-sdk==1.29.2 199 | - setproctitle==1.3.2 200 | - setuptools==59.5.0 201 | - shapely==2.0.1 202 | - six==1.16.0 203 | - smmap==5.0.0 204 | - sniffio==1.3.0 205 | - soupsieve==2.4.1 206 | - stack-data==0.6.2 207 | - tabulate==0.9.0 208 | - tenacity==8.2.2 209 | - tensorboard==2.13.0 210 | - tensorboard-data-server==0.7.1 211 | - termcolor==2.3.0 212 | - terminado==0.17.1 213 | - terminaltables==3.1.10 214 | - threadpoolctl==3.2.0 215 | - tifffile==2023.7.10 216 | - tinycss2==1.2.1 217 | - tomli==2.0.1 218 | - torch==1.12.0+cu116 219 | - torchvision==0.13.0+cu116 220 | - tornado==6.3.2 221 | - tqdm==4.65.1 222 | - traitlets==5.9.0 223 | - trimesh==2.35.39 224 | - typing-extensions==4.7.1 225 | - tzdata==2023.3 226 | - uri-template==1.3.0 227 | - urllib3==1.26.16 228 | - wandb==0.15.8 229 | - wcwidth==0.2.6 230 | - webcolors==1.13 231 | - webencodings==0.5.1 232 | - websocket-client==1.6.1 233 | - werkzeug==2.2.3 234 | - widgetsnbextension==4.0.8 235 | - yapf==0.40.1 236 | - zipp==3.16.2 237 | prefix: D:\PythonAnaconda\newmain\envs\trackvis 238 | -------------------------------------------------------------------------------- /projects/__init__.py: -------------------------------------------------------------------------------- 1 | from .mmdet3d_plugin import * 2 | from .tracking_plugin import * -------------------------------------------------------------------------------- /projects/configs/runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default 
we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=10, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook'), 11 | # dict(type='WandbLoggerHook', 12 | # init_kwargs={'project': 'PF-Track'}, 13 | # interval=10, 14 | # ) 15 | ]) 16 | # yapf:enable 17 | dist_params = dict(backend='nccl') 18 | log_level = 'INFO' 19 | work_dir = None 20 | load_from = None 21 | resume_from = None 22 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /projects/configs/runtime_wo_wandb.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=10, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook'), 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 
9 | # ------------------------------------------------------------------------ 10 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D 11 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder 12 | from .core.bbox.match_costs import BBox3DL1Cost 13 | from .datasets import CustomNuScenesDataset 14 | from .datasets.pipelines import ( 15 | PhotoMetricDistortionMultiViewImage, PadMultiViewImage, 16 | NormalizeMultiviewImage) 17 | from .models.backbones.vovnet import VoVNet 18 | # from .models.backbones.rednet import RedNet 19 | from .models.detectors.obj_dgcnn import ObjDGCNN 20 | from .models.detectors.detr3d import Detr3D 21 | from .models.detectors.petr3d import Petr3D 22 | from .models.dense_heads.dgcnn3d_head import DGCNN3DHead 23 | from .models.dense_heads.detr3d_head import Detr3DHead 24 | from .models.utils.detr import Deformable3DDetrTransformerDecoder 25 | from .models.utils.dgcnn_attn import DGCNNAttn 26 | from .models.utils.detr3d_transformer import Detr3DTransformer, Detr3DTransformerDecoder, Detr3DCrossAtten 27 | from .models.necks import * 28 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/__pycache__/array_converter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/core/bbox/__pycache__/array_converter.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .hungarian_assigner_3d import HungarianAssigner3D 2 | 3 | __all__ = ['HungarianAssigner3D'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2021 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection (https://github.com/open-mmlab/mmdetection) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | import torch 11 | 12 | from mmdet.core.bbox.builder import BBOX_ASSIGNERS 13 | from mmdet.core.bbox.assigners import AssignResult 14 | from mmdet.core.bbox.assigners import BaseAssigner 15 | from mmdet.core.bbox.match_costs import build_match_cost 16 | from mmdet.models.utils.transformer import inverse_sigmoid 17 | from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox 18 | 19 | try: 20 | from scipy.optimize import linear_sum_assignment 21 | except ImportError: 22 | linear_sum_assignment = None 23 | 24 | 25 | @BBOX_ASSIGNERS.register_module() 26 | class HungarianAssigner3D(BaseAssigner): 27 | """Computes one-to-one matching between predictions and ground truth. 28 | This class computes an assignment between the targets and the predictions 29 | based on the costs. The costs are weighted sum of three components: 30 | classification cost, regression L1 cost and regression iou cost. The 31 | targets don't include the no_object, so generally there are more 32 | predictions than targets. After the one-to-one matching, the un-matched 33 | are treated as backgrounds. Thus each query prediction will be assigned 34 | with `0` or a positive integer indicating the ground truth index: 35 | - 0: negative sample, no assigned gt 36 | - positive integer: positive sample, index (1-based) of assigned gt 37 | Args: 38 | cls_weight (int | float, optional): The scale factor for classification 39 | cost. Default 1.0. 40 | bbox_weight (int | float, optional): The scale factor for regression 41 | L1 cost. Default 1.0. 42 | iou_weight (int | float, optional): The scale factor for regression 43 | iou cost. Default 1.0. 44 | iou_calculator (dict | optional): The config for the iou calculation. 45 | Default type `BboxOverlaps2D`. 46 | iou_mode (str | optional): "iou" (intersection over union), "iof" 47 | (intersection over foreground), or "giou" (generalized 48 | intersection over union). Default "giou". 
49 | """ 50 | 51 | def __init__(self, 52 | cls_cost=dict(type='ClassificationCost', weight=1.), 53 | reg_cost=dict(type='BBoxL1Cost', weight=1.0), 54 | iou_cost=dict(type='IoUCost', weight=0.0), 55 | pc_range=None): 56 | self.cls_cost = build_match_cost(cls_cost) 57 | self.reg_cost = build_match_cost(reg_cost) 58 | self.iou_cost = build_match_cost(iou_cost) 59 | self.pc_range = pc_range 60 | 61 | def assign(self, 62 | bbox_pred, 63 | cls_pred, 64 | gt_bboxes, 65 | gt_labels, 66 | gt_bboxes_ignore=None, 67 | eps=1e-7): 68 | """Computes one-to-one matching based on the weighted costs. 69 | This method assign each query prediction to a ground truth or 70 | background. The `assigned_gt_inds` with -1 means don't care, 71 | 0 means negative sample, and positive number is the index (1-based) 72 | of assigned gt. 73 | The assignment is done in the following steps, the order matters. 74 | 1. assign every prediction to -1 75 | 2. compute the weighted costs 76 | 3. do Hungarian matching on CPU based on the costs 77 | 4. assign all to 0 (background) first, then for each matched pair 78 | between predictions and gts, treat this prediction as foreground 79 | and assign the corresponding gt index (plus 1) to it. 80 | Args: 81 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 82 | (cx, cy, w, h), which are all in range [0, 1]. Shape 83 | [num_query, 4]. 84 | cls_pred (Tensor): Predicted classification logits, shape 85 | [num_query, num_class]. 86 | gt_bboxes (Tensor): Ground truth boxes with unnormalized 87 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 88 | gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,). 89 | gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are 90 | labelled as `ignored`. Default None. 91 | eps (int | float, optional): A value added to the denominator for 92 | numerical stability. Default 1e-7. 93 | Returns: 94 | :obj:`AssignResult`: The assigned result. 95 | """ 96 | assert gt_bboxes_ignore is None, \ 97 | 'Only case when gt_bboxes_ignore is None is supported.' 98 | num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0) 99 | 100 | # 1. assign -1 by default 101 | assigned_gt_inds = bbox_pred.new_full((num_bboxes, ), 102 | -1, 103 | dtype=torch.long) 104 | assigned_labels = bbox_pred.new_full((num_bboxes, ), 105 | -1, 106 | dtype=torch.long) 107 | if num_gts == 0 or num_bboxes == 0: 108 | # No ground truth or boxes, return empty assignment 109 | if num_gts == 0: 110 | # No ground truth, assign all to background 111 | assigned_gt_inds[:] = 0 112 | return AssignResult( 113 | num_gts, assigned_gt_inds, None, labels=assigned_labels) 114 | 115 | # 2. compute the weighted costs 116 | # classification and bboxcost. 117 | cls_cost = self.cls_cost(cls_pred, gt_labels) 118 | # regression L1 cost 119 | normalized_gt_bboxes = normalize_bbox(gt_bboxes, self.pc_range) 120 | reg_cost = self.reg_cost(bbox_pred[:, :8], normalized_gt_bboxes[:, :8]) 121 | 122 | # weighted sum of above two costs 123 | cost = cls_cost + reg_cost 124 | 125 | # 3. 
do Hungarian matching on CPU using linear_sum_assignment 126 | cost = cost.detach().cpu() 127 | if linear_sum_assignment is None: 128 | raise ImportError('Please run "pip install scipy" ' 129 | 'to install scipy first.') 130 | # torch.nan_to_num is unavailable in older torch versions, so fall back to the local helper below 131 | try: 132 | cost = torch.nan_to_num(cost, nan=100.0, posinf=100.0, neginf=-100.0) 133 | except AttributeError: 134 | cost = nan_to_num(cost, nan=100.0, posinf=100.0, neginf=-100.0) 135 | 136 | matched_row_inds, matched_col_inds = linear_sum_assignment(cost) 137 | matched_row_inds = torch.from_numpy(matched_row_inds).to( 138 | bbox_pred.device) 139 | matched_col_inds = torch.from_numpy(matched_col_inds).to( 140 | bbox_pred.device) 141 | 142 | # 4. assign backgrounds and foregrounds 143 | # assign all indices to backgrounds first 144 | assigned_gt_inds[:] = 0 145 | # assign foregrounds based on matching results 146 | assigned_gt_inds[matched_row_inds] = matched_col_inds + 1 147 | assigned_labels[matched_row_inds] = gt_labels[matched_col_inds] 148 | return AssignResult( 149 | num_gts, assigned_gt_inds, None, labels=assigned_labels) 150 | 151 | 152 | def nan_to_num(x, nan=0.0, posinf=None, neginf=None): 153 | x[torch.isnan(x)] = nan 154 | if posinf is not None: 155 | x[torch.isposinf(x)] = posinf 156 | if neginf is not None: 157 | x[torch.isneginf(x)] = neginf 158 | return x -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_free_coder import NMSFreeCoder, NMSFreeClsCoder 2 | __all__ = ['NMSFreeCoder', 'NMSFreeClsCoder'] 3 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/iou_calculators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 3 | BboxOverlapsNearest3D, 4 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 5 | bbox_overlaps_nearest_3d) 6 | 7 | __all__ = [ 8 | 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d', 9 | 'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D', 10 | 'axis_aligned_bbox_overlaps_3d' 11 | ] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost 3 | 4 | __all__ = ['build_match_cost', 'BBox3DL1Cost'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 3 | from mmdet.core.bbox.iou_calculators import bbox_overlaps 4 | 5 | @MATCH_COST.register_module() 6 | class BBox3DL1Cost(object): 7 | """BBox3DL1Cost. 8 | Args: 9 | weight (int | float, optional): loss_weight 10 | """ 11 | 12 | def __init__(self, weight=1.): 13 | self.weight = weight 14 | 15 | def __call__(self, bbox_pred, gt_bboxes): 16 | """ 17 | Args: 18 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 19 | (cx, cy, w, h), which are all in range [0, 1]. Shape 20 | [num_query, 4]. 21 | gt_bboxes (Tensor): Ground truth boxes with normalized 22 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 23 | Returns: 24 | torch.Tensor: bbox_cost value with weight 25 | """ 26 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1) 27 | return bbox_cost * self.weight -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .array_converter import array_converter 3 | 4 | @array_converter(apply_to=('points', 'cam2img')) 5 | def points_img2cam(points, cam2img): 6 | """Project points in image coordinates to camera coordinates. 7 | 8 | Args: 9 | points (torch.Tensor): 2.5D points in 2D images, [N, 3], 10 | 3 corresponds with x, y in the image and depth. 11 | cam2img (torch.Tensor): Camera intrinsic matrix. The shape can be 12 | [3, 3], [3, 4] or [4, 4]. 13 | 14 | Returns: 15 | torch.Tensor: points in 3D space. [N, 3], 16 | 3 corresponds with x, y, z in 3D space. 
17 | """ 18 | assert cam2img.shape[0] <= 4 19 | assert cam2img.shape[1] <= 4 20 | assert points.shape[1] == 3 21 | 22 | xys = points[:, :2] 23 | depths = points[:, 2].view(-1, 1) 24 | unnormed_xys = torch.cat([xys * depths, depths], dim=1) 25 | 26 | pad_cam2img = torch.eye(4, dtype=xys.dtype, device=xys.device) 27 | pad_cam2img[:cam2img.shape[0], :cam2img.shape[1]] = cam2img 28 | inv_pad_cam2img = torch.inverse(pad_cam2img).transpose(0, 1) 29 | 30 | # Do operation in homogeneous coordinates. 31 | num_points = unnormed_xys.shape[0] 32 | homo_xys = torch.cat([unnormed_xys, xys.new_ones((num_points, 1))], dim=1) 33 | points3D = torch.mm(homo_xys, inv_pad_cam2img)[:, :3] 34 | 35 | return points3D 36 | 37 | 38 | def normalize_bbox(bboxes, pc_range): 39 | 40 | cx = bboxes[..., 0:1] 41 | cy = bboxes[..., 1:2] 42 | cz = bboxes[..., 2:3] 43 | w = bboxes[..., 3:4].log() 44 | l = bboxes[..., 4:5].log() 45 | h = bboxes[..., 5:6].log() 46 | 47 | rot = bboxes[..., 6:7] 48 | if bboxes.size(-1) > 7: 49 | vx = bboxes[..., 7:8] 50 | vy = bboxes[..., 8:9] 51 | normalized_bboxes = torch.cat( 52 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1 53 | ) 54 | else: 55 | normalized_bboxes = torch.cat( 56 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1 57 | ) 58 | return normalized_bboxes 59 | 60 | def denormalize_bbox(normalized_bboxes, pc_range): 61 | # rotation 62 | rot_sine = normalized_bboxes[..., 6:7] 63 | 64 | rot_cosine = normalized_bboxes[..., 7:8] 65 | rot = torch.atan2(rot_sine, rot_cosine) 66 | 67 | # center in the bev 68 | cx = normalized_bboxes[..., 0:1] 69 | cy = normalized_bboxes[..., 1:2] 70 | cz = normalized_bboxes[..., 4:5] 71 | 72 | # size 73 | w = normalized_bboxes[..., 2:3] 74 | l = normalized_bboxes[..., 3:4] 75 | h = normalized_bboxes[..., 5:6] 76 | 77 | w = w.exp() 78 | l = l.exp() 79 | h = h.exp() 80 | if normalized_bboxes.size(-1) > 8: 81 | # velocity 82 | vx = normalized_bboxes[:, 8:9] 83 | vy = normalized_bboxes[:, 9:10] 84 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 85 | else: 86 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1) 87 | return denormalized_bboxes -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 
9 | # ------------------------------------------------------------------------ 10 | from .nuscenes_dataset import CustomNuScenesDataset 11 | __all__ = [ 12 | 'CustomNuScenesDataset' 13 | ] 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/datasets/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__pycache__/nuscenes_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/datasets/__pycache__/nuscenes_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/nuscenes_dataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | import numpy as np 11 | from mmdet.datasets import DATASETS 12 | from mmdet3d.datasets import NuScenesDataset 13 | import os 14 | 15 | @DATASETS.register_module() 16 | class CustomNuScenesDataset(NuScenesDataset): 17 | r"""NuScenes Dataset. 18 | This dataset only adds camera intrinsics and extrinsics to the results. 19 | """ 20 | def get_data_info(self, index): 21 | """Get data info according to the given index. 22 | Args: 23 | index (int): Index of the sample data to get. 24 | Returns: 25 | dict: Data information that will be passed to the data \ 26 | preprocessing pipelines. It includes the following keys: 27 | 28 | - sample_idx (str): Sample index. 29 | - pts_filename (str): Filename of point clouds. 30 | - sweeps (list[dict]): Infos of sweeps. 31 | - timestamp (float): Sample timestamp. 32 | - img_filename (str, optional): Image filename. 33 | - lidar2img (list[np.ndarray], optional): Transformations \ 34 | from lidar to different cameras. 35 | - ann_info (dict): Annotation info.
36 | """ 37 | info = self.data_infos[index] 38 | # standard protocal modified from SECOND.Pytorch 39 | input_dict = dict( 40 | sample_idx=info['token'], 41 | pts_filename=info['lidar_path'], 42 | sweeps=info['sweeps'], 43 | timestamp=info['timestamp'] / 1e6, 44 | ) 45 | 46 | if self.modality['use_camera']: 47 | image_paths = [] 48 | lidar2img_rts = [] 49 | intrinsics = [] 50 | extrinsics = [] 51 | img_timestamp = [] 52 | for cam_type, cam_info in info['cams'].items(): 53 | img_timestamp.append(cam_info['timestamp'] / 1e6) 54 | image_paths.append(cam_info['data_path']) 55 | # obtain lidar to image transformation matrix 56 | lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation']) 57 | lidar2cam_t = cam_info[ 58 | 'sensor2lidar_translation'] @ lidar2cam_r.T 59 | lidar2cam_rt = np.eye(4) 60 | lidar2cam_rt[:3, :3] = lidar2cam_r.T 61 | lidar2cam_rt[3, :3] = -lidar2cam_t 62 | intrinsic = cam_info['cam_intrinsic'] 63 | viewpad = np.eye(4) 64 | viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic 65 | lidar2img_rt = (viewpad @ lidar2cam_rt.T) 66 | intrinsics.append(viewpad) 67 | extrinsics.append(lidar2cam_rt) 68 | lidar2img_rts.append(lidar2img_rt) 69 | 70 | input_dict.update( 71 | dict( 72 | img_timestamp=img_timestamp, 73 | img_filename=image_paths, 74 | lidar2img=lidar2img_rts, 75 | intrinsics=intrinsics, 76 | extrinsics=extrinsics 77 | )) 78 | 79 | if not self.test_mode: 80 | annos = self.get_ann_info(index) 81 | input_dict['ann_info'] = annos 82 | return input_dict 83 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 
9 | # ------------------------------------------------------------------------ 10 | from .transform_3d import ( 11 | PadMultiViewImage, NormalizeMultiviewImage, 12 | PhotoMetricDistortionMultiViewImage, 13 | ResizeMultiview3D, 14 | AlbuMultiview3D, 15 | ResizeCropFlipImage, 16 | GlobalRotScaleTransImage 17 | ) 18 | from .loading import LoadMultiViewImageFromMultiSweepsFiles 19 | __all__ = [ 20 | 'PadMultiViewImage', 'NormalizeMultiviewImage', 'PhotoMetricDistortionMultiViewImage', 'LoadMultiViewImageFromMultiSweepsFiles', 21 | 'ResizeMultiview3D','AlbuMultiview3D','ResizeCropFlipImage','GlobalRotScaleTransImage'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/datasets/pipelines/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__pycache__/loading.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/datasets/pipelines/__pycache__/loading.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__pycache__/transform_3d.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/datasets/pipelines/__pycache__/transform_3d.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | import mmcv 5 | import numpy as np 6 | from mmdet.datasets.builder import PIPELINES 7 | 8 | @PIPELINES.register_module() 9 | class LoadMultiViewImageFromMultiSweepsFiles(object): 10 | """Load multi channel images from a list of separate channel files. 11 | Expects results['img_filename'] to be a list of filenames. 12 | Args: 13 | to_float32 (bool): Whether to convert the img to float32. 14 | Defaults to False. 15 | color_type (str): Color type of the file. Defaults to 'unchanged'. 
16 | """ 17 | 18 | def __init__(self, 19 | sweeps_num=5, 20 | to_float32=False, 21 | file_client_args=dict(backend='disk'), 22 | pad_empty_sweeps=False, 23 | sweep_range=[3,27], 24 | sweeps_id = None, 25 | color_type='unchanged', 26 | sensors = ['CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_FRONT_LEFT', 'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT'], 27 | test_mode=True, 28 | prob=1.0, 29 | ): 30 | 31 | self.sweeps_num = sweeps_num 32 | self.to_float32 = to_float32 33 | self.color_type = color_type 34 | self.file_client_args = file_client_args.copy() 35 | self.file_client = None 36 | self.pad_empty_sweeps = pad_empty_sweeps 37 | self.sensors = sensors 38 | self.test_mode = test_mode 39 | self.sweeps_id = sweeps_id 40 | self.sweep_range = sweep_range 41 | self.prob = prob 42 | if self.sweeps_id: 43 | assert len(self.sweeps_id) == self.sweeps_num 44 | 45 | def __call__(self, results): 46 | """Call function to load multi-view image from files. 47 | Args: 48 | results (dict): Result dict containing multi-view image filenames. 49 | Returns: 50 | dict: The result dict containing the multi-view image data. \ 51 | Added keys and values are described below. 52 | - filename (str): Multi-view image filenames. 53 | - img (np.ndarray): Multi-view image arrays. 54 | - img_shape (tuple[int]): Shape of multi-view image arrays. 55 | - ori_shape (tuple[int]): Shape of original image arrays. 56 | - pad_shape (tuple[int]): Shape of padded image arrays. 57 | - scale_factor (float): Scale factor. 58 | - img_norm_cfg (dict): Normalization configuration of images. 59 | """ 60 | sweep_imgs_list = [] 61 | timestamp_imgs_list = [] 62 | imgs = results['img'] 63 | img_timestamp = results['img_timestamp'] 64 | lidar_timestamp = results['timestamp'] 65 | img_timestamp = [lidar_timestamp - timestamp for timestamp in img_timestamp] 66 | sweep_imgs_list.extend(imgs) 67 | timestamp_imgs_list.extend(img_timestamp) 68 | nums = len(imgs) 69 | if self.pad_empty_sweeps and len(results['sweeps']) == 0: 70 | for i in range(self.sweeps_num): 71 | sweep_imgs_list.extend(imgs) 72 | mean_time = (self.sweep_range[0] + self.sweep_range[1]) / 2.0 * 0.083 73 | timestamp_imgs_list.extend([time + mean_time for time in img_timestamp]) 74 | for j in range(nums): 75 | results['filename'].append(results['filename'][j]) 76 | results['lidar2img'].append(np.copy(results['lidar2img'][j])) 77 | results['intrinsics'].append(np.copy(results['intrinsics'][j])) 78 | results['extrinsics'].append(np.copy(results['extrinsics'][j])) 79 | else: 80 | if self.sweeps_id: 81 | choices = self.sweeps_id 82 | elif len(results['sweeps']) <= self.sweeps_num: 83 | choices = np.arange(len(results['sweeps'])) 84 | elif self.test_mode: 85 | choices = [int((self.sweep_range[0] + self.sweep_range[1])/2) - 1] 86 | else: 87 | if np.random.random() < self.prob: 88 | if self.sweep_range[0] < len(results['sweeps']): 89 | sweep_range = list(range(self.sweep_range[0], min(self.sweep_range[1], len(results['sweeps'])))) 90 | else: 91 | sweep_range = list(range(self.sweep_range[0], self.sweep_range[1])) 92 | choices = np.random.choice(sweep_range, self.sweeps_num, replace=False) 93 | else: 94 | choices = [int((self.sweep_range[0] + self.sweep_range[1])/2) - 1] 95 | 96 | for idx in choices: 97 | sweep_idx = min(idx, len(results['sweeps']) - 1) 98 | sweep = results['sweeps'][sweep_idx] 99 | if len(sweep.keys()) < len(self.sensors): 100 | sweep = results['sweeps'][sweep_idx - 1] 101 | results['filename'].extend([sweep[sensor]['data_path'] for sensor in self.sensors]) 102 | 103 | img = 
np.stack([mmcv.imread(sweep[sensor]['data_path'], self.color_type) for sensor in self.sensors], axis=-1) 104 | 105 | if self.to_float32: 106 | img = img.astype(np.float32) 107 | img = [img[..., i] for i in range(img.shape[-1])] 108 | sweep_imgs_list.extend(img) 109 | sweep_ts = [lidar_timestamp - sweep[sensor]['timestamp'] / 1e6 for sensor in self.sensors] 110 | timestamp_imgs_list.extend(sweep_ts) 111 | for sensor in self.sensors: 112 | results['lidar2img'].append(sweep[sensor]['lidar2img']) 113 | results['intrinsics'].append(sweep[sensor]['intrinsics']) 114 | results['extrinsics'].append(sweep[sensor]['extrinsics']) 115 | results['img'] = sweep_imgs_list 116 | results['timestamp'] = timestamp_imgs_list 117 | 118 | return results 119 | 120 | def __repr__(self): 121 | """str: Return a string that describes the module.""" 122 | repr_str = self.__class__.__name__ 123 | repr_str += f'(to_float32={self.to_float32}, ' 124 | repr_str += f"color_type='{self.color_type}')" 125 | return repr_str 126 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | from .vovnet import VoVNet 8 | from .vovnetcp import VoVNetCP 9 | __all__ = ['VoVNet', 'VoVNetCP'] 10 | 11 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/backbones/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__pycache__/vovnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/backbones/__pycache__/vovnet.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__pycache__/vovnetcp.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/backbones/__pycache__/vovnetcp.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 
3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | from .dgcnn3d_head import DGCNN3DHead 11 | from .detr3d_head import Detr3DHead 12 | from .petr_head import PETRHead 13 | from .petrv2_head import PETRv2Head 14 | __all__ = ['DGCNN3DHead', 'Detr3DHead','PETRHead','PETRv2Head'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/dense_heads/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/dense_heads/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/dense_heads/__pycache__/detr3d_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/dense_heads/__pycache__/detr3d_head.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/dense_heads/__pycache__/dgcnn3d_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/dense_heads/__pycache__/dgcnn3d_head.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/dense_heads/__pycache__/petr_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/dense_heads/__pycache__/petr_head.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/dense_heads/__pycache__/petrv2_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/dense_heads/__pycache__/petrv2_head.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 
3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | from .obj_dgcnn import ObjDGCNN 11 | from .detr3d import Detr3D 12 | from .petr3d import Petr3D 13 | __all__ = ['ObjDGCNN', 'Detr3D', 'Petr3D'] 14 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/detectors/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/detectors/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/detectors/__pycache__/detr3d.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/detectors/__pycache__/detr3d.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/detectors/__pycache__/obj_dgcnn.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/detectors/__pycache__/obj_dgcnn.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/detectors/__pycache__/petr3d.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/detectors/__pycache__/petr3d.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/detectors/obj_dgcnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.models import DETECTORS 4 | from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d 5 | from mmdet3d.models.detectors.mvx_two_stage import MVXTwoStageDetector 6 | 7 | 8 | @DETECTORS.register_module() 9 | class ObjDGCNN(MVXTwoStageDetector): 10 | """Base class of Multi-modality VoxelNet.""" 11 | 12 | def __init__(self, 13 | pts_voxel_layer=None, 14 | pts_voxel_encoder=None, 15 | pts_middle_encoder=None, 16 | pts_fusion_layer=None, 17 | img_backbone=None, 18 | pts_backbone=None, 19 | img_neck=None, 20 | pts_neck=None, 21 | pts_bbox_head=None, 22 | img_roi_head=None, 23 | img_rpn_head=None, 24 | train_cfg=None, 25 | test_cfg=None, 26 | pretrained=None): 27 | super(ObjDGCNN, 28 | self).__init__(pts_voxel_layer, pts_voxel_encoder, 29 | pts_middle_encoder, pts_fusion_layer, 30 | img_backbone, pts_backbone, img_neck, pts_neck, 31 | pts_bbox_head, img_roi_head, img_rpn_head, 32 | train_cfg, test_cfg, pretrained) 33 | 34 | 
def extract_pts_feat(self, pts, img_feats, img_metas): 35 | """Extract features of points.""" 36 | if not self.with_pts_bbox: 37 | return None 38 | 39 | voxels, num_points, coors = self.voxelize(pts) 40 | 41 | voxel_features = self.pts_voxel_encoder(voxels, num_points, coors) 42 | batch_size = coors[-1, 0] + 1 43 | x = self.pts_middle_encoder(voxel_features, coors, batch_size) 44 | x = self.pts_backbone(x) 45 | if self.with_pts_neck: 46 | x = self.pts_neck(x) 47 | return x 48 | 49 | def forward_pts_train(self, 50 | pts_feats, 51 | gt_bboxes_3d, 52 | gt_labels_3d, 53 | img_metas, 54 | gt_bboxes_ignore=None): 55 | """Forward function for point cloud branch. 56 | Args: 57 | pts_feats (list[torch.Tensor]): Features of point cloud branch 58 | gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth 59 | boxes for each sample. 60 | gt_labels_3d (list[torch.Tensor]): Ground truth labels for 61 | boxes of each sampole 62 | img_metas (list[dict]): Meta information of samples. 63 | gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth 64 | boxes to be ignored. Defaults to None. 65 | Returns: 66 | dict: Losses of each branch. 67 | """ 68 | outs = self.pts_bbox_head(pts_feats) 69 | loss_inputs = [gt_bboxes_3d, gt_labels_3d, outs] 70 | losses = self.pts_bbox_head.loss(*loss_inputs) 71 | return losses 72 | 73 | def simple_test_pts(self, x, img_metas, rescale=False): 74 | """Test function of point cloud branch.""" 75 | outs = self.pts_bbox_head(x) 76 | bbox_list = self.pts_bbox_head.get_bboxes( 77 | outs, img_metas, rescale=rescale) 78 | bbox_results = [ 79 | bbox3d2result(bboxes, scores, labels) 80 | for bboxes, scores, labels in bbox_list 81 | ] 82 | return bbox_results 83 | 84 | def aug_test_pts(self, feats, img_metas, rescale=False): 85 | """Test function of point cloud branch with augmentaiton. 86 | The function implementation process is as follows: 87 | - step 1: map features back for double-flip augmentation. 88 | - step 2: merge all features and generate boxes. 89 | - step 3: map boxes back for scale augmentation. 90 | - step 4: merge results. 91 | Args: 92 | feats (list[torch.Tensor]): Feature of point cloud. 93 | img_metas (list[dict]): Meta information of samples. 94 | rescale (bool): Whether to rescale bboxes. Default: False. 95 | Returns: 96 | dict: Returned bboxes consists of the following keys: 97 | - boxes_3d (:obj:`LiDARInstance3DBoxes`): Predicted bboxes. 98 | - scores_3d (torch.Tensor): Scores of predicted boxes. 99 | - labels_3d (torch.Tensor): Labels of predicted boxes. 100 | """ 101 | # only support aug_test for one sample 102 | outs_list = [] 103 | for x, img_meta in zip(feats, img_metas): 104 | outs = self.pts_bbox_head(x[0]) 105 | # merge augmented outputs before decoding bboxes 106 | for task_id, out in enumerate(outs): 107 | for key in out[0].keys(): 108 | if img_meta[0]['pcd_horizontal_flip']: 109 | outs[task_id][0][key] = torch.flip( 110 | outs[task_id][0][key], dims=[2]) 111 | if key == 'reg': 112 | outs[task_id][0][key][:, 1, ...] = 1 - outs[ 113 | task_id][0][key][:, 1, ...] 114 | elif key == 'rot': 115 | outs[task_id][0][ 116 | key][:, 1, 117 | ...] = -outs[task_id][0][key][:, 1, ...] 118 | elif key == 'vel': 119 | outs[task_id][0][ 120 | key][:, 1, 121 | ...] = -outs[task_id][0][key][:, 1, ...] 122 | if img_meta[0]['pcd_vertical_flip']: 123 | outs[task_id][0][key] = torch.flip( 124 | outs[task_id][0][key], dims=[3]) 125 | if key == 'reg': 126 | outs[task_id][0][key][:, 0, ...] = 1 - outs[ 127 | task_id][0][key][:, 0, ...] 
128 | elif key == 'rot': 129 | outs[task_id][0][ 130 | key][:, 0, 131 | ...] = -outs[task_id][0][key][:, 0, ...] 132 | elif key == 'vel': 133 | outs[task_id][0][ 134 | key][:, 0, 135 | ...] = -outs[task_id][0][key][:, 0, ...] 136 | 137 | outs_list.append(outs) 138 | 139 | preds_dicts = dict() 140 | scale_img_metas = [] 141 | 142 | # concat outputs sharing the same pcd_scale_factor 143 | for i, (img_meta, outs) in enumerate(zip(img_metas, outs_list)): 144 | pcd_scale_factor = img_meta[0]['pcd_scale_factor'] 145 | if pcd_scale_factor not in preds_dicts.keys(): 146 | preds_dicts[pcd_scale_factor] = outs 147 | scale_img_metas.append(img_meta) 148 | else: 149 | for task_id, out in enumerate(outs): 150 | for key in out[0].keys(): 151 | preds_dicts[pcd_scale_factor][task_id][0][key] += out[ 152 | 0][key] 153 | 154 | aug_bboxes = [] 155 | 156 | for pcd_scale_factor, preds_dict in preds_dicts.items(): 157 | for task_id, pred_dict in enumerate(preds_dict): 158 | # merge outputs with different flips before decoding bboxes 159 | for key in pred_dict[0].keys(): 160 | preds_dict[task_id][0][key] /= len(outs_list) / len( 161 | preds_dicts.keys()) 162 | bbox_list = self.pts_bbox_head.get_bboxes( 163 | preds_dict, img_metas[0], rescale=rescale) 164 | bbox_list = [ 165 | dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels) 166 | for bboxes, scores, labels in bbox_list 167 | ] 168 | aug_bboxes.append(bbox_list[0]) 169 | 170 | if len(preds_dicts.keys()) > 1: 171 | # merge outputs with different scales after decoding bboxes 172 | merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, scale_img_metas, 173 | self.pts_bbox_head.test_cfg) 174 | return merged_bboxes 175 | else: 176 | for key in bbox_list[0].keys(): 177 | bbox_list[0][key] = bbox_list[0][key].to('cpu') 178 | return bbox_list[0] 179 | 180 | def aug_test(self, points, img_metas, imgs=None, rescale=False): 181 | """Test function with augmentaiton.""" 182 | img_feats, pts_feats = self.extract_feats(points, img_metas, imgs) 183 | bbox_list = dict() 184 | if pts_feats and self.with_pts_bbox: 185 | pts_bbox = self.aug_test_pts(pts_feats, img_metas, rescale) 186 | bbox_list.update(pts_bbox=pts_bbox) 187 | return [bbox_list] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from mmdetection (https://github.com/open-mmlab/mmdetection) 5 | # Copyright (c) OpenMMLab. All rights reserved. 
6 | # ------------------------------------------------------------------------ 7 | from .cp_fpn import CPFPN 8 | __all__ = ['CPFPN'] 9 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/necks/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/necks/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/necks/__pycache__/cp_fpn.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/necks/__pycache__/cp_fpn.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/necks/cp_fpn.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from mmdetection (https://github.com/open-mmlab/mmdetection) 5 | # Copyright (c) OpenMMLab. All rights reserved. 6 | # ------------------------------------------------------------------------ 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from mmcv.cnn import ConvModule 10 | from mmcv.runner import BaseModule, auto_fp16 11 | 12 | from mmdet.models import NECKS 13 | 14 | ####This FPN remove the unused parameters which can used with checkpoint (with_cp = True in Backbone) 15 | @NECKS.register_module() 16 | class CPFPN(BaseModule): 17 | r"""Feature Pyramid Network. 18 | 19 | This is an implementation of paper `Feature Pyramid Networks for Object 20 | Detection `_. 21 | 22 | Args: 23 | in_channels (List[int]): Number of input channels per scale. 24 | out_channels (int): Number of output channels (used at each scale) 25 | num_outs (int): Number of output scales. 26 | start_level (int): Index of the start input backbone level used to 27 | build the feature pyramid. Default: 0. 28 | end_level (int): Index of the end input backbone level (exclusive) to 29 | build the feature pyramid. Default: -1, which means the last level. 30 | add_extra_convs (bool | str): If bool, it decides whether to add conv 31 | layers on top of the original feature maps. Default to False. 32 | If True, it is equivalent to `add_extra_convs='on_input'`. 33 | If str, it specifies the source feature map of the extra convs. 34 | Only the following options are allowed 35 | 36 | - 'on_input': Last feat map of neck inputs (i.e. backbone feature). 37 | - 'on_lateral': Last feature map after lateral convs. 38 | - 'on_output': The last output feature map after fpn convs. 39 | relu_before_extra_convs (bool): Whether to apply relu before the extra 40 | conv. Default: False. 41 | no_norm_on_lateral (bool): Whether to apply norm on lateral. 42 | Default: False. 43 | conv_cfg (dict): Config dict for convolution layer. Default: None. 44 | norm_cfg (dict): Config dict for normalization layer. Default: None. 45 | act_cfg (str): Config dict for activation layer in ConvModule. 46 | Default: None. 
47 | upsample_cfg (dict): Config dict for interpolate layer. 48 | Default: `dict(mode='nearest')` 49 | init_cfg (dict or list[dict], optional): Initialization config dict. 50 | 51 | Example: 52 | >>> import torch 53 | >>> in_channels = [2, 3, 5, 7] 54 | >>> scales = [340, 170, 84, 43] 55 | >>> inputs = [torch.rand(1, c, s, s) 56 | ... for c, s in zip(in_channels, scales)] 57 | >>> self = FPN(in_channels, 11, len(in_channels)).eval() 58 | >>> outputs = self.forward(inputs) 59 | >>> for i in range(len(outputs)): 60 | ... print(f'outputs[{i}].shape = {outputs[i].shape}') 61 | outputs[0].shape = torch.Size([1, 11, 340, 340]) 62 | outputs[1].shape = torch.Size([1, 11, 170, 170]) 63 | outputs[2].shape = torch.Size([1, 11, 84, 84]) 64 | outputs[3].shape = torch.Size([1, 11, 43, 43]) 65 | """ 66 | 67 | def __init__(self, 68 | in_channels, 69 | out_channels, 70 | num_outs, 71 | start_level=0, 72 | end_level=-1, 73 | add_extra_convs=False, 74 | relu_before_extra_convs=False, 75 | no_norm_on_lateral=False, 76 | conv_cfg=None, 77 | norm_cfg=None, 78 | act_cfg=None, 79 | upsample_cfg=dict(mode='nearest'), 80 | init_cfg=dict( 81 | type='Xavier', layer='Conv2d', distribution='uniform')): 82 | super(CPFPN, self).__init__(init_cfg) 83 | assert isinstance(in_channels, list) 84 | self.in_channels = in_channels 85 | self.out_channels = out_channels 86 | self.num_ins = len(in_channels) 87 | self.num_outs = num_outs 88 | self.relu_before_extra_convs = relu_before_extra_convs 89 | self.no_norm_on_lateral = no_norm_on_lateral 90 | self.fp16_enabled = False 91 | self.upsample_cfg = upsample_cfg.copy() 92 | 93 | if end_level == -1: 94 | self.backbone_end_level = self.num_ins 95 | assert num_outs >= self.num_ins - start_level 96 | else: 97 | # if end_level < inputs, no extra level is allowed 98 | self.backbone_end_level = end_level 99 | assert end_level <= len(in_channels) 100 | assert num_outs == end_level - start_level 101 | self.start_level = start_level 102 | self.end_level = end_level 103 | self.add_extra_convs = add_extra_convs 104 | assert isinstance(add_extra_convs, (str, bool)) 105 | if isinstance(add_extra_convs, str): 106 | # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' 107 | assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') 108 | elif add_extra_convs: # True 109 | self.add_extra_convs = 'on_input' 110 | 111 | self.lateral_convs = nn.ModuleList() 112 | self.fpn_convs = nn.ModuleList() 113 | 114 | for i in range(self.start_level, self.backbone_end_level): 115 | l_conv = ConvModule( 116 | in_channels[i], 117 | out_channels, 118 | 1, 119 | conv_cfg=conv_cfg, 120 | norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, 121 | act_cfg=act_cfg, 122 | inplace=False) 123 | self.lateral_convs.append(l_conv) 124 | if i == 0 : 125 | fpn_conv = ConvModule( 126 | out_channels, 127 | out_channels, 128 | 3, 129 | padding=1, 130 | conv_cfg=conv_cfg, 131 | norm_cfg=norm_cfg, 132 | act_cfg=act_cfg, 133 | inplace=False) 134 | self.fpn_convs.append(fpn_conv) 135 | 136 | # add extra conv layers (e.g., RetinaNet) 137 | extra_levels = num_outs - self.backbone_end_level + self.start_level 138 | if self.add_extra_convs and extra_levels >= 1: 139 | for i in range(extra_levels): 140 | if i == 0 and self.add_extra_convs == 'on_input': 141 | in_channels = self.in_channels[self.backbone_end_level - 1] 142 | else: 143 | in_channels = out_channels 144 | extra_fpn_conv = ConvModule( 145 | in_channels, 146 | out_channels, 147 | 3, 148 | stride=2, 149 | padding=1, 150 | conv_cfg=conv_cfg, 151 | 
norm_cfg=norm_cfg, 152 | act_cfg=act_cfg, 153 | inplace=False) 154 | self.fpn_convs.append(extra_fpn_conv) 155 | 156 | # @auto_fp16() 157 | def forward(self, inputs): 158 | """Forward function.""" 159 | assert len(inputs) == len(self.in_channels) 160 | 161 | # build laterals 162 | laterals = [ 163 | lateral_conv(inputs[i + self.start_level]) 164 | for i, lateral_conv in enumerate(self.lateral_convs) 165 | ] 166 | 167 | # build top-down path 168 | used_backbone_levels = len(laterals) 169 | for i in range(used_backbone_levels - 1, 0, -1): 170 | # In some cases, fixing `scale factor` (e.g. 2) is preferred, but 171 | # it cannot co-exist with `size` in `F.interpolate`. 172 | if 'scale_factor' in self.upsample_cfg: 173 | laterals[i - 1] += F.interpolate(laterals[i], 174 | **self.upsample_cfg) 175 | else: 176 | prev_shape = laterals[i - 1].shape[2:] 177 | laterals[i - 1] += F.interpolate( 178 | laterals[i], size=prev_shape, **self.upsample_cfg) 179 | 180 | # build outputs 181 | # part 1: from original levels 182 | outs = [ 183 | self.fpn_convs[i](laterals[i]) if i==0 else laterals[i] for i in range(used_backbone_levels) 184 | ] 185 | # part 2: add extra levels 186 | if self.num_outs > len(outs): 187 | # use max pool to get more levels on top of outputs 188 | # (e.g., Faster R-CNN, Mask R-CNN) 189 | if not self.add_extra_convs: 190 | for i in range(self.num_outs - used_backbone_levels): 191 | outs.append(F.max_pool2d(outs[-1], 1, stride=2)) 192 | # add conv layers on top of original feature maps (RetinaNet) 193 | else: 194 | if self.add_extra_convs == 'on_input': 195 | extra_source = inputs[self.backbone_end_level - 1] 196 | elif self.add_extra_convs == 'on_lateral': 197 | extra_source = laterals[-1] 198 | elif self.add_extra_convs == 'on_output': 199 | extra_source = outs[-1] 200 | else: 201 | raise NotImplementedError 202 | outs.append(self.fpn_convs[used_backbone_levels](extra_source)) 203 | for i in range(used_backbone_levels + 1, self.num_outs): 204 | if self.relu_before_extra_convs: 205 | outs.append(self.fpn_convs[i](F.relu(outs[-1]))) 206 | else: 207 | outs.append(self.fpn_convs[i](outs[-1])) 208 | return tuple(outs) 209 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 
3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | from .dgcnn_attn import DGCNNAttn 8 | from .detr import Deformable3DDetrTransformerDecoder 9 | from .detr3d_transformer import Detr3DTransformer, Detr3DTransformerDecoder, Detr3DCrossAtten 10 | from .positional_encoding import SinePositionalEncoding3D, LearnedPositionalEncoding3D 11 | from .petr_transformer import PETRTransformer, PETRMultiheadAttention, PETRTransformerEncoder, PETRTransformerDecoder 12 | 13 | __all__ = ['DGCNNAttn', 'Deformable3DDetrTransformerDecoder', 14 | 'Detr3DTransformer', 'Detr3DTransformerDecoder', 'Detr3DCrossAtten', 15 | 'SinePositionalEncoding3D', 'LearnedPositionalEncoding3D', 16 | 'PETRTransformer', 'PETRMultiheadAttention', 17 | 'PETRTransformerEncoder', 'PETRTransformerDecoder' 18 | ] 19 | 20 | 21 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/detr.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/utils/__pycache__/detr.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/detr3d_transformer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/utils/__pycache__/detr3d_transformer.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/dgcnn_attn.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/utils/__pycache__/dgcnn_attn.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/grid_mask.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/utils/__pycache__/grid_mask.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/petr_transformer.cpython-38.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/utils/__pycache__/petr_transformer.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/positional_encoding.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/mmdet3d_plugin/models/utils/__pycache__/positional_encoding.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/detr.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | import torch 11 | from mmcv.cnn.bricks.registry import TRANSFORMER_LAYER_SEQUENCE 12 | from mmcv.cnn.bricks.transformer import TransformerLayerSequence 13 | 14 | 15 | def inverse_sigmoid(x, eps=1e-5): 16 | """Inverse function of sigmoid. 17 | Args: 18 | x (Tensor): The tensor to do the 19 | inverse. 20 | eps (float): EPS avoid numerical 21 | overflow. Defaults 1e-5. 22 | Returns: 23 | Tensor: The x has passed the inverse 24 | function of sigmoid, has same 25 | shape with input. 26 | """ 27 | x = x.clamp(min=0, max=1) 28 | x1 = x.clamp(min=eps) 29 | x2 = (1 - x).clamp(min=eps) 30 | return torch.log(x1 / x2) 31 | 32 | 33 | @TRANSFORMER_LAYER_SEQUENCE.register_module() 34 | class Deformable3DDetrTransformerDecoder(TransformerLayerSequence): 35 | """Copy the decoder in DETR transformer. 36 | Args: 37 | return_intermediate (bool): Whether to return intermediate outputs. 38 | coder_norm_cfg (dict): Config of last normalization layer. Default: 39 | `LN`. 40 | """ 41 | 42 | def __init__(self, *args, return_intermediate=False, **kwargs): 43 | super(Deformable3DDetrTransformerDecoder, self).__init__(*args, **kwargs) 44 | self.return_intermediate = return_intermediate 45 | 46 | def forward(self, 47 | query, 48 | *args, 49 | reference_points=None, 50 | valid_ratios=None, 51 | reg_branches=None, 52 | **kwargs): 53 | """Forward function for `TransformerDecoder`. 54 | Args: 55 | query (Tensor): Input query with shape 56 | `(num_query, bs, embed_dims)`. 57 | reference_points (Tensor): The reference 58 | points of offset. has shape 59 | (bs, num_query, 4) when as_two_stage, 60 | otherwise has shape ((bs, num_query, 2). 61 | valid_ratios (Tensor): The radios of valid 62 | points on the feature map, has shape 63 | (bs, num_levels, 2) 64 | reg_branch: (obj:`nn.ModuleList`): Used for 65 | refining the regression results. Only would 66 | be passed when with_box_refine is True, 67 | otherwise would be passed a `None`. 
68 | Returns: 69 | Tensor: Results with shape [1, num_query, bs, embed_dims] when 70 | return_intermediate is `False`, otherwise it has shape 71 | [num_layers, num_query, bs, embed_dims]. 72 | """ 73 | output = query 74 | intermediate = [] 75 | intermediate_reference_points = [] 76 | for lid, layer in enumerate(self.layers): 77 | if reference_points.shape[-1] == 4: 78 | reference_points_input = reference_points[:, :, None] * \ 79 | torch.cat([valid_ratios, valid_ratios], -1)[:, None] 80 | else: 81 | assert reference_points.shape[-1] == 2 82 | reference_points_input = reference_points[:, :, None] * \ 83 | valid_ratios[:, None] 84 | output = layer( 85 | output, 86 | *args, 87 | reference_points=reference_points_input, 88 | **kwargs) 89 | output = output.permute(1, 0, 2) 90 | 91 | if reg_branches is not None: 92 | tmp = reg_branches[lid](output) 93 | if reference_points.shape[-1] == 4: 94 | new_reference_points = tmp + inverse_sigmoid( 95 | reference_points) 96 | new_reference_points = new_reference_points.sigmoid() 97 | else: 98 | assert reference_points.shape[-1] == 2 99 | # This is to deal with the different output number (10). 100 | # new_reference_points = tmp 101 | new_reference_points = tmp[ 102 | ..., :2] + inverse_sigmoid(reference_points) 103 | new_reference_points = new_reference_points.sigmoid() 104 | reference_points = new_reference_points.detach() 105 | 106 | output = output.permute(1, 0, 2) 107 | if self.return_intermediate: 108 | intermediate.append(output) 109 | intermediate_reference_points.append(reference_points) 110 | 111 | if self.return_intermediate: 112 | return torch.stack(intermediate), torch.stack( 113 | intermediate_reference_points) 114 | 115 | return output, reference_points 116 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/dgcnn_attn.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | from mmcv.cnn.bricks.registry import ATTENTION 6 | from mmcv.runner.base_module import BaseModule 7 | 8 | 9 | @ATTENTION.register_module() 10 | class DGCNNAttn(BaseModule): 11 | """A warpper for DGCNN-type self-attention. 12 | Args: 13 | embed_dims (int): The embedding dimension. 14 | num_heads (int): Parallel attention heads. Same as 15 | `nn.MultiheadAttention`. 16 | dropout (float):w A Dropout layer on attn_output_weights. Default: 0.. 17 | init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. 18 | Default: None. 19 | """ 20 | 21 | def __init__(self, 22 | embed_dims, 23 | num_heads, 24 | dropout=0., 25 | init_cfg=None, 26 | **kwargs): 27 | super(DGCNNAttn, self).__init__(init_cfg) 28 | self.embed_dims = embed_dims 29 | self.num_heads = num_heads 30 | self.dropout = dropout 31 | self.conv1 = nn.Sequential(nn.Conv2d(self.embed_dims*2, self.embed_dims, kernel_size=1, bias=False), 32 | nn.BatchNorm2d(self.embed_dims), 33 | nn.ReLU(inplace=True)) 34 | self.conv2 = nn.Sequential(nn.Conv2d(self.embed_dims*2, self.embed_dims, kernel_size=1, bias=False), 35 | nn.BatchNorm2d(self.embed_dims), 36 | nn.ReLU(inplace=True)) 37 | self.K = kwargs['K'] 38 | self.dropout = nn.Dropout(dropout) 39 | 40 | def forward(self, 41 | query, 42 | key=None, 43 | value=None, 44 | residual=None, 45 | query_pos=None, 46 | key_pos=None, 47 | attn_mask=None, 48 | key_padding_mask=None, 49 | **kwargs): 50 | """Forward function for `DGCNN`. 
51 | **kwargs allow passing a more general data flow when combining 52 | with other operations in `DGCNN`. 53 | Args: 54 | query (Tensor): The input query with shape [num_queries, bs, 55 | embed_dims]. Same in `nn.MultiheadAttention.forward`. 56 | residual (Tensor): This tensor, with the same shape as x, 57 | will be used for the residual link. 58 | If None, `x` will be used. Defaults to None. 59 | query_pos (Tensor): The positional encoding for query, with 60 | the same shape as `x`. If not None, it will 61 | be added to `x` before forward function. Defaults to None. 62 | Returns: 63 | Tensor: forwarded results with shape [num_queries, bs, embed_dims]. 64 | """ 65 | if residual is None: 66 | residual = query 67 | if query_pos is not None: 68 | query = query + query_pos 69 | 70 | query = query.permute(1, 0, 2) # [bs, num_queries, embed_dims] 71 | edge_feats = self.edge_feats(query, K=self.K) 72 | edge_feats1 = self.conv1(edge_feats) 73 | edge_feats1 = edge_feats1.max(dim=-1)[0] 74 | out = edge_feats1 75 | edge_feats1 = self.edge_feats(edge_feats1.permute(0, 2, 1)) 76 | edge_feats2 = self.conv2(edge_feats1) 77 | edge_feats2 = edge_feats2.max(dim=-1)[0] 78 | out = out + edge_feats2 79 | out = out.permute(2, 0, 1) 80 | return residual + self.dropout(out) 81 | 82 | def edge_feats(self, query, K=16): 83 | # (B, N, N) 84 | affinity = torch.cdist(query, query) 85 | # (B, N, K) 86 | _, topk = torch.topk(affinity, k=K, dim=2) 87 | B, N, C = query.size() 88 | 89 | idx_base = torch.arange(0, B, device=query.device).view(-1, 1, 1) * N 90 | idx = topk + idx_base 91 | idx = idx.view(-1) 92 | query = query.reshape(B*N, C) 93 | query_neighbor = query[idx, :].view(B, N, K, C) 94 | query = query.reshape(B, N, 1, C).repeat(1, 1, K, 1) 95 | out = torch.cat((query_neighbor, query), dim=-1).permute(0, 3, 1, 2).contiguous() 96 | return out 97 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/grid_mask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from PIL import Image 5 | 6 | class Grid(object): 7 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 8 | self.use_h = use_h 9 | self.use_w = use_w 10 | self.rotate = rotate 11 | self.offset = offset 12 | self.ratio = ratio 13 | self.mode=mode 14 | self.st_prob = prob 15 | self.prob = prob 16 | 17 | def set_prob(self, epoch, max_epoch): 18 | self.prob = self.st_prob * epoch / max_epoch 19 | 20 | def __call__(self, img, label): 21 | if np.random.rand() > self.prob: 22 | return img, label 23 | h = img.size(1) 24 | w = img.size(2) 25 | self.d1 = 2 26 | self.d2 = min(h, w) 27 | hh = int(1.5*h) 28 | ww = int(1.5*w) 29 | d = np.random.randint(self.d1, self.d2) 30 | if self.ratio == 1: 31 | self.l = np.random.randint(1, d) 32 | else: 33 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 34 | mask = np.ones((hh, ww), np.float32) 35 | st_h = np.random.randint(d) 36 | st_w = np.random.randint(d) 37 | if self.use_h: 38 | for i in range(hh//d): 39 | s = d*i + st_h 40 | t = min(s+self.l, hh) 41 | mask[s:t,:] *= 0 42 | if self.use_w: 43 | for i in range(ww//d): 44 | s = d*i + st_w 45 | t = min(s+self.l, ww) 46 | mask[:,s:t] *= 0 47 | 48 | r = np.random.randint(self.rotate) 49 | mask = Image.fromarray(np.uint8(mask)) 50 | mask = mask.rotate(r) 51 | mask = np.asarray(mask) 52 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 53 | 54 | mask = 
torch.from_numpy(mask).float() 55 | if self.mode == 1: 56 | mask = 1-mask 57 | 58 | mask = mask.expand_as(img) 59 | if self.offset: 60 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float() 61 | offset = (1 - mask) * offset 62 | img = img * mask + offset 63 | else: 64 | img = img * mask 65 | 66 | return img, label 67 | 68 | 69 | class GridMask(nn.Module): 70 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 71 | super(GridMask, self).__init__() 72 | self.use_h = use_h 73 | self.use_w = use_w 74 | self.rotate = rotate 75 | self.offset = offset 76 | self.ratio = ratio 77 | self.mode = mode 78 | self.st_prob = prob 79 | self.prob = prob 80 | 81 | def set_prob(self, epoch, max_epoch): 82 | self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5 83 | 84 | def forward(self, x): 85 | if np.random.rand() > self.prob or not self.training: 86 | return x 87 | n,c,h,w = x.size() 88 | x = x.view(-1,h,w) 89 | hh = int(1.5*h) 90 | ww = int(1.5*w) 91 | d = np.random.randint(2, h) 92 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 93 | mask = np.ones((hh, ww), np.float32) 94 | st_h = np.random.randint(d) 95 | st_w = np.random.randint(d) 96 | if self.use_h: 97 | for i in range(hh//d): 98 | s = d*i + st_h 99 | t = min(s+self.l, hh) 100 | mask[s:t,:] *= 0 101 | if self.use_w: 102 | for i in range(ww//d): 103 | s = d*i + st_w 104 | t = min(s+self.l, ww) 105 | mask[:,s:t] *= 0 106 | 107 | r = np.random.randint(self.rotate) 108 | mask = Image.fromarray(np.uint8(mask)) 109 | mask = mask.rotate(r) 110 | mask = np.asarray(mask) 111 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 112 | 113 | mask = torch.from_numpy(mask).float().cuda() 114 | if self.mode == 1: 115 | mask = 1-mask 116 | mask = mask.expand_as(x) 117 | if self.offset: 118 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float().cuda() 119 | x = x * mask + offset * (1 - mask) 120 | else: 121 | x = x * mask 122 | 123 | return x.view(n,c,h,w) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/positional_encoding.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from mmdetection (https://github.com/open-mmlab/mmdetection) 5 | # Copyright (c) OpenMMLab. All rights reserved. 6 | # ------------------------------------------------------------------------ 7 | import math 8 | 9 | import torch 10 | import torch.nn as nn 11 | from mmcv.cnn.bricks.transformer import POSITIONAL_ENCODING 12 | from mmcv.runner import BaseModule 13 | 14 | @POSITIONAL_ENCODING.register_module() 15 | class SinePositionalEncoding3D(BaseModule): 16 | """Position encoding with sine and cosine functions. 17 | See `End-to-End Object Detection with Transformers 18 | `_ for details. 19 | Args: 20 | num_feats (int): The feature dimension for each position 21 | along x-axis or y-axis. Note the final returned dimension 22 | for each position is 2 times of this value. 23 | temperature (int, optional): The temperature used for scaling 24 | the position embedding. Defaults to 10000. 25 | normalize (bool, optional): Whether to normalize the position 26 | embedding. Defaults to False. 27 | scale (float, optional): A scale factor that scales the position 28 | embedding. 
The scale will be used only when `normalize` is True. 29 | Defaults to 2*pi. 30 | eps (float, optional): A value added to the denominator for 31 | numerical stability. Defaults to 1e-6. 32 | offset (float): offset add to embed when do the normalization. 33 | Defaults to 0. 34 | init_cfg (dict or list[dict], optional): Initialization config dict. 35 | Default: None 36 | """ 37 | 38 | def __init__(self, 39 | num_feats, 40 | temperature=10000, 41 | normalize=False, 42 | scale=2 * math.pi, 43 | eps=1e-6, 44 | offset=0., 45 | init_cfg=None): 46 | super(SinePositionalEncoding3D, self).__init__(init_cfg) 47 | if normalize: 48 | assert isinstance(scale, (float, int)), 'when normalize is set,' \ 49 | 'scale should be provided and in float or int type, ' \ 50 | f'found {type(scale)}' 51 | self.num_feats = num_feats 52 | self.temperature = temperature 53 | self.normalize = normalize 54 | self.scale = scale 55 | self.eps = eps 56 | self.offset = offset 57 | 58 | def forward(self, mask): 59 | """Forward function for `SinePositionalEncoding`. 60 | Args: 61 | mask (Tensor): ByteTensor mask. Non-zero values representing 62 | ignored positions, while zero values means valid positions 63 | for this image. Shape [bs, h, w]. 64 | Returns: 65 | pos (Tensor): Returned position embedding with shape 66 | [bs, num_feats*2, h, w]. 67 | """ 68 | # For convenience of exporting to ONNX, it's required to convert 69 | # `masks` from bool to int. 70 | mask = mask.to(torch.int) 71 | not_mask = 1 - mask # logical_not 72 | n_embed = not_mask.cumsum(1, dtype=torch.float32) 73 | y_embed = not_mask.cumsum(2, dtype=torch.float32) 74 | x_embed = not_mask.cumsum(3, dtype=torch.float32) 75 | if self.normalize: 76 | n_embed = (n_embed + self.offset) / \ 77 | (n_embed[:, -1:, :, :] + self.eps) * self.scale 78 | y_embed = (y_embed + self.offset) / \ 79 | (y_embed[:, :, -1:, :] + self.eps) * self.scale 80 | x_embed = (x_embed + self.offset) / \ 81 | (x_embed[:, :, :, -1:] + self.eps) * self.scale 82 | dim_t = torch.arange( 83 | self.num_feats, dtype=torch.float32, device=mask.device) 84 | dim_t = self.temperature**(2 * (dim_t // 2) / self.num_feats) 85 | pos_n = n_embed[:, :, :, :, None] / dim_t 86 | pos_x = x_embed[:, :, :, :, None] / dim_t 87 | pos_y = y_embed[:, :, :, :, None] / dim_t 88 | # use `view` instead of `flatten` for dynamically exporting to ONNX 89 | B, N, H, W = mask.size() 90 | pos_n = torch.stack( 91 | (pos_n[:, :, :, :, 0::2].sin(), pos_n[:, :, :, :, 1::2].cos()), 92 | dim=4).view(B, N, H, W, -1) 93 | pos_x = torch.stack( 94 | (pos_x[:, :, :, :, 0::2].sin(), pos_x[:, :, :, :, 1::2].cos()), 95 | dim=4).view(B, N, H, W, -1) 96 | pos_y = torch.stack( 97 | (pos_y[:, :, :, :, 0::2].sin(), pos_y[:, :, :, :, 1::2].cos()), 98 | dim=4).view(B, N, H, W, -1) 99 | pos = torch.cat((pos_n, pos_y, pos_x), dim=4).permute(0, 1, 4, 2, 3) 100 | return pos 101 | 102 | def __repr__(self): 103 | """str: a string that describes the module""" 104 | repr_str = self.__class__.__name__ 105 | repr_str += f'(num_feats={self.num_feats}, ' 106 | repr_str += f'temperature={self.temperature}, ' 107 | repr_str += f'normalize={self.normalize}, ' 108 | repr_str += f'scale={self.scale}, ' 109 | repr_str += f'eps={self.eps})' 110 | return repr_str 111 | 112 | 113 | @POSITIONAL_ENCODING.register_module() 114 | class LearnedPositionalEncoding3D(BaseModule): 115 | """Position embedding with learnable embedding weights. 116 | Args: 117 | num_feats (int): The feature dimension for each position 118 | along x-axis or y-axis. 
The final returned dimension for 119 | each position is 2 times of this value. 120 | row_num_embed (int, optional): The dictionary size of row embeddings. 121 | Default 50. 122 | col_num_embed (int, optional): The dictionary size of col embeddings. 123 | Default 50. 124 | init_cfg (dict or list[dict], optional): Initialization config dict. 125 | """ 126 | 127 | def __init__(self, 128 | num_feats, 129 | row_num_embed=50, 130 | col_num_embed=50, 131 | init_cfg=dict(type='Uniform', layer='Embedding')): 132 | super(LearnedPositionalEncoding3D, self).__init__(init_cfg) 133 | self.row_embed = nn.Embedding(row_num_embed, num_feats) 134 | self.col_embed = nn.Embedding(col_num_embed, num_feats) 135 | self.num_feats = num_feats 136 | self.row_num_embed = row_num_embed 137 | self.col_num_embed = col_num_embed 138 | 139 | def forward(self, mask): 140 | """Forward function for `LearnedPositionalEncoding`. 141 | Args: 142 | mask (Tensor): ByteTensor mask. Non-zero values representing 143 | ignored positions, while zero values means valid positions 144 | for this image. Shape [bs, h, w]. 145 | Returns: 146 | pos (Tensor): Returned position embedding with shape 147 | [bs, num_feats*2, h, w]. 148 | """ 149 | h, w = mask.shape[-2:] 150 | x = torch.arange(w, device=mask.device) 151 | y = torch.arange(h, device=mask.device) 152 | x_embed = self.col_embed(x) 153 | y_embed = self.row_embed(y) 154 | pos = torch.cat( 155 | (x_embed.unsqueeze(0).repeat(h, 1, 1), y_embed.unsqueeze(1).repeat( 156 | 1, w, 1)), 157 | dim=-1).permute(2, 0, 158 | 1).unsqueeze(0).repeat(mask.shape[0], 1, 1, 1) 159 | return pos 160 | 161 | def __repr__(self): 162 | """str: a string that describes the module""" 163 | repr_str = self.__class__.__name__ 164 | repr_str += f'(num_feats={self.num_feats}, ' 165 | repr_str += f'row_num_embed={self.row_num_embed}, ' 166 | repr_str += f'col_num_embed={self.col_num_embed})' 167 | return repr_str -------------------------------------------------------------------------------- /projects/tracking_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import NuScenesTrackingDataset, FormatBundle3DTrack, ScaleMultiViewImage3D, TrackInstanceRangeFilter, TrackLoadAnnotations3D, \ 2 | TrackPadMultiViewImage, TrackNormalizeMultiviewImage, TrackResizeMultiview3D, TrackResizeCropFlipImage, TrackGlobalRotScaleTransImage 3 | from .models import Cam3DTracker, TrackingLossBase, TrackingLoss, DETR3DCamTrackingHead 4 | from .core.coder import TrackNMSFreeCoder 5 | -------------------------------------------------------------------------------- /projects/tracking_plugin/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/core/__init__.py -------------------------------------------------------------------------------- /projects/tracking_plugin/core/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/core/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/core/__pycache__/coder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/core/__pycache__/coder.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/core/__pycache__/instances.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/core/__pycache__/instances.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/core/coder.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2023 toyota research instutute. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | import torch 11 | from mmdet.core.bbox import BaseBBoxCoder 12 | from mmdet.core.bbox.builder import BBOX_CODERS 13 | from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox 14 | import torch.nn.functional as F 15 | 16 | 17 | @BBOX_CODERS.register_module() 18 | class TrackNMSFreeCoder(BaseBBoxCoder): 19 | """Bbox coder for NMS-free detector. Including the fields for tracking 20 | Args: 21 | pc_range (list[float]): Range of point cloud. 22 | post_center_range (list[float]): Limit of the center. 23 | Default: None. 24 | max_num (int): Max number to be kept. Default: 100. 25 | score_threshold (float): Threshold to filter boxes based on score. 26 | Default: None. 27 | code_size (int): Code size of bboxes. Default: 9 28 | """ 29 | 30 | def __init__(self, 31 | pc_range, 32 | voxel_size=None, 33 | post_center_range=None, 34 | max_num=100, 35 | score_threshold=None, 36 | num_classes=10): 37 | 38 | self.pc_range = pc_range 39 | self.voxel_size = voxel_size 40 | self.post_center_range = post_center_range 41 | self.max_num = max_num 42 | self.score_threshold = score_threshold 43 | self.num_classes = num_classes 44 | 45 | def encode(self): 46 | pass 47 | 48 | def decode_single(self, cls_scores, bbox_preds, obj_idxes=None, track_scores=None, motion_forecasting=None, masks=None): 49 | """Decode bboxes. 50 | Args: 51 | cls_scores (Tensor): Outputs from the classification head, \ 52 | shape [num_query, cls_out_channels]. Note \ 53 | cls_out_channels should includes background. 54 | bbox_preds (Tensor): Outputs from the regression \ 55 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 56 | Shape [num_query, 9]. 
57 | obj_idxes (Tensor): The idxes of the track instances 58 | track_scores (Tensor): The scores of the bbox 59 | motion_forecasting (Tensor): The predicted trajectories, [num_query, T, 2] 60 | all_masks (Tensor): The masks for valid query output 61 | Returns: 62 | list[dict]: Decoded boxes. 63 | """ 64 | max_num = self.max_num 65 | cls_scores = cls_scores.sigmoid() 66 | 67 | if masks is not None: 68 | cls_scores = cls_scores[masks] 69 | bbox_preds = bbox_preds[masks] 70 | obj_idxes = obj_idxes[masks] 71 | track_scores = track_scores[masks] 72 | if motion_forecasting is not None: 73 | motion_forecasting = motion_forecasting[masks] 74 | 75 | # tracking mode decode 76 | if obj_idxes is not None: 77 | _, indexs = cls_scores.max(dim=-1) 78 | labels = indexs % self.num_classes 79 | _, bbox_index = track_scores.topk(min(max_num, len(obj_idxes))) 80 | track_scores = track_scores[bbox_index] 81 | obj_idxes = obj_idxes[bbox_index] 82 | bbox_preds = bbox_preds[bbox_index] 83 | labels = labels[bbox_index] 84 | scores = track_scores 85 | if motion_forecasting is not None: 86 | motion_forecasting = motion_forecasting[bbox_index] 87 | # detection mode decode 88 | else: 89 | cls_scores_topk = cls_scores.view(-1) 90 | scores, indexs = cls_scores_topk.topk(min(max_num, cls_scores_topk.size(0))) 91 | labels = indexs % self.num_classes 92 | scores, indexs = cls_scores_topk.topk(min(max_num, cls_scores_topk.size(0))) 93 | labels = indexs % self.num_classes 94 | bbox_index = indexs // self.num_classes 95 | bbox_preds = bbox_preds[bbox_index] 96 | 97 | final_scores = scores 98 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range) 99 | final_preds = labels 100 | final_motion_forecasting = motion_forecasting 101 | 102 | # use score threshold 103 | if self.score_threshold is not None: 104 | thresh_mask = final_scores > self.score_threshold 105 | if self.post_center_range is not None: 106 | self.post_center_range = torch.tensor(self.post_center_range, device=scores.device) 107 | 108 | mask = (final_box_preds[..., :3] >= 109 | self.post_center_range[:3]).all(1) 110 | mask &= (final_box_preds[..., :3] <= 111 | self.post_center_range[3:]).all(1) 112 | 113 | if self.score_threshold: 114 | mask &= thresh_mask 115 | 116 | boxes3d = final_box_preds[mask] 117 | scores = final_scores[mask] 118 | labels = final_preds[mask] 119 | if final_motion_forecasting is not None: 120 | motion_forecasting = final_motion_forecasting[mask] 121 | if obj_idxes is not None: 122 | track_scores = track_scores[mask] 123 | obj_idxes = obj_idxes[mask] 124 | 125 | predictions_dict = { 126 | 'bboxes': boxes3d, 127 | 'scores': scores, 128 | 'labels': labels, 129 | 'track_scores': track_scores, 130 | 'obj_idxes': obj_idxes, 131 | 'forecasting': motion_forecasting 132 | } 133 | 134 | else: 135 | raise NotImplementedError( 136 | 'Need to reorganize output as a batch, only ' 137 | 'support post_center_range is not None for now!') 138 | return predictions_dict 139 | 140 | def decode(self, preds_dicts): 141 | """Decode bboxes. 142 | Args: 143 | all_cls_scores (Tensor): Outputs from the classification head, \ 144 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \ 145 | cls_out_channels should includes background. 146 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \ 147 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 148 | Shape [nb_dec, bs, num_query, 9]. 149 | track_instances (Instances): Instances containing track information. 150 | Available for tracking evaluation. 
151 | Returns: 152 | list[dict]: Decoded boxes. 153 | """ 154 | all_cls_scores = preds_dicts['all_cls_scores'][-1].clone() 155 | all_bbox_preds = preds_dicts['all_bbox_preds'][-1].clone() 156 | 157 | batch_size = all_cls_scores.size()[0] 158 | if 'track_instances' in preds_dicts.keys(): 159 | track_instances = preds_dicts['track_instances'].clone() 160 | obj_idxes = [track_instances.obj_idxes.clone()] 161 | track_scores = [track_instances.scores.clone()] 162 | if 'all_masks' in preds_dicts.keys(): 163 | all_masks = [preds_dicts['all_masks'].clone()] 164 | else: 165 | all_masks = [None] 166 | 167 | if 'all_motion_forecasting' in preds_dicts.keys() and preds_dicts['all_motion_forecasting'] is not None: 168 | motion_forecasting = preds_dicts['all_motion_forecasting'].clone() 169 | motion_forecasting = [motion_forecasting] 170 | else: 171 | motion_forecasting = [None] 172 | else: 173 | obj_idxes = [None for _ in range(batch_size)] 174 | track_scores = [None for _ in range(batch_size)] 175 | motion_forecasting = [None for _ in range(batch_size)] 176 | all_masks = [None for _ in range(batch_size)] 177 | 178 | predictions_list = [] 179 | for i in range(batch_size): 180 | predictions_list.append(self.decode_single( 181 | all_cls_scores[i], all_bbox_preds[i], obj_idxes[i], track_scores[i], 182 | motion_forecasting[i], all_masks[i])) 183 | return predictions_list -------------------------------------------------------------------------------- /projects/tracking_plugin/core/instances.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2023 toyota research instutute. 3 | # ------------------------------------------------------------------------ 4 | # Modified from MOTR (https://github.com/megvii-model/MOTR/) 5 | # ------------------------------------------------------------------------ 6 | # Modified from Detectron2 (https://github.com/facebookresearch/detectron2) 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # ------------------------------------------------------------------------ 9 | 10 | import itertools 11 | from typing import Any, Dict, List, Tuple, Union 12 | import torch 13 | 14 | 15 | class Instances: 16 | """ 17 | This class represents a list of instances in an image. 18 | It stores the attributes of instances (e.g., boxes, masks, labels, scores) as "fields". 19 | All fields must have the same ``__len__`` which is the number of instances. 20 | All other (non-field) attributes of this class are considered private: 21 | they must start with '_' and are not modifiable by a user. 22 | Some basic usage: 23 | 1. Set/get/check a field: 24 | .. code-block:: python 25 | instances.gt_boxes = Boxes(...) 26 | print(instances.pred_masks) # a tensor of shape (N, H, W) 27 | print('gt_masks' in instances) 28 | 2. ``len(instances)`` returns the number of instances 29 | 3. Indexing: ``instances[indices]`` will apply the indexing on all the fields 30 | and returns a new :class:`Instances`. 31 | Typically, ``indices`` is a integer vector of indices, 32 | or a binary mask of length ``num_instances`` 33 | .. code-block:: python 34 | category_3_detections = instances[instances.pred_classes == 3] 35 | confident_detections = instances[instances.scores > 0.9] 36 | """ 37 | 38 | def __init__(self, image_size: Tuple[int, int], **kwargs: Any): 39 | """ 40 | Args: 41 | image_size (height, width): the spatial size of the image. 
42 | kwargs: fields to add to this `Instances`. 43 | """ 44 | self._image_size = image_size 45 | self._fields: Dict[str, Any] = {} 46 | for k, v in kwargs.items(): 47 | self.set(k, v) 48 | 49 | @property 50 | def image_size(self) -> Tuple[int, int]: 51 | """ 52 | Returns: 53 | tuple: height, width 54 | """ 55 | return self._image_size 56 | 57 | def __setattr__(self, name: str, val: Any) -> None: 58 | if name.startswith("_"): 59 | super().__setattr__(name, val) 60 | else: 61 | self.set(name, val) 62 | 63 | def __getattr__(self, name: str) -> Any: 64 | if name == "_fields" or name not in self._fields: 65 | raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) 66 | return self._fields[name] 67 | 68 | def set(self, name: str, value: Any) -> None: 69 | """ 70 | Set the field named `name` to `value`. 71 | The length of `value` must be the number of instances, 72 | and must agree with other existing fields in this object. 73 | """ 74 | data_len = len(value) 75 | if len(self._fields): 76 | assert ( 77 | len(self) == data_len 78 | ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) 79 | self._fields[name] = value 80 | 81 | def has(self, name: str) -> bool: 82 | """ 83 | Returns: 84 | bool: whether the field called `name` exists. 85 | """ 86 | return name in self._fields 87 | 88 | def remove(self, name: str) -> None: 89 | """ 90 | Remove the field called `name`. 91 | """ 92 | del self._fields[name] 93 | 94 | def get(self, name: str) -> Any: 95 | """ 96 | Returns the field called `name`. 97 | """ 98 | return self._fields[name] 99 | 100 | def get_fields(self) -> Dict[str, Any]: 101 | """ 102 | Returns: 103 | dict: a dict which maps names (str) to data of the fields 104 | Modifying the returned dict will modify this instance. 105 | """ 106 | return self._fields 107 | 108 | # Tensor-like methods 109 | def to(self, *args: Any, **kwargs: Any) -> "Instances": 110 | """ 111 | Returns: 112 | Instances: all fields are called with a `to(device)`, if the field has this method. 113 | """ 114 | ret = Instances(self._image_size) 115 | for k, v in self._fields.items(): 116 | if hasattr(v, "to"): 117 | v = v.to(*args, **kwargs) 118 | ret.set(k, v) 119 | return ret 120 | 121 | def numpy(self): 122 | ret = Instances(self._image_size) 123 | for k, v in self._fields.items(): 124 | if hasattr(v, "numpy"): 125 | v = v.numpy() 126 | ret.set(k, v) 127 | return ret 128 | 129 | def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Instances": 130 | """ 131 | Args: 132 | item: an index-like object and will be used to index all the fields. 133 | Returns: 134 | If `item` is a string, return the data in the corresponding field. 135 | Otherwise, returns an `Instances` where all fields are indexed by `item`. 
136 | """ 137 | if type(item) == int: 138 | if item >= len(self) or item < -len(self): 139 | raise IndexError("Instances index out of range!") 140 | else: 141 | item = slice(item, None, len(self)) 142 | 143 | ret = Instances(self._image_size) 144 | for k, v in self._fields.items(): 145 | # print(k, type(item), 'getitem', item.type(), item.dtype) 146 | # if index by torch.BoolTensor 147 | if k == 'kalman_models' and isinstance(item, torch.Tensor): 148 | # print(item.shape, 'in get item') 149 | ret_list = [] 150 | for i, if_true in enumerate(item): 151 | if if_true: 152 | ret_list.append(self.kalman_models[i]) 153 | ret.set(k, ret_list) 154 | 155 | else: 156 | ret.set(k, v[item]) 157 | return ret 158 | 159 | def __len__(self) -> int: 160 | for v in self._fields.values(): 161 | # use __len__ because len() has to be int and is not friendly to tracing 162 | return v.__len__() 163 | raise NotImplementedError("Empty Instances does not support __len__!") 164 | 165 | def __iter__(self): 166 | raise NotImplementedError("`Instances` object is not iterable!") 167 | 168 | @staticmethod 169 | def cat(instance_lists: List["Instances"]) -> "Instances": 170 | """ 171 | Args: 172 | instance_lists (list[Instances]) 173 | Returns: 174 | Instances 175 | """ 176 | assert all(isinstance(i, Instances) for i in instance_lists) 177 | assert len(instance_lists) > 0 178 | if len(instance_lists) == 1: 179 | return instance_lists[0] 180 | 181 | image_size = instance_lists[0].image_size 182 | for i in instance_lists[1:]: 183 | assert i.image_size == image_size 184 | ret = Instances(image_size) 185 | for k in instance_lists[0]._fields.keys(): 186 | values = [i.get(k) for i in instance_lists] 187 | v0 = values[0] 188 | if isinstance(v0, torch.Tensor): 189 | values = torch.cat(values, dim=0) 190 | elif isinstance(v0, list): 191 | values = list(itertools.chain(*values)) 192 | elif hasattr(type(v0), "cat"): 193 | values = type(v0).cat(values) 194 | else: 195 | raise ValueError("Unsupported type {} for concatenation".format(type(v0))) 196 | ret.set(k, values) 197 | return ret 198 | 199 | def clone(self): 200 | ret = Instances(self._image_size) 201 | for k, v in self._fields.items(): 202 | if hasattr(v, 'clone'): 203 | v = v.clone() 204 | ret.set(k, v) 205 | return ret 206 | 207 | def __str__(self) -> str: 208 | s = self.__class__.__name__ + "(" 209 | s += "num_instances={}, ".format(len(self)) 210 | s += "image_height={}, ".format(self._image_size[0]) 211 | s += "image_width={}, ".format(self._image_size[1]) 212 | s += "fields=[{}])".format(", ".join((f"{k}: {v}" for k, v in self._fields.items()))) 213 | return s 214 | 215 | __repr__ = __str__ -------------------------------------------------------------------------------- /projects/tracking_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .nuscenes_tracking_dataset import NuScenesTrackingDataset 2 | from .nuscenes_forecasting_bbox import NuScenesForecastingBox 3 | from .pipelines import (FormatBundle3DTrack, ScaleMultiViewImage3D, TrackInstanceRangeFilter, TrackObjectNameFilter, TrackLoadAnnotations3D, 4 | TrackPadMultiViewImage, TrackNormalizeMultiviewImage, TrackResizeMultiview3D, TrackResizeCropFlipImage, TrackGlobalRotScaleTransImage) 5 | -------------------------------------------------------------------------------- /projects/tracking_plugin/datasets/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/datasets/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/datasets/__pycache__/nuscenes_forecasting_bbox.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/datasets/__pycache__/nuscenes_forecasting_bbox.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/datasets/__pycache__/nuscenes_tracking_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/datasets/__pycache__/nuscenes_tracking_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/datasets/nuscenes_forecasting_bbox.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2023 toyota research instutute. 3 | # ------------------------------------------------------------------------ 4 | from nuscenes.utils.data_classes import Box as NuScenesBox 5 | from typing import Tuple, List, Dict 6 | from pyquaternion import Quaternion 7 | import numpy as np, copy 8 | 9 | 10 | class NuScenesForecastingBox(NuScenesBox): 11 | def __init__(self, 12 | center: List[float], 13 | size: List[float], 14 | orientation: Quaternion, 15 | label: int = np.nan, 16 | score: float = np.nan, 17 | velocity: Tuple = (np.nan, np.nan, np.nan), 18 | name: str = None, 19 | token: str = None, 20 | forecasting: List[float] = None): 21 | """ 22 | :param center: Center of box given as x, y, z. 23 | :param size: Size of box in width, length, height. 24 | :param orientation: Box orientation. 25 | :param label: Integer label, optional. 26 | :param score: Classification score, optional. 27 | :param velocity: Box velocity in x, y, z direction. 28 | :param name: Box name, optional. Can be used e.g. for denote category name. 29 | :param token: Unique string identifier from DB. 
30 | :param forecasting: Forecasted trajectory waypoints, optional. 31 | """ 32 | super(NuScenesForecastingBox, self).__init__(center, size, orientation, label, 33 | score, velocity, name, token) 34 | self.forecasting = forecasting 35 | 36 | def rotate(self, quaternion: Quaternion) -> None: 37 | self.center = np.dot(quaternion.rotation_matrix, self.center) 38 | self.orientation = quaternion * self.orientation 39 | self.velocity = np.dot(quaternion.rotation_matrix, self.velocity) 40 | if self.forecasting is not None: 41 | self.forecasting = np.dot(quaternion.rotation_matrix[:2, :2], self.forecasting.T).T 42 | 43 | def copy(self) -> 'NuScenesForecastingBox': 44 | return copy.deepcopy(self) 45 | -------------------------------------------------------------------------------- /projects/tracking_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline import (FormatBundle3DTrack, ScaleMultiViewImage3D, TrackInstanceRangeFilter, 2 | TrackLoadAnnotations3D, TrackObjectNameFilter) 3 | from .track_transform_3d import ( 4 | TrackPadMultiViewImage, TrackNormalizeMultiviewImage, 5 | TrackResizeMultiview3D, 6 | TrackResizeCropFlipImage, 7 | TrackGlobalRotScaleTransImage 8 | ) -------------------------------------------------------------------------------- /projects/tracking_plugin/datasets/pipelines/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/datasets/pipelines/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/datasets/pipelines/__pycache__/pipeline.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/datasets/pipelines/__pycache__/pipeline.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/datasets/pipelines/__pycache__/track_transform_3d.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/datasets/pipelines/__pycache__/track_transform_3d.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .dense_heads import PETRCamTrackingHead, DETR3DCamTrackingHead 2 | from .losses import TrackingLossBase, TrackingLoss 3 | from .trackers import Cam3DTracker 4 | from .utils import PETRTrackingTransformer -------------------------------------------------------------------------------- /projects/tracking_plugin/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/__pycache__/__init__.cpython-38.pyc --------------------------------------------------------------------------------
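The `rotate` override in NuScenesForecastingBox above is the main difference from the stock nuScenes `Box`: the forecasted waypoints are rotated in the BEV plane together with the box itself. A minimal sketch of that behavior follows; the box geometry and the 4-step forecast are invented for illustration, and the import assumes the repository root is on `PYTHONPATH`.

import numpy as np
from pyquaternion import Quaternion
from projects.tracking_plugin.datasets import NuScenesForecastingBox

# Hypothetical box at the origin with a straight-ahead forecast along +x.
box = NuScenesForecastingBox(
    center=[0.0, 0.0, 0.0],
    size=[1.8, 4.5, 1.6],  # width, length, height
    orientation=Quaternion(axis=[0, 0, 1], angle=0.0),
    velocity=(1.0, 0.0, 0.0),
    forecasting=np.array([[1.0, 0.0], [2.0, 0.0], [3.0, 0.0], [4.0, 0.0]]),
)

# A 90-degree yaw moves the forecast onto +y, matching the [:2, :2]
# rotation applied to `forecasting` inside `rotate`.
box.rotate(Quaternion(axis=[0, 0, 1], angle=np.pi / 2))
print(box.forecasting)  # approximately [[0, 1], [0, 2], [0, 3], [0, 4]]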
/projects/tracking_plugin/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .petr_tracking_head import PETRCamTrackingHead 2 | from .detr3d_tracking_head import DETR3DCamTrackingHead -------------------------------------------------------------------------------- /projects/tracking_plugin/models/dense_heads/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/dense_heads/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/dense_heads/__pycache__/detr3d_tracking_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/dense_heads/__pycache__/detr3d_tracking_head.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/dense_heads/__pycache__/petr_tracking_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/dense_heads/__pycache__/petr_tracking_head.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .tracking_loss_base import TrackingLossBase 2 | from .tracking_loss import TrackingLoss 3 | from .tracking_loss_prediction import TrackingLossPrediction 4 | from .tracking_loss_mem_bank import TrackingLossMemBank 5 | from .tracking_loss_combo import TrackingLossCombo 6 | -------------------------------------------------------------------------------- /projects/tracking_plugin/models/losses/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/losses/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/losses/__pycache__/tracking_loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/losses/__pycache__/tracking_loss.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/losses/__pycache__/tracking_loss_base.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/losses/__pycache__/tracking_loss_base.cpython-38.pyc -------------------------------------------------------------------------------- 
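The loss variants exported from this `losses` package are all registered in mmdet's `LOSSES` registry, so a tracker selects one purely through its config. The following is a hedged sketch of such a config entry, reusing the default arguments of `TrackingLossCombo` shown below; the `num_classes` value and the surrounding usage are illustrative, not taken from this repository's configs.

loss_cfg = dict(
    type='TrackingLossCombo',
    num_classes=10,  # illustrative value
    loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=2.0),
    loss_bbox=dict(type='L1Loss', loss_weight=0.25),
    loss_prediction=dict(type='L1Loss', loss_weight=1.0),
    assigner=dict(
        type='HungarianAssigner3D',
        cls_cost=dict(type='FocalLossCost', weight=2.0),
        reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
        iou_cost=dict(type='IoUCost', weight=0.0),  # fake cost, kept for DETR-head compatibility
        pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]))

# A registered loss is typically constructed through the registry, e.g.:
# from mmdet.models import build_loss
# criterion = build_loss(loss_cfg)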
/projects/tracking_plugin/models/losses/__pycache__/tracking_loss_combo.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/losses/__pycache__/tracking_loss_combo.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/losses/__pycache__/tracking_loss_mem_bank.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/losses/__pycache__/tracking_loss_mem_bank.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/losses/__pycache__/tracking_loss_prediction.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/losses/__pycache__/tracking_loss_prediction.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/losses/tracking_loss_combo.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) Toyota Research Institute 3 | # ------------------------------------------------------------------------ 4 | # Modified from PETR (https://github.com/megvii-research/PETR) 5 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 6 | # ------------------------------------------------------------------------ 7 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 8 | # Copyright (c) 2021 Wang, Yue 9 | # ------------------------------------------------------------------------ 10 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 11 | # Copyright (c) OpenMMLab. All rights reserved. 
12 | # ------------------------------------------------------------------------ 13 | 14 | import torch 15 | import torch.nn as nn 16 | import torch.nn.functional as F 17 | from mmcv.runner import force_fp32 18 | from mmdet.models import LOSSES 19 | from mmdet.models import build_loss 20 | from mmdet.core import (build_assigner, reduce_mean, multi_apply, build_sampler) 21 | from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox 22 | from .tracking_loss import TrackingLoss 23 | 24 | 25 | @LOSSES.register_module() 26 | class TrackingLossCombo(TrackingLoss): 27 | """ Tracking loss with reference point supervision 28 | """ 29 | def __init__(self, 30 | num_classes, 31 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], 32 | sync_cls_avg_factor=False, 33 | interm_loss=True, 34 | loss_cls=dict( 35 | type='FocalLoss', 36 | use_sigmoid=True, 37 | gamma=2.0, 38 | alpha=0.25, 39 | loss_weight=2.0), 40 | loss_bbox=dict(type='L1Loss', loss_weight=0.25), 41 | loss_iou=dict(type='GIoULoss', loss_weight=0.0), 42 | loss_prediction=dict(type='L1Loss', loss_weight=1.0), 43 | assigner=dict( 44 | type='HungarianAssigner3D', 45 | cls_cost=dict(type='FocalLossCost', weight=2.0), 46 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25), 47 | iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head. 48 | pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0])): 49 | 50 | super(TrackingLoss, self).__init__( 51 | num_classes, code_weights, sync_cls_avg_factor, interm_loss, 52 | loss_cls, loss_bbox, loss_iou, assigner) 53 | self.loss_traj = build_loss(loss_prediction) 54 | self.loss_mem_cls = build_loss(loss_cls) 55 | # self.loc_refine_code_weights = [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0] 56 | self.loc_refine_code_weights = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2] 57 | 58 | def loss_prediction(self, 59 | frame_idx, 60 | loss_dict, 61 | gt_trajs, 62 | gt_masks, 63 | pred_trajs, 64 | loss_key='for'): 65 | loss_prediction = self.loss_traj( 66 | gt_trajs[..., :2] * gt_masks.unsqueeze(-1), 67 | pred_trajs[..., :2] * gt_masks.unsqueeze(-1)) 68 | loss_dict[f'f{frame_idx}.loss_{loss_key}'] = loss_prediction 69 | return loss_dict 70 | 71 | def loss_mem_bank(self, 72 | frame_idx, 73 | loss_dict, 74 | gt_bboxes_list, 75 | gt_labels_list, 76 | instance_ids, 77 | track_instances): 78 | obj_idxes_list = instance_ids[0].detach().cpu().numpy().tolist() 79 | obj_idx_to_gt_idx = {obj_idx: gt_idx for gt_idx, obj_idx in enumerate(obj_idxes_list)} 80 | device = track_instances.query_feats.device 81 | 82 | # classification loss 83 | matched_labels = torch.ones((len(track_instances), ), dtype=torch.long, device=device) * self.num_classes 84 | matched_label_weights = torch.ones((len(track_instances), ), dtype=torch.float32, device=device) 85 | num_pos, num_neg = 0, 0 86 | for track_idx, id in enumerate(track_instances.obj_idxes): 87 | cpu_id = id.cpu().numpy().tolist() 88 | if cpu_id not in obj_idx_to_gt_idx.keys(): 89 | num_neg += 1 90 | continue 91 | index = obj_idx_to_gt_idx[cpu_id] 92 | matched_labels[track_idx] = gt_labels_list[0][index].long() 93 | num_pos += 1 94 | 95 | labels_list = matched_labels 96 | label_weights_list = matched_label_weights 97 | cls_scores = track_instances.cache_logits 98 | 99 | cls_avg_factor = num_pos * 1.0 + \ 100 | num_neg * self.bg_cls_weight 101 | if self.sync_cls_avg_factor: 102 | cls_avg_factor = reduce_mean( 103 | cls_scores.new_tensor([cls_avg_factor])) 104 | 105 | cls_avg_factor = max(cls_avg_factor, 1) 106 | loss_cls 
= self.loss_mem_cls( 107 | cls_scores, labels_list, label_weights_list, avg_factor=cls_avg_factor) 108 | loss_cls = torch.nan_to_num(loss_cls) 109 | 110 | loss_dict[f'f{frame_idx}.loss_mem_cls'] = loss_cls 111 | 112 | # location refinement loss 113 | gt_bboxes_list = [torch.cat( 114 | (gt_bboxes.gravity_center, gt_bboxes.tensor[:, 3:]), 115 | dim=1).to(device) for gt_bboxes in gt_bboxes_list] 116 | 117 | pos_bbox_num = 0 118 | matched_bbox_targets = torch.zeros((len(track_instances), gt_bboxes_list[0].shape[1]), dtype=torch.float32, device=device) 119 | matched_bbox_weights = torch.zeros((len(track_instances),len(self.loc_refine_code_weights)), dtype=torch.float32, device=device) 120 | for track_idx, id in enumerate(track_instances.obj_idxes): 121 | cpu_id = id.cpu().numpy().tolist() 122 | if cpu_id not in obj_idx_to_gt_idx.keys(): 123 | matched_bbox_weights[track_idx] = 0.0 124 | continue 125 | index = obj_idx_to_gt_idx[cpu_id] 126 | matched_bbox_targets[track_idx] = gt_bboxes_list[0][index].float() 127 | matched_bbox_weights[track_idx] = 1.0 128 | pos_bbox_num += 1 129 | 130 | normalized_bbox_targets = normalize_bbox(matched_bbox_targets, self.pc_range) 131 | isnotnan = torch.isfinite(normalized_bbox_targets).all(dim=-1) 132 | bbox_weights = matched_bbox_weights * torch.tensor(self.loc_refine_code_weights).to(device) 133 | 134 | loss_bbox = self.loss_bbox( 135 | track_instances.cache_bboxes[isnotnan, :10], normalized_bbox_targets[isnotnan, :10], bbox_weights[isnotnan, :10], avg_factor=pos_bbox_num) 136 | loss_dict[f'f{frame_idx}.loss_mem_bbox'] = loss_bbox 137 | return loss_dict 138 | 139 | @force_fp32(apply_to=('preds_dicts')) 140 | def forward(self, 141 | preds_dicts): 142 | """Loss function for multi-frame tracking 143 | """ 144 | frame_num = len(preds_dicts) 145 | losses_dicts = [p.pop('loss_dict') for p in preds_dicts] 146 | loss_dict = dict() 147 | 148 | for key in losses_dicts[-1].keys(): 149 | # example loss_dict["d2.loss_cls"] = losses_dicts[-1]["f0.d2.loss_cls"] 150 | loss_dict[key[3:]] = losses_dicts[-1][key] 151 | 152 | for frame_loss in losses_dicts[:-1]: 153 | loss_dict.update(frame_loss) 154 | 155 | return loss_dict 156 | 157 | 158 | def nan_to_num(x, nan=0.0, posinf=None, neginf=None): 159 | x[torch.isnan(x)]= nan 160 | if posinf is not None: 161 | x[torch.isposinf(x)] = posinf 162 | if neginf is not None: 163 | x[torch.isneginf(x)] = posinf 164 | return x -------------------------------------------------------------------------------- /projects/tracking_plugin/models/losses/tracking_loss_mem_bank.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) Toyota Research Institute 3 | # ------------------------------------------------------------------------ 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from mmcv.runner import force_fp32 8 | from mmdet.models import LOSSES 9 | from mmdet.models import build_loss 10 | from mmdet.core import (build_assigner, reduce_mean, multi_apply, build_sampler) 11 | from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox 12 | from .tracking_loss import TrackingLoss 13 | 14 | 15 | @LOSSES.register_module() 16 | class TrackingLossMemBank(TrackingLoss): 17 | def __init__(self, 18 | num_classes, 19 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], 20 | sync_cls_avg_factor=False, 21 | interm_loss=True, 22 | loss_cls=dict( 23 | type='FocalLoss', 24 | 
use_sigmoid=True, 25 | gamma=2.0, 26 | alpha=0.25, 27 | loss_weight=2.0), 28 | loss_bbox=dict(type='L1Loss', loss_weight=0.25), 29 | loss_iou=dict(type='GIoULoss', loss_weight=0.0), 30 | assigner=dict( 31 | type='HungarianAssigner3D', 32 | cls_cost=dict(type='FocalLossCost', weight=2.0), 33 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25), 34 | iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head. 35 | pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0])): 36 | 37 | super(TrackingLoss, self).__init__( 38 | num_classes, code_weights, sync_cls_avg_factor, interm_loss, 39 | loss_cls, loss_bbox, loss_iou, assigner) 40 | self.loss_mem_cls = build_loss(loss_cls) 41 | self.loc_refine_code_weights = [1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0] 42 | 43 | def loss_mem_bank(self, 44 | frame_idx, 45 | loss_dict, 46 | gt_bboxes_list, 47 | gt_labels_list, 48 | instance_ids, 49 | track_instances): 50 | obj_idxes_list = instance_ids[0].detach().cpu().numpy().tolist() 51 | obj_idx_to_gt_idx = {obj_idx: gt_idx for gt_idx, obj_idx in enumerate(obj_idxes_list)} 52 | device = track_instances.output_embedding.device 53 | 54 | # classification loss 55 | matched_labels = torch.ones((len(track_instances), ), dtype=torch.long, device=device) * self.num_classes 56 | matched_label_weights = torch.ones((len(track_instances), ), dtype=torch.float32, device=device) 57 | num_pos, num_neg = 0, 0 58 | for track_idx, id in enumerate(track_instances.obj_idxes): 59 | cpu_id = id.cpu().numpy().tolist() 60 | if cpu_id not in obj_idx_to_gt_idx.keys(): 61 | num_neg += 1 62 | continue 63 | index = obj_idx_to_gt_idx[cpu_id] 64 | matched_labels[track_idx] = gt_labels_list[0][index].long() 65 | num_pos += 1 66 | 67 | labels_list = matched_labels 68 | label_weights_list = matched_label_weights 69 | cls_scores = track_instances.mem_pred_logits[:, -1, :] 70 | 71 | cls_avg_factor = num_pos * 1.0 + \ 72 | num_neg * self.bg_cls_weight 73 | if self.sync_cls_avg_factor: 74 | cls_avg_factor = reduce_mean( 75 | cls_scores.new_tensor([cls_avg_factor])) 76 | 77 | cls_avg_factor = max(cls_avg_factor, 1) 78 | loss_cls = self.loss_mem_cls( 79 | cls_scores, labels_list, label_weights_list, avg_factor=cls_avg_factor) 80 | loss_cls = torch.nan_to_num(loss_cls) 81 | 82 | loss_dict[f'f{frame_idx}.loss_mem_cls'] = loss_cls 83 | 84 | # location refinement loss 85 | gt_bboxes_list = [torch.cat( 86 | (gt_bboxes.gravity_center, gt_bboxes.tensor[:, 3:]), 87 | dim=1).to(device) for gt_bboxes in gt_bboxes_list] 88 | 89 | pos_bbox_num = 0 90 | matched_bbox_targets = torch.zeros((len(track_instances), gt_bboxes_list[0].shape[1]), dtype=torch.float32, device=device) 91 | matched_bbox_weights = torch.zeros((len(track_instances),len(self.loc_refine_code_weights)), dtype=torch.float32, device=device) 92 | for track_idx, id in enumerate(track_instances.obj_idxes): 93 | cpu_id = id.cpu().numpy().tolist() 94 | if cpu_id not in obj_idx_to_gt_idx.keys(): 95 | matched_bbox_weights[track_idx] = 0.0 96 | continue 97 | index = obj_idx_to_gt_idx[cpu_id] 98 | matched_bbox_targets[track_idx] = gt_bboxes_list[0][index].float() 99 | matched_bbox_weights[track_idx] = 1.0 100 | pos_bbox_num += 1 101 | 102 | normalized_bbox_targets = normalize_bbox(matched_bbox_targets, self.pc_range) 103 | isnotnan = torch.isfinite(normalized_bbox_targets).all(dim=-1) 104 | bbox_weights = matched_bbox_weights * torch.tensor(self.loc_refine_code_weights).to(device) 105 | 106 | loss_bbox = self.loss_bbox( 107 | track_instances.bbox_preds[isnotnan, 
:10], normalized_bbox_targets[isnotnan, :10], bbox_weights[isnotnan, :10], avg_factor=pos_bbox_num) 108 | loss_dict[f'f{frame_idx}.loss_mem_bbox'] = loss_bbox 109 | return loss_dict 110 | 111 | @force_fp32(apply_to=('preds_dicts')) 112 | def forward(self, 113 | preds_dicts): 114 | """Loss function for multi-frame tracking 115 | """ 116 | frame_num = len(preds_dicts) 117 | losses_dicts = [p.pop('loss_dict') for p in preds_dicts] 118 | loss_dict = dict() 119 | for key in losses_dicts[-1].keys(): 120 | # example loss_dict["d2.loss_cls"] = losses_dicts[-1]["f0.d2.loss_cls"] 121 | loss_dict[key[3:]] = losses_dicts[-1][key] 122 | 123 | for frame_loss in losses_dicts[:-1]: 124 | loss_dict.update(frame_loss) 125 | 126 | return loss_dict -------------------------------------------------------------------------------- /projects/tracking_plugin/models/losses/tracking_loss_prediction.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) Toyota Research Institute 3 | # ------------------------------------------------------------------------ 4 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 5 | # Copyright (c) OpenMMLab. All rights reserved. 6 | # ------------------------------------------------------------------------ 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from mmcv.runner import force_fp32 12 | from mmdet.models import LOSSES 13 | from mmdet.models import build_loss 14 | from mmdet.core import (build_assigner, reduce_mean, multi_apply, build_sampler) 15 | from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox 16 | from .tracking_loss import TrackingLoss 17 | 18 | 19 | @LOSSES.register_module() 20 | class TrackingLossPrediction(TrackingLoss): 21 | """ Tracking loss with reference point supervision 22 | """ 23 | def __init__(self, 24 | num_classes, 25 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], 26 | sync_cls_avg_factor=False, 27 | interm_loss=True, 28 | loss_cls=dict( 29 | type='FocalLoss', 30 | use_sigmoid=True, 31 | gamma=2.0, 32 | alpha=0.25, 33 | loss_weight=2.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=0.25), 35 | loss_iou=dict(type='GIoULoss', loss_weight=0.0), 36 | loss_prediction=dict(type='L1Loss', loss_weight=1.0), 37 | assigner=dict( 38 | type='HungarianAssigner3D', 39 | cls_cost=dict(type='FocalLossCost', weight=2.0), 40 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25), 41 | iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head. 
42 | pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0])): 43 | 44 | super(TrackingLoss, self).__init__( 45 | num_classes, code_weights, sync_cls_avg_factor, interm_loss, 46 | loss_cls, loss_bbox, loss_iou, assigner) 47 | self.loss_traj = build_loss(loss_prediction) 48 | 49 | def loss_prediction(self, 50 | frame_idx, 51 | loss_dict, 52 | gt_trajs, 53 | gt_masks, 54 | pred_trajs, 55 | loss_key='for'): 56 | loss_prediction = self.loss_traj( 57 | gt_trajs[..., :2] * gt_masks.unsqueeze(-1), 58 | pred_trajs[..., :2] * gt_masks.unsqueeze(-1)) 59 | loss_dict[f'f{frame_idx}.loss_{loss_key}'] = loss_prediction 60 | return loss_dict 61 | 62 | @force_fp32(apply_to=('preds_dicts')) 63 | def forward(self, 64 | preds_dicts): 65 | """Loss function for multi-frame tracking 66 | """ 67 | frame_num = len(preds_dicts) 68 | losses_dicts = [p.pop('loss_dict') for p in preds_dicts] 69 | loss_dict = dict() 70 | 71 | for key in losses_dicts[-1].keys(): 72 | # example loss_dict["d2.loss_cls"] = losses_dicts[-1]["f0.d2.loss_cls"] 73 | loss_dict[key[3:]] = losses_dicts[-1][key] 74 | 75 | for frame_loss in losses_dicts[:-1]: 76 | loss_dict.update(frame_loss) 77 | 78 | return loss_dict 79 | 80 | 81 | def nan_to_num(x, nan=0.0, posinf=None, neginf=None): 82 | x[torch.isnan(x)]= nan 83 | if posinf is not None: 84 | x[torch.isposinf(x)] = posinf 85 | if neginf is not None: 86 | x[torch.isneginf(x)] = posinf 87 | return x -------------------------------------------------------------------------------- /projects/tracking_plugin/models/trackers/__init__.py: -------------------------------------------------------------------------------- 1 | from .tracker import Cam3DTracker -------------------------------------------------------------------------------- /projects/tracking_plugin/models/trackers/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/trackers/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/trackers/__pycache__/runtime_tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/trackers/__pycache__/runtime_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/trackers/__pycache__/spatial_temporal_reason.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/trackers/__pycache__/spatial_temporal_reason.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/trackers/__pycache__/tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/trackers/__pycache__/tracker.cpython-38.pyc -------------------------------------------------------------------------------- 
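The multi-frame `forward` methods of the loss classes above all aggregate per-frame dictionaries the same way: keys from the last frame are re-exposed with their `f{idx}.` prefix stripped, so standard logging hooks see the usual `d2.loss_cls`-style names, while earlier frames keep their prefixed keys. A toy illustration of that remapping follows (the values are invented); note that, as written, the `key[3:]` slice assumes a single-digit frame index.

# Illustration of the key remapping done in the TrackingLoss* forward methods.
losses_dicts = [
    {'f0.d2.loss_cls': 0.9, 'f0.d2.loss_bbox': 1.2},  # frame 0
    {'f1.d2.loss_cls': 0.7, 'f1.d2.loss_bbox': 1.0},  # frame 1 (last frame)
]

loss_dict = dict()
for key in losses_dicts[-1].keys():
    loss_dict[key[3:]] = losses_dicts[-1][key]   # 'f1.d2.loss_cls' -> 'd2.loss_cls'
for frame_loss in losses_dicts[:-1]:
    loss_dict.update(frame_loss)                 # earlier frames keep the prefix

# loss_dict == {'d2.loss_cls': 0.7, 'd2.loss_bbox': 1.0,
#               'f0.d2.loss_cls': 0.9, 'f0.d2.loss_bbox': 1.2}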
/projects/tracking_plugin/models/trackers/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/trackers/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/trackers/runtime_tracker.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2023 toyota research instutute. 3 | # ------------------------------------------------------------------------ 4 | from projects.tracking_plugin.core.instances import Instances 5 | import torch 6 | import numpy as np 7 | 8 | 9 | class RunTimeTracker: 10 | def __init__(self, output_threshold=0.2, score_threshold=0.4, record_threshold=0.4, 11 | max_age_since_update=1,): 12 | self.current_id = 1 13 | self.current_seq = 0 14 | self.timestamp = None 15 | self.time_delta = None 16 | self.query_embeddings = None 17 | self.reference_points = None 18 | self.frame_index = 0 19 | 20 | self.track_instances = None 21 | self.timestamp = None 22 | self.first_frame = None 23 | 24 | self.threshold = score_threshold 25 | self.output_threshold = output_threshold 26 | self.record_threshold = record_threshold 27 | self.max_age_since_update = max_age_since_update 28 | 29 | def update_active_tracks(self, track_instances, active_mask): 30 | # first frame 31 | if self.track_instances is None: 32 | self.track_instances = track_instances[active_mask] 33 | return 34 | 35 | live_mask = torch.zeros_like(track_instances.obj_idxes).bool().detach() 36 | for i in range(len(track_instances)): 37 | if active_mask[i]: 38 | track_instances.disappear_time[i] = 0 39 | live_mask[i] = True 40 | elif track_instances.track_query_mask[i]: 41 | track_instances.disappear_time[i] += 1 42 | if track_instances.disappear_time[i] < self.max_age_since_update: 43 | live_mask[i] = True 44 | self.track_instances = track_instances[live_mask] 45 | return 46 | 47 | def get_active_mask(self, track_instances, training=True): 48 | if training: 49 | active_mask = (track_instances.matched_gt_idxes >= 0) 50 | return active_mask 51 | 52 | def empty(self): 53 | """Copy the historical buffer parts from the init 54 | """ 55 | self.current_id = 1 56 | self.current_seq = 0 57 | self.timestamp = None 58 | self.query_embeddings = None 59 | self.reference_points = None 60 | self.frame_index = 0 61 | 62 | self.track_instances = None 63 | self.timestamp = None 64 | self.first_frame = None 65 | -------------------------------------------------------------------------------- /projects/tracking_plugin/models/trackers/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2023 toyota research instutute. 
3 | # ------------------------------------------------------------------------ 4 | import math, torch, numpy as np 5 | 6 | 7 | def ts2tsemb1d(ts, num_pos_feats=128, temperature=10000): 8 | scale = 2 * math.pi 9 | ts = ts * scale 10 | dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=ts.device) 11 | dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) 12 | pos = ts[..., 0, None] / dim_t 13 | posemb = torch.stack((pos[..., 0::2].sin(), pos[..., 1::2].cos()), dim=-1).flatten(-2) 14 | return posemb 15 | 16 | 17 | def time_position_embedding(track_num, frame_num, embed_dims, device): 18 | ts = torch.arange(0, 1 + 1e-5, 1/(frame_num - 1), dtype=torch.float32, device=device) 19 | ts = ts[None, :] * torch.ones((track_num, frame_num), dtype=torch.float32, device=device) 20 | ts_embed = ts2tsemb1d(ts.view(track_num * frame_num, 1), num_pos_feats=embed_dims).view(track_num, frame_num, embed_dims) 21 | return ts_embed 22 | 23 | 24 | def xyz_ego_transformation(xyz, l2g0, l2g1, pc_range, 25 | src_normalized=True, tgt_normalized=True): 26 | """Transform xyz coordinates from l2g0 to l2g1 27 | xyz has to be denormalized 28 | """ 29 | # denormalized to the physical coordinates 30 | if src_normalized: 31 | xyz = denormalize(xyz, pc_range) 32 | 33 | # to global, then to next local 34 | if torch.__version__ < '1.9.0': 35 | g2l1 = torch.tensor(np.linalg.inv(l2g1.cpu().numpy())).type(torch.float).to(l2g1.device) 36 | else: 37 | g2l1 = torch.linalg.inv(l2g1).type(torch.float) 38 | xyz = xyz @ l2g0[:3, :3].T + l2g0[:3, 3] - l2g1[:3, 3] 39 | xyz = xyz @ g2l1[:3, :3].T 40 | 41 | # normalize to 0-1 42 | if tgt_normalized: 43 | xyz = normalize(xyz, pc_range) 44 | return xyz 45 | 46 | 47 | def normalize(xyz, pc_range): 48 | xyz[..., 0:1] = (xyz[..., 0:1] - pc_range[0]) / (pc_range[3] - pc_range[0]) 49 | xyz[..., 1:2] = (xyz[..., 1:2] - pc_range[1]) / (pc_range[4] - pc_range[1]) 50 | xyz[..., 2:3] = (xyz[..., 2:3] - pc_range[2]) / (pc_range[5] - pc_range[2]) 51 | return xyz 52 | 53 | def denormalize(xyz, pc_range): 54 | xyz[..., 0:1] = xyz[..., 0:1] * (pc_range[3] - pc_range[0]) + pc_range[0] 55 | xyz[..., 1:2] = xyz[..., 1:2] * (pc_range[4] - pc_range[1]) + pc_range[1] 56 | xyz[..., 2:3] = xyz[..., 2:3] * (pc_range[5] - pc_range[2]) + pc_range[2] 57 | return xyz 58 | -------------------------------------------------------------------------------- /projects/tracking_plugin/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .petr_tracker_transformer import PETRTrackingTransformer 2 | from .detr3d_tracking_transformer import Detr3DTrackingTransformer 3 | from .temporal_transformer import TemporalTransformer -------------------------------------------------------------------------------- /projects/tracking_plugin/models/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/utils/__pycache__/detr3d_tracking_transformer.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/utils/__pycache__/detr3d_tracking_transformer.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/utils/__pycache__/petr_tracker_transformer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/utils/__pycache__/petr_tracker_transformer.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/utils/__pycache__/temporal_transformer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/models/utils/__pycache__/temporal_transformer.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/models/utils/detr3d_tracking_transformer.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2023 toyota research instutute. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | import numpy as np 8 | import warnings 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | from mmcv.cnn import xavier_init, constant_init 13 | from mmcv.cnn.bricks.registry import (ATTENTION, 14 | TRANSFORMER_LAYER_SEQUENCE) 15 | from mmcv.cnn.bricks.transformer import (MultiScaleDeformableAttention, 16 | TransformerLayerSequence, 17 | build_transformer_layer_sequence) 18 | from mmcv.runner.base_module import BaseModule 19 | from mmdet.models.utils.builder import TRANSFORMER 20 | from projects.mmdet3d_plugin.models.utils.detr3d_transformer import Detr3DCrossAtten 21 | 22 | 23 | def inverse_sigmoid(x, eps=1e-5): 24 | """Inverse function of sigmoid. 25 | Args: 26 | x (Tensor): The tensor to do the 27 | inverse. 28 | eps (float): EPS avoid numerical 29 | overflow. Defaults 1e-5. 30 | Returns: 31 | Tensor: The x has passed the inverse 32 | function of sigmoid, has same 33 | shape with input. 34 | """ 35 | x = x.clamp(min=0, max=1) 36 | x1 = x.clamp(min=eps) 37 | x2 = (1 - x).clamp(min=eps) 38 | return torch.log(x1 / x2) 39 | 40 | 41 | @TRANSFORMER.register_module() 42 | class Detr3DTrackingTransformer(BaseModule): 43 | """Implements the Detr3D transformer. 44 | Args: 45 | as_two_stage (bool): Generate query from encoder features. 46 | Default: False. 47 | num_feature_levels (int): Number of feature maps from FPN: 48 | Default: 4. 49 | two_stage_num_proposals (int): Number of proposals when set 50 | `as_two_stage` as True. Default: 300. 
51 | """ 52 | 53 | def __init__(self, 54 | num_feature_levels=4, 55 | num_cams=6, 56 | two_stage_num_proposals=300, 57 | decoder=None, 58 | **kwargs): 59 | super(Detr3DTrackingTransformer, self).__init__(**kwargs) 60 | self.decoder = build_transformer_layer_sequence(decoder) 61 | self.embed_dims = self.decoder.embed_dims 62 | self.num_feature_levels = num_feature_levels 63 | self.num_cams = num_cams 64 | self.two_stage_num_proposals = two_stage_num_proposals 65 | self.init_layers() 66 | 67 | def init_layers(self): 68 | """Initialize layers of the Detr3DTransformer.""" 69 | return 70 | 71 | def init_weights(self): 72 | """Initialize the transformer weights.""" 73 | for p in self.parameters(): 74 | if p.dim() > 1: 75 | nn.init.xavier_uniform_(p) 76 | for m in self.modules(): 77 | if isinstance(m, MultiScaleDeformableAttention) or isinstance(m, Detr3DCrossAtten): 78 | m.init_weight() 79 | return 80 | 81 | def forward(self, 82 | mlvl_feats, 83 | query_targets, 84 | query_embeds, 85 | reference_points, 86 | reg_branches=None, 87 | **kwargs): 88 | """Forward function for `Detr3DTransformer`. 89 | Args: 90 | mlvl_feats (list(Tensor)): Input queries from 91 | different level. Each element has shape 92 | [bs, embed_dims, h, w]. 93 | query_embed (Tensor): The query embedding for decoder, 94 | with shape [num_query, c]. 95 | mlvl_pos_embeds (list(Tensor)): The positional encoding 96 | of feats from different level, has the shape 97 | [bs, embed_dims, h, w]. 98 | reg_branches (obj:`nn.ModuleList`): Regression heads for 99 | feature maps from each decoder layer. Only would 100 | be passed when 101 | `with_box_refine` is True. Default to None. 102 | Returns: 103 | tuple[Tensor]: results of decoder containing the following tensor. 104 | - inter_states: Outputs from decoder. If 105 | return_intermediate_dec is True output has shape \ 106 | (num_dec_layers, bs, num_query, embed_dims), else has \ 107 | shape (1, bs, num_query, embed_dims). 108 | - init_reference_out: The initial value of reference \ 109 | points, has shape (bs, num_queries, 4). 110 | - inter_references_out: The internal value of reference \ 111 | points in decoder, has shape \ 112 | (num_dec_layers, bs,num_query, embed_dims) 113 | - enc_outputs_class: The classification score of \ 114 | proposals generated from \ 115 | encoder's feature maps, has shape \ 116 | (batch, h*w, num_classes). \ 117 | Only would be returned when `as_two_stage` is True, \ 118 | otherwise None. 119 | - enc_outputs_coord_unact: The regression results \ 120 | generated from encoder's feature maps., has shape \ 121 | (batch, h*w, 4). Only would \ 122 | be returned when `as_two_stage` is True, \ 123 | otherwise None. 
124 | """ 125 | bs = mlvl_feats[0].size(0) 126 | query_embeds = query_embeds.clone().unsqueeze(0).expand(bs, -1, -1) 127 | query = query_targets.clone().unsqueeze(0).expand(bs, -1, -1) 128 | init_reference_out = reference_points 129 | 130 | # decoder 131 | query = query.permute(1, 0, 2) 132 | query_embeds = query_embeds.permute(1, 0, 2) 133 | inter_states, inter_references = self.decoder( 134 | query=query, 135 | key=None, 136 | value=mlvl_feats, 137 | query_pos=query_embeds, 138 | reference_points=reference_points, 139 | reg_branches=reg_branches, 140 | **kwargs) 141 | 142 | inter_references_out = inter_references 143 | return inter_states, init_reference_out, inter_references_out -------------------------------------------------------------------------------- /projects/tracking_plugin/models/utils/petr_tracker_transformer.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 toyota research institute 3 | # ------------------------------------------------------------------------ 4 | # Modified from PETR (https://github.com/megvii-research/PETR) 5 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 6 | # ------------------------------------------------------------------------ 7 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 8 | # Copyright (c) 2021 Wang, Yue 9 | # ------------------------------------------------------------------------ 10 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 11 | # Copyright (c) OpenMMLab. All rights reserved. 12 | # ------------------------------------------------------------------------ 13 | import math 14 | import warnings 15 | from typing import Sequence 16 | 17 | import torch 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | from mmcv.cnn.bricks.transformer import (BaseTransformerLayer, 21 | TransformerLayerSequence, 22 | build_transformer_layer_sequence) 23 | from mmcv.cnn.bricks.drop import build_dropout 24 | from mmdet.models.utils.builder import TRANSFORMER 25 | from mmcv.cnn import (build_activation_layer, build_conv_layer, 26 | build_norm_layer, xavier_init) 27 | from mmcv.runner.base_module import BaseModule 28 | from mmcv.cnn.bricks.registry import (ATTENTION,TRANSFORMER_LAYER, 29 | TRANSFORMER_LAYER_SEQUENCE) 30 | from mmcv.utils import (ConfigDict, build_from_cfg, deprecated_api_warning, 31 | to_2tuple) 32 | import copy 33 | import torch.utils.checkpoint as cp 34 | 35 | 36 | @TRANSFORMER.register_module() 37 | class PETRTrackingTransformer(BaseModule): 38 | """Implements the DETR transformer. 39 | Adding a field of target to denote the query features. 40 | If target is None, the behavior is identical to PETRTransformer. 
41 | """ 42 | def __init__(self, encoder=None, decoder=None, init_cfg=None, cross=False): 43 | super(PETRTrackingTransformer, self).__init__(init_cfg=init_cfg) 44 | if encoder is not None: 45 | self.encoder = build_transformer_layer_sequence(encoder) 46 | else: 47 | self.encoder = None 48 | self.decoder = build_transformer_layer_sequence(decoder) 49 | self.embed_dims = self.decoder.embed_dims 50 | self.cross = cross 51 | 52 | def init_weights(self): 53 | # follow the official DETR to init parameters 54 | for m in self.modules(): 55 | if hasattr(m, 'weight') and m.weight.dim() > 1: 56 | xavier_init(m, distribution='uniform') 57 | self._is_init = True 58 | 59 | def forward(self, target, x, mask, query_embed, pos_embed, reg_branch=None): 60 | bs, n, c, h, w = x.shape 61 | memory = x.permute(1, 3, 4, 0, 2).reshape(-1, bs, c) # [bs, n, c, h, w] -> [n*h*w, bs, c] 62 | pos_embed = pos_embed.permute(1, 3, 4, 0, 2).reshape(-1, bs, c) # [bs, n, c, h, w] -> [n*h*w, bs, c] 63 | query_embed = query_embed.unsqueeze(1).repeat( 64 | 1, bs, 1) # [num_query, dim] -> [num_query, bs, dim] 65 | target = target.unsqueeze(1).repeat( 66 | 1, bs, 1) # [num_query, dim] -> [num_query, bs, dim] 67 | mask = mask.view(bs, -1) # [bs, n, h, w] -> [bs, n*h*w] 68 | 69 | # out_dec: [num_layers, num_query, bs, dim] 70 | out_dec = self.decoder( 71 | query=target, 72 | key=memory, 73 | value=memory, 74 | key_pos=pos_embed, 75 | query_pos=query_embed, 76 | key_padding_mask=mask, 77 | reg_branch=reg_branch, 78 | ) 79 | out_dec = out_dec.transpose(1, 2) 80 | memory = memory.reshape(n, h, w, bs, c).permute(3, 0, 4, 1, 2) 81 | return out_dec, memory 82 | -------------------------------------------------------------------------------- /projects/tracking_plugin/models/utils/temporal_transformer.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute 3 | # ------------------------------------------------------------------------ 4 | import math 5 | import warnings 6 | from typing import Sequence 7 | import pickle 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from mmcv.cnn.bricks.transformer import (BaseTransformerLayer, 12 | TransformerLayerSequence, 13 | build_transformer_layer_sequence) 14 | from mmcv.cnn.bricks.drop import build_dropout 15 | from mmdet.models.utils.builder import TRANSFORMER 16 | from mmcv.cnn import (build_activation_layer, build_conv_layer, 17 | build_norm_layer, xavier_init) 18 | from mmcv.runner.base_module import BaseModule 19 | from mmcv.cnn.bricks.registry import (ATTENTION,TRANSFORMER_LAYER, 20 | TRANSFORMER_LAYER_SEQUENCE) 21 | from mmcv.utils import (ConfigDict, build_from_cfg, deprecated_api_warning, 22 | to_2tuple) 23 | 24 | 25 | @TRANSFORMER.register_module() 26 | class TemporalTransformer(BaseModule): 27 | """Implement a DETR transformer. 28 | Adapting the input and output to the motion reasoning purpose. 
29 | """ 30 | def __init__(self, encoder=None, decoder=None, init_cfg=None, cross=False): 31 | super(TemporalTransformer, self).__init__(init_cfg=init_cfg) 32 | if encoder is not None: 33 | self.encoder = build_transformer_layer_sequence(encoder) 34 | else: 35 | self.encoder = None 36 | self.decoder = build_transformer_layer_sequence(decoder) 37 | self.embed_dims = self.decoder.embed_dims 38 | self.cross = cross 39 | 40 | def init_weights(self): 41 | for m in self.modules(): 42 | if hasattr(m, 'weight') and m.weight.dim() > 1: 43 | xavier_init(m, distribution='uniform') 44 | self._is_init = True 45 | return 46 | 47 | def forward(self, target, x, query_embed, pos_embed, query_key_padding_mask=None, key_padding_mask=None): 48 | """ The general transformer interface for temporal/spatial cross attention 49 | Args: 50 | target: query feature [num_query, len, dim] 51 | x: key/value features [num_query, len, dim] 52 | query_embed: query positional embedding [num_query, len, dim] 53 | pos_embed: key positional embedding [num_query, len, dim] 54 | """ 55 | # suit the shape for transformer 56 | bs = 1 57 | memory = x.transpose(0, 1) 58 | pos_embed = pos_embed.transpose(0, 1) 59 | query_embed = query_embed.transpose(0, 1) # [num_query, dim] -> [num_query, bs, dim] 60 | target = target.transpose(0, 1) # [num_query, dim] -> [num_query, bs, dim] 61 | 62 | # if query_key_padding_mask is not None: 63 | # query_key_padding_mask = query_key_padding_mask.transpose(0, 1) 64 | 65 | # if key_padding_mask is not None: 66 | # key_padding_mask = key_padding_mask.transpose(0, 1) 67 | 68 | # out_dec: [num_layers, num_query, bs, dim] 69 | out_dec = self.decoder( 70 | query=target, 71 | key=memory, 72 | value=memory, 73 | key_pos=pos_embed, 74 | query_pos=query_embed, 75 | query_key_padding_mask=query_key_padding_mask, 76 | key_padding_mask=key_padding_mask)[-1] 77 | out_dec = out_dec.transpose(0, 1) 78 | return out_dec -------------------------------------------------------------------------------- /projects/tracking_plugin/test_track_api.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) toyota research institute 3 | # ------------------------------------------------------------------------ 4 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 5 | # Copyright (c) OpenMMLab. All rights reserved. 6 | # ------------------------------------------------------------------------ 7 | import mmcv 8 | import torch 9 | from mmcv.image import tensor2imgs 10 | from os import path as osp 11 | 12 | 13 | def single_gpu_test_tracking(model, 14 | data_loader, 15 | show=False, 16 | out_dir=None, 17 | show_score_thr=0.3): 18 | """Test tracking model with single gpu. 19 | 20 | This method tests model with single gpu and gives the 'show' option. 21 | By setting ``show=True``, it saves the visualization results under 22 | ``out_dir``. 23 | 24 | Args: 25 | model (nn.Module): Model to be tested. 26 | data_loader (nn.Dataloader): Pytorch data loader. 27 | show (bool): Whether to save viualization results. 28 | Default: True. 29 | out_dir (str): The path to save visualization results. 30 | Default: None. 31 | 32 | Returns: 33 | list[dict]: The prediction results. 
34 | """ 35 | model.eval() 36 | results = [] 37 | dataset = data_loader.dataset 38 | 39 | prog_bar = mmcv.ProgressBar(len(dataset)) 40 | for i, data in enumerate(data_loader): 41 | with torch.no_grad(): 42 | result = model(return_loss=False, rescale=True, tracking=True, **data) 43 | results.extend(result) 44 | 45 | batch_size = len(result) 46 | for _ in range(batch_size): 47 | prog_bar.update() 48 | return results -------------------------------------------------------------------------------- /projects/tracking_plugin/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .visualizer import Visualizer2D 2 | from .functions import NuscenesTrackingBox -------------------------------------------------------------------------------- /projects/tracking_plugin/visualization/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/visualization/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/visualization/__pycache__/functions.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/visualization/__pycache__/functions.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/visualization/__pycache__/visualizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/projects/tracking_plugin/visualization/__pycache__/visualizer.cpython-38.pyc -------------------------------------------------------------------------------- /projects/tracking_plugin/visualization/functions.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | from PIL import Image 4 | import numpy as np 5 | from nuscenes.eval.common.data_classes import EvalBoxes, EvalBox 6 | from nuscenes.utils.data_classes import Box 7 | from nuscenes.utils.geometry_utils import view_points 8 | import copy 9 | from matplotlib.axes import Axes 10 | from typing import Tuple, List, Iterable 11 | 12 | 13 | def map_point_cloud_to_image(pc, image, ego_pose, cam_pose, cam_intrinsics, min_dist=1.0): 14 | """ map a global coordinate point cloud to image 15 | Args: 16 | pc (numpy.ndarray [N * 3]) 17 | """ 18 | point_cloud = copy.deepcopy(pc) 19 | 20 | # transform point cloud to the ego 21 | point_cloud -= ego_pose[:3, 3] 22 | point_cloud = point_cloud @ ego_pose[:3, :3] 23 | 24 | # transform from ego to camera 25 | point_cloud -= cam_pose[:3, 3] 26 | point_cloud = point_cloud @ cam_pose[:3, :3] 27 | 28 | # project points to images 29 | # step 1. Depth and colors 30 | depths = point_cloud[:, 2] 31 | intensities = point_cloud[:, 2] 32 | intensities = (intensities - np.min(intensities)) / (np.max(intensities) - np.min(intensities)) 33 | intensities = intensities ** 0.1 34 | intensities = np.maximum(0, intensities - 0.5) 35 | coloring = intensities 36 | 37 | # step 2. 
Project onto images with intrinsics 38 | points = point_cloud.T 39 | points = view_points(points[:3, :], cam_intrinsics, normalize=True).T 40 | 41 | # step 3. Remove the points that are outside/behind the camera 42 | mask = np.ones(depths.shape[0], dtype=bool) 43 | mask = np.logical_and(mask, depths > min_dist) 44 | mask = np.logical_and(mask, points[:, 0] > 1) 45 | mask = np.logical_and(mask, points[:, 0] < image.size[0] - 1) 46 | mask = np.logical_and(mask, points[:, 1] > 1) 47 | mask = np.logical_and(mask, points[:, 1] < image.size[1] - 1) 48 | points = points[mask, :] 49 | coloring = coloring[mask] 50 | 51 | return points, coloring 52 | 53 | 54 | class NuscenesTrackingBox(Box): 55 | """ Data class used during tracking evaluation. Can be a prediction or ground truth.""" 56 | 57 | def __init__(self, 58 | sample_token: str = "", 59 | translation: Tuple[float, float, float] = (0, 0, 0), 60 | size: Tuple[float, float, float] = (0, 0, 0), 61 | rotation: Tuple[float, float, float, float] = (0, 0, 0, 0), 62 | velocity: Tuple[float, float] = (0, 0), 63 | ego_translation: Tuple[float, float, float] = (0, 0, 0), # Translation to ego vehicle in meters. 64 | num_pts: int = -1, # Nbr. LIDAR or RADAR inside the box. Only for gt boxes. 65 | tracking_id: str = '', # Instance id of this object. 66 | tracking_name: str = '', # The class name used in the tracking challenge. 67 | tracking_score: float = -1.0): # Does not apply to GT. 68 | 69 | super().__init__(translation, size, rotation, np.nan, tracking_score, name=tracking_id) 70 | 71 | assert tracking_name is not None, 'Error: tracking_name cannot be empty!' 72 | 73 | assert type(tracking_score) == float, 'Error: tracking_score must be a float!' 74 | assert not np.any(np.isnan(tracking_score)), 'Error: tracking_score may not be NaN!' 75 | 76 | # Assign. 77 | self.tracking_id = tracking_id 78 | self.tracking_name = tracking_name 79 | self.tracking_score = tracking_score 80 | 81 | def __eq__(self, other): 82 | return (self.sample_token == other.sample_token and 83 | self.translation == other.translation and 84 | self.size == other.size and 85 | self.rotation == other.rotation and 86 | self.velocity == other.velocity and 87 | self.ego_translation == other.ego_translation and 88 | self.num_pts == other.num_pts and 89 | self.tracking_id == other.tracking_id and 90 | self.tracking_name == other.tracking_name and 91 | self.tracking_score == other.tracking_score) 92 | 93 | def serialize(self) -> dict: 94 | """ Serialize instance into json-friendly format. """ 95 | return { 96 | 'sample_token': self.sample_token, 97 | 'translation': self.translation, 98 | 'size': self.size, 99 | 'rotation': self.rotation, 100 | 'velocity': self.velocity, 101 | 'ego_translation': self.ego_translation, 102 | 'num_pts': self.num_pts, 103 | 'tracking_id': self.tracking_id, 104 | 'tracking_name': self.tracking_name, 105 | 'tracking_score': self.tracking_score 106 | } 107 | 108 | @classmethod 109 | def deserialize(cls, content: dict): 110 | """ Initialize from serialized content. 
""" 111 | return cls(sample_token=content['sample_token'], 112 | translation=tuple(content['translation']), 113 | size=tuple(content['size']), 114 | rotation=tuple(content['rotation']), 115 | velocity=tuple(content['velocity']), 116 | ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content 117 | else tuple(content['ego_translation']), 118 | num_pts=-1 if 'num_pts' not in content else int(content['num_pts']), 119 | tracking_id=content['tracking_id'], 120 | tracking_name=content['tracking_name'], 121 | tracking_score=-1.0 if 'tracking_score' not in content else float(content['tracking_score'])) 122 | 123 | def render(self, 124 | axis: Axes, 125 | view: np.ndarray = np.eye(3), 126 | normalize: bool = False, 127 | colors: Tuple = ('b', 'r', 'k'), 128 | linestyle: str = 'solid', 129 | linewidth: float = 2, 130 | text=True) -> None: 131 | """ 132 | Renders the box in the provided Matplotlib axis. 133 | :param axis: Axis onto which the box should be drawn. 134 | :param view: . Define a projection in needed (e.g. for drawing projection in an image). 135 | :param normalize: Whether to normalize the remaining coordinate. 136 | :param colors: (: 3). Valid Matplotlib colors ( or normalized RGB tuple) for front, 137 | back and sides. 138 | :param linewidth: Width in pixel of the box sides. 139 | """ 140 | corners = view_points(self.corners(), view, normalize=normalize)[:2, :] 141 | 142 | def draw_rect(selected_corners, color): 143 | prev = selected_corners[-1] 144 | for corner in selected_corners: 145 | axis.plot([prev[0], corner[0]], [prev[1], corner[1]], color=color, linewidth=linewidth, linestyle=linestyle) 146 | prev = corner 147 | 148 | # Draw the sides 149 | for i in range(4): 150 | axis.plot([corners.T[i][0], corners.T[i + 4][0]], 151 | [corners.T[i][1], corners.T[i + 4][1]], 152 | color=colors, linewidth=linewidth, linestyle=linestyle) 153 | 154 | # Draw front (first 4 corners) and rear (last 4 corners) rectangles(3d)/lines(2d) 155 | draw_rect(corners.T[:4], colors) 156 | draw_rect(corners.T[4:], colors) 157 | 158 | # Draw line indicating the front 159 | center_bottom_forward = np.mean(corners.T[2:4], axis=0) 160 | center_bottom = np.mean(corners.T[[2, 3, 7, 6]], axis=0) 161 | axis.plot([center_bottom[0], center_bottom_forward[0]], 162 | [center_bottom[1], center_bottom_forward[1]], 163 | color=colors, linewidth=linewidth, linestyle=linestyle) 164 | corner_index = np.random.randint(0, 8, 1) 165 | if text: 166 | axis.text(corners[0, corner_index] - 1, corners[1, corner_index] - 1, self.tracking_id, color=colors, fontsize=8) 167 | -------------------------------------------------------------------------------- /projects/tracking_plugin/visualization/visualizer.py: -------------------------------------------------------------------------------- 1 | # Modified from SimpleTrack (https://github.com/tusen-ai/SimpleTrack) 2 | from threading import local 3 | import matplotlib.pyplot as plt, numpy as np 4 | from . 
import functions 5 | from nuscenes.utils.data_classes import Box 6 | from pyquaternion import Quaternion 7 | from nuscenes.utils.geometry_utils import view_points 8 | import copy 9 | 10 | 11 | class Visualizer2D: 12 | def __init__(self, name='', figsize=(8, 8)): 13 | self.figure = plt.figure(name, figsize=figsize) 14 | plt.axis('equal') 15 | self.COLOR_MAP = { 16 | 'gray': np.array([140, 140, 136]) / 256, 17 | 'light_blue': np.array([4, 157, 217]) / 256, 18 | 'red': np.array([191, 4, 54]) / 256, 19 | 'black': np.array([0, 0, 0]) / 256, 20 | 'purple': np.array([224, 133, 250]) / 256, 21 | 'dark_green': np.array([32, 64, 40]) / 256, 22 | 'green': np.array([77, 115, 67]) / 256 23 | } 24 | 25 | def show(self): 26 | plt.show() 27 | 28 | def close(self): 29 | plt.close() 30 | 31 | def save(self, path): 32 | plt.savefig(path) 33 | 34 | def handler_pc(self, pc, color='gray'): 35 | vis_pc = np.asarray(pc) 36 | plt.scatter(vis_pc[:, 0], vis_pc[:, 1], marker='o', color=self.COLOR_MAP[color], s=0.01) 37 | 38 | def handle_project_pc(self, pc, image, ego_pose, cam_pose, cam_intrinsics): 39 | points, coloring = functions.map_point_cloud_to_image( 40 | pc, image, ego_pose, cam_pose, cam_intrinsics) 41 | plt.scatter(points[:, 0], points[:, 1], c=coloring, s=0.1) 42 | 43 | def handle_image(self, image): 44 | plt.imshow(image) 45 | 46 | def handle_bbox(self, bbox: Box, message: str='', color='red', linestyle='solid'): 47 | """Draw the bird's-eye-view (BEV) footprint of a bbox. 48 | """ 49 | corners = bbox.bottom_corners().T 50 | corners = np.concatenate([corners, corners[0:1, :2]]) 51 | plt.plot(corners[:, 0], corners[:, 1], color=self.COLOR_MAP[color], linestyle=linestyle) 52 | corner_index = np.random.randint(0, 4, 1) 53 | plt.text(corners[corner_index, 0] - 1, corners[corner_index, 1] - 1, message, color=self.COLOR_MAP[color]) 54 | 55 | def handle_project_bbox(self, bbox: Box, image, ego_pose, cam_pose, cam_intrinsics, 56 | message: str='', color='red', linestyle='solid'): 57 | """Project a bbox onto the image plane and draw it. 58 | """ 59 | # transform global bbox to ego --> camera 60 | box = copy.deepcopy(bbox) 61 | box.translate(-ego_pose[:3, 3]) 62 | box.rotate(Quaternion(matrix=ego_pose[:3, :3].T)) 63 | box.translate(-cam_pose[:3, 3]) 64 | box.rotate(Quaternion(matrix=cam_pose[:3, :3].T)) 65 | 66 | if box.center[2] < 0: 67 | return 68 | 69 | def draw_rect(selected_corners, color): 70 | prev = selected_corners[-1] 71 | for corner in selected_corners: 72 | plt.plot([prev[0], corner[0]], [prev[1], corner[1]], color=color, linestyle=linestyle) 73 | prev = corner 74 | 75 | corners = view_points(box.corners(), cam_intrinsics, normalize=True).T[:, :2] 76 | 77 | # if np.max(corners[:, 0]) <= 1 or np.min(corners[:, 1]) >= image.size[0] or \ 78 | # np.max(corners[:, 1]) <= 1 or np.min(corners[:, 1]) >= image.size[1]: 79 | # return 80 | if not (np.min(corners[:, 0]) >= 1 and np.max(corners[:, 0]) <= image.size[0] and \ 81 | np.min(corners[:, 1]) >= 1 and np.max(corners[:, 1]) <= image.size[1]): 82 | return 83 | 84 | for i in range(4): 85 | plt.plot([corners[i][0], corners[i + 4][0]], 86 | [corners[i][1], corners[i + 4][1]], 87 | color=self.COLOR_MAP[color], linestyle=linestyle) 88 | 89 | draw_rect(corners[:4], self.COLOR_MAP[color]) 90 | draw_rect(corners[4:], self.COLOR_MAP[color]) 91 | 92 | # Draw line indicating the front 93 | center_bottom_forward = np.mean(corners[2:4], axis=0)  # corners is (8, 2) here, so index the rows directly (corners.T[2:4] would be empty) 94 | center_bottom = np.mean(corners[[2, 3, 7, 6]], axis=0) 95 | plt.plot([center_bottom[0], center_bottom_forward[0]], 96 | [center_bottom[1], 
center_bottom_forward[1]], 97 | color=self.COLOR_MAP[color], linestyle=linestyle) 98 | 99 | # select a corner and plot messages 100 | corner_index = np.random.randint(0, 8, 1) 101 | plt.text(corners[corner_index, 0] - 1, corners[corner_index, 1] - 1, message, color=self.COLOR_MAP[color]) 102 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/tools/__init__.py -------------------------------------------------------------------------------- /tools/create_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | export PYTHONPATH=`pwd`:$PYTHONPATH 5 | 6 | PARTITION=$1 7 | JOB_NAME=$2 8 | CONFIG=$3 9 | WORK_DIR=$4 10 | GPUS=${GPUS:-1} 11 | GPUS_PER_NODE=${GPUS_PER_NODE:-1} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | JOB_NAME=create_data 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --kill-on-bad-exit=1 \ 21 | ${SRUN_ARGS} \ 22 | python -u tools/create_data.py kitti \ 23 | --root-path ./data/kitti \ 24 | --out-dir ./data/kitti \ 25 | --extra-tag kitti 26 | -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | 3 | from . import nuscenes_tracking_converter -------------------------------------------------------------------------------- /tools/data_converter/indoor_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import numpy as np 4 | import os 5 | 6 | from tools.data_converter.s3dis_data_utils import S3DISData, S3DISSegData 7 | from tools.data_converter.scannet_data_utils import ScanNetData, ScanNetSegData 8 | from tools.data_converter.sunrgbd_data_utils import SUNRGBDData 9 | 10 | 11 | def create_indoor_info_file(data_path, 12 | pkl_prefix='sunrgbd', 13 | save_path=None, 14 | use_v1=False, 15 | workers=4): 16 | """Create indoor information file. 17 | 18 | Get information of the raw data and save it to the pkl file. 19 | 20 | Args: 21 | data_path (str): Path of the data. 22 | pkl_prefix (str): Prefix of the pkl to be saved. Default: 'sunrgbd'. 23 | save_path (str): Path of the pkl to be saved. Default: None. 24 | use_v1 (bool): Whether to use v1. Default: False. 25 | workers (int): Number of threads to be used. Default: 4. 
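Example:
    >>> # minimal sketch; assumes the SUN RGB-D data has been prepared under ./data/sunrgbd
    >>> create_indoor_info_file('./data/sunrgbd', pkl_prefix='sunrgbd', save_path='./data/sunrgbd')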
26 | """ 27 | assert os.path.exists(data_path) 28 | assert pkl_prefix in ['sunrgbd', 'scannet', 's3dis'], \ 29 | f'unsupported indoor dataset {pkl_prefix}' 30 | save_path = data_path if save_path is None else save_path 31 | assert os.path.exists(save_path) 32 | 33 | # generate infos for both detection and segmentation task 34 | if pkl_prefix in ['sunrgbd', 'scannet']: 35 | train_filename = os.path.join(save_path, 36 | f'{pkl_prefix}_infos_train.pkl') 37 | val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl') 38 | if pkl_prefix == 'sunrgbd': 39 | # SUN RGB-D has a train-val split 40 | train_dataset = SUNRGBDData( 41 | root_path=data_path, split='train', use_v1=use_v1) 42 | val_dataset = SUNRGBDData( 43 | root_path=data_path, split='val', use_v1=use_v1) 44 | else: 45 | # ScanNet has a train-val-test split 46 | train_dataset = ScanNetData(root_path=data_path, split='train') 47 | val_dataset = ScanNetData(root_path=data_path, split='val') 48 | test_dataset = ScanNetData(root_path=data_path, split='test') 49 | test_filename = os.path.join(save_path, 50 | f'{pkl_prefix}_infos_test.pkl') 51 | 52 | infos_train = train_dataset.get_infos( 53 | num_workers=workers, has_label=True) 54 | mmcv.dump(infos_train, train_filename, 'pkl') 55 | print(f'{pkl_prefix} info train file is saved to {train_filename}') 56 | 57 | infos_val = val_dataset.get_infos(num_workers=workers, has_label=True) 58 | mmcv.dump(infos_val, val_filename, 'pkl') 59 | print(f'{pkl_prefix} info val file is saved to {val_filename}') 60 | 61 | if pkl_prefix == 'scannet': 62 | infos_test = test_dataset.get_infos( 63 | num_workers=workers, has_label=False) 64 | mmcv.dump(infos_test, test_filename, 'pkl') 65 | print(f'{pkl_prefix} info test file is saved to {test_filename}') 66 | 67 | # generate infos for the semantic segmentation task 68 | # e.g. re-sampled scene indexes and label weights 69 | # scene indexes are used to re-sample rooms with different number of points 70 | # label weights are used to balance classes with different number of points 71 | if pkl_prefix == 'scannet': 72 | # label weight computation function is adopted from 73 | # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24 74 | train_dataset = ScanNetSegData( 75 | data_root=data_path, 76 | ann_file=train_filename, 77 | split='train', 78 | num_points=8192, 79 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x)) 80 | # TODO: do we need to generate on val set? 
81 | val_dataset = ScanNetSegData( 82 | data_root=data_path, 83 | ann_file=val_filename, 84 | split='val', 85 | num_points=8192, 86 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x)) 87 | # no need to generate for test set 88 | train_dataset.get_seg_infos() 89 | val_dataset.get_seg_infos() 90 | elif pkl_prefix == 's3dis': 91 | # S3DIS doesn't have a fixed train-val split 92 | # it has 6 areas instead, so we generate info file for each of them 93 | # in training, we will use dataset to wrap different areas 94 | splits = [f'Area_{i}' for i in [1, 2, 3, 4, 5, 6]] 95 | for split in splits: 96 | dataset = S3DISData(root_path=data_path, split=split) 97 | info = dataset.get_infos(num_workers=workers, has_label=True) 98 | filename = os.path.join(save_path, 99 | f'{pkl_prefix}_infos_{split}.pkl') 100 | mmcv.dump(info, filename, 'pkl') 101 | print(f'{pkl_prefix} info {split} file is saved to {filename}') 102 | seg_dataset = S3DISSegData( 103 | data_root=data_path, 104 | ann_file=filename, 105 | split=split, 106 | num_points=4096, 107 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x)) 108 | seg_dataset.get_seg_infos() 109 | -------------------------------------------------------------------------------- /tools/data_converter/lyft_data_fixer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import numpy as np 4 | import os 5 | 6 | 7 | def fix_lyft(root_folder='./data/lyft', version='v1.01'): 8 | # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa 9 | lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin' 10 | root_folder = os.path.join(root_folder, f'{version}-train') 11 | lidar_path = os.path.join(root_folder, lidar_path) 12 | assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \ 13 | f'dataset and make sure {lidar_path} is present.' 14 | points = np.fromfile(lidar_path, dtype=np.float32, count=-1) 15 | try: 16 | points.reshape([-1, 5]) 17 | print(f'This fix is not required for version {version}.') 18 | except ValueError: 19 | new_points = np.array(list(points) + [100.0, 1.0], dtype='float32') 20 | new_points.tofile(lidar_path) 21 | print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.') 22 | 23 | 24 | parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser') 25 | parser.add_argument( 26 | '--root-folder', 27 | type=str, 28 | default='./data/lyft', 29 | help='specify the root path of Lyft dataset') 30 | parser.add_argument( 31 | '--version', 32 | type=str, 33 | default='v1.01', 34 | help='specify Lyft dataset version') 35 | args = parser.parse_args() 36 | 37 | if __name__ == '__main__': 38 | fix_lyft(root_folder=args.root_folder, version=args.version) 39 | -------------------------------------------------------------------------------- /tools/data_converter/nuimage_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
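# Example invocation (the paths and version are placeholders; every flag below is defined in parse_args()):
#   python tools/data_converter/nuimage_converter.py --data-root ./data/nuimages \
#       --version v1.0-mini --out-dir ./data/nuimages/annotations/ --nproc 4 --extra-tag nuimages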
2 | import argparse 3 | import base64 4 | import mmcv 5 | import numpy as np 6 | from nuimages import NuImages 7 | from nuimages.utils.utils import mask_decode, name_to_index_mapping 8 | from os import path as osp 9 | 10 | nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 11 | 'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 12 | 'barrier') 13 | 14 | NAME_MAPPING = { 15 | 'movable_object.barrier': 'barrier', 16 | 'vehicle.bicycle': 'bicycle', 17 | 'vehicle.bus.bendy': 'bus', 18 | 'vehicle.bus.rigid': 'bus', 19 | 'vehicle.car': 'car', 20 | 'vehicle.construction': 'construction_vehicle', 21 | 'vehicle.motorcycle': 'motorcycle', 22 | 'human.pedestrian.adult': 'pedestrian', 23 | 'human.pedestrian.child': 'pedestrian', 24 | 'human.pedestrian.construction_worker': 'pedestrian', 25 | 'human.pedestrian.police_officer': 'pedestrian', 26 | 'movable_object.trafficcone': 'traffic_cone', 27 | 'vehicle.trailer': 'trailer', 28 | 'vehicle.truck': 'truck', 29 | } 30 | 31 | 32 | def parse_args(): 33 | parser = argparse.ArgumentParser(description='Data converter arg parser') 34 | parser.add_argument( 35 | '--data-root', 36 | type=str, 37 | default='./data/nuimages', 38 | help='specify the root path of dataset') 39 | parser.add_argument( 40 | '--version', 41 | type=str, 42 | nargs='+', 43 | default=['v1.0-mini'], 44 | required=False, 45 | help='specify the dataset version') 46 | parser.add_argument( 47 | '--out-dir', 48 | type=str, 49 | default='./data/nuimages/annotations/', 50 | required=False, 51 | help='path to save the exported json') 52 | parser.add_argument( 53 | '--nproc', 54 | type=int, 55 | default=4, 56 | required=False, 57 | help='workers to process semantic masks') 58 | parser.add_argument('--extra-tag', type=str, default='nuimages') 59 | args = parser.parse_args() 60 | return args 61 | 62 | 63 | def get_img_annos(nuim, img_info, cat2id, out_dir, data_root, seg_root): 64 | """Get semantic segmentation map for an image. 65 | 66 | Args: 67 | nuim (obj:`NuImages`): NuImages dataset object 68 | img_info (dict): Meta information of img 69 | 70 | Returns: 71 | np.ndarray: Semantic segmentation map of the image 72 | """ 73 | sd_token = img_info['token'] 74 | image_id = img_info['id'] 75 | name_to_index = name_to_index_mapping(nuim.category) 76 | 77 | # Get image data. 78 | width, height = img_info['width'], img_info['height'] 79 | semseg_mask = np.zeros((height, width)).astype('uint8') 80 | 81 | # Load stuff / surface regions. 82 | surface_anns = [ 83 | o for o in nuim.surface_ann if o['sample_data_token'] == sd_token 84 | ] 85 | 86 | # Draw stuff / surface regions. 87 | for ann in surface_anns: 88 | # Get color and mask. 89 | category_token = ann['category_token'] 90 | category_name = nuim.get('category', category_token)['name'] 91 | if ann['mask'] is None: 92 | continue 93 | mask = mask_decode(ann['mask']) 94 | 95 | # Draw mask for semantic segmentation. 96 | semseg_mask[mask == 1] = name_to_index[category_name] 97 | 98 | # Load object instances. 99 | object_anns = [ 100 | o for o in nuim.object_ann if o['sample_data_token'] == sd_token 101 | ] 102 | 103 | # Sort by token to ensure that objects always appear in the 104 | # instance mask in the same order. 105 | object_anns = sorted(object_anns, key=lambda k: k['token']) 106 | 107 | # Draw object instances. 108 | # The 0 index is reserved for background; thus, the instances 109 | # should start from index 1. 110 | annotations = [] 111 | for i, ann in enumerate(object_anns, start=1): 112 | # Get color, box, mask and name. 
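# Note: instance masks are painted on top of the surface masks drawn above, and only
# categories present in NAME_MAPPING are additionally exported as COCO-style annotations.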
113 | category_token = ann['category_token'] 114 | category_name = nuim.get('category', category_token)['name'] 115 | if ann['mask'] is None: 116 | continue 117 | mask = mask_decode(ann['mask']) 118 | 119 | # Draw masks for semantic segmentation and instance segmentation. 120 | semseg_mask[mask == 1] = name_to_index[category_name] 121 | 122 | if category_name in NAME_MAPPING: 123 | cat_name = NAME_MAPPING[category_name] 124 | cat_id = cat2id[cat_name] 125 | 126 | x_min, y_min, x_max, y_max = ann['bbox'] 127 | # encode calibrated instance mask 128 | mask_anno = dict() 129 | mask_anno['counts'] = base64.b64decode( 130 | ann['mask']['counts']).decode() 131 | mask_anno['size'] = ann['mask']['size'] 132 | 133 | data_anno = dict( 134 | image_id=image_id, 135 | category_id=cat_id, 136 | bbox=[x_min, y_min, x_max - x_min, y_max - y_min], 137 | area=(x_max - x_min) * (y_max - y_min), 138 | segmentation=mask_anno, 139 | iscrowd=0) 140 | annotations.append(data_anno) 141 | 142 | # after process, save semantic masks 143 | img_filename = img_info['file_name'] 144 | seg_filename = img_filename.replace('jpg', 'png') 145 | seg_filename = osp.join(seg_root, seg_filename) 146 | mmcv.imwrite(semseg_mask, seg_filename) 147 | return annotations, np.max(semseg_mask) 148 | 149 | 150 | def export_nuim_to_coco(nuim, data_root, out_dir, extra_tag, version, nproc): 151 | print('Process category information') 152 | categories = [] 153 | categories = [ 154 | dict(id=nus_categories.index(cat_name), name=cat_name) 155 | for cat_name in nus_categories 156 | ] 157 | cat2id = {k_v['name']: k_v['id'] for k_v in categories} 158 | 159 | images = [] 160 | print('Process image meta information...') 161 | for sample_info in mmcv.track_iter_progress(nuim.sample_data): 162 | if sample_info['is_key_frame']: 163 | img_idx = len(images) 164 | images.append( 165 | dict( 166 | id=img_idx, 167 | token=sample_info['token'], 168 | file_name=sample_info['filename'], 169 | width=sample_info['width'], 170 | height=sample_info['height'])) 171 | 172 | seg_root = f'{out_dir}semantic_masks' 173 | mmcv.mkdir_or_exist(seg_root) 174 | mmcv.mkdir_or_exist(osp.join(data_root, 'calibrated')) 175 | 176 | global process_img_anno 177 | 178 | def process_img_anno(img_info): 179 | single_img_annos, max_cls_id = get_img_annos(nuim, img_info, cat2id, 180 | out_dir, data_root, 181 | seg_root) 182 | return single_img_annos, max_cls_id 183 | 184 | print('Process img annotations...') 185 | if nproc > 1: 186 | outputs = mmcv.track_parallel_progress( 187 | process_img_anno, images, nproc=nproc) 188 | else: 189 | outputs = [] 190 | for img_info in mmcv.track_iter_progress(images): 191 | outputs.append(process_img_anno(img_info)) 192 | 193 | # Determine the index of object annotation 194 | print('Process annotation information...') 195 | annotations = [] 196 | max_cls_ids = [] 197 | for single_img_annos, max_cls_id in outputs: 198 | max_cls_ids.append(max_cls_id) 199 | for img_anno in single_img_annos: 200 | img_anno.update(id=len(annotations)) 201 | annotations.append(img_anno) 202 | 203 | max_cls_id = max(max_cls_ids) 204 | print(f'Max ID of class in the semantic map: {max_cls_id}') 205 | 206 | coco_format_json = dict( 207 | images=images, annotations=annotations, categories=categories) 208 | 209 | mmcv.mkdir_or_exist(out_dir) 210 | out_file = osp.join(out_dir, f'{extra_tag}_{version}.json') 211 | print(f'Annotation dumped to {out_file}') 212 | mmcv.dump(coco_format_json, out_file) 213 | 214 | 215 | def main(): 216 | args = parse_args() 217 | for version in 
args.version: 218 | nuim = NuImages( 219 | dataroot=args.data_root, version=version, verbose=True, lazy=True) 220 | export_nuim_to_coco(nuim, args.data_root, args.out_dir, args.extra_tag, 221 | version, args.nproc) 222 | 223 | 224 | if __name__ == '__main__': 225 | main() 226 | -------------------------------------------------------------------------------- /tools/data_converter/nuscenes_prediction_tools.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2023 toyota research instutute. 3 | # ------------------------------------------------------------------------ 4 | # Modified from FutureDet (https://github.com/neeharperi/FutureDet) 5 | # ------------------------------------------------------------------------ 6 | import numpy as np 7 | from pyquaternion import Quaternion 8 | from nuscenes import NuScenes 9 | from nuscenes.utils.data_classes import Box 10 | from itertools import tee 11 | from copy import deepcopy 12 | 13 | 14 | def get_forecasting_annotations(nusc: NuScenes, annotations, length): 15 | """Acquire the trajectories for each box 16 | """ 17 | forecast_annotations = [] 18 | forecast_boxes = [] 19 | forecast_trajectory_type = [] 20 | forecast_visibility_mask = [] 21 | sample_tokens = [s["token"] for s in nusc.sample] 22 | 23 | for annotation in annotations: 24 | tracklet_box = [] 25 | tracklet_annotation = [] 26 | tracklet_visiblity_mask = [] 27 | tracklet_trajectory_type = [] 28 | 29 | token = nusc.sample[sample_tokens.index(annotation["sample_token"])]["data"]["LIDAR_TOP"] 30 | sd_record = nusc.get("sample_data", token) 31 | cs_record = nusc.get("calibrated_sensor", sd_record["calibrated_sensor_token"]) 32 | pose_record = nusc.get("ego_pose", sd_record["ego_pose_token"]) 33 | 34 | visibility = True 35 | for step in range(length): 36 | box = Box(center = annotation["translation"], 37 | size = annotation["size"], 38 | orientation = Quaternion(annotation["rotation"]), 39 | velocity = nusc.box_velocity(annotation["token"]), 40 | name = annotation["category_name"], 41 | token = annotation["token"]) 42 | 43 | # move box to the ego-system when the prediction is made 44 | box.translate(-np.array(pose_record["translation"])) 45 | box.rotate(Quaternion(pose_record["rotation"]).inverse) 46 | 47 | # Move box to sensor coord system 48 | box.translate(-np.array(cs_record["translation"])) 49 | box.rotate(Quaternion(cs_record["rotation"]).inverse) 50 | 51 | tracklet_box.append(box) 52 | tracklet_annotation.append(annotation) 53 | tracklet_visiblity_mask.append(visibility) 54 | 55 | next_token = annotation['next'] 56 | if next_token != '': 57 | annotation = nusc.get('sample_annotation', next_token) 58 | else: 59 | # if the trajectory cannot be prolonged anymore, 60 | # use the last one to pad and set the visibility flag 61 | annotation = annotation 62 | visibility = False 63 | 64 | tokens = [b["sample_token"] for b in tracklet_annotation] 65 | time = [get_time(nusc, src, dst) for src, dst in window(tokens, 2)] 66 | tracklet_trajectory_type = trajectory_type(nusc, tracklet_box, time, length) # same as FutureDet 67 | 68 | forecast_boxes.append(tracklet_box) 69 | forecast_annotations.append(tracklet_annotation) 70 | forecast_trajectory_type.append(length * [tracklet_trajectory_type]) 71 | forecast_visibility_mask.append(tracklet_visiblity_mask) 72 | return forecast_boxes, forecast_annotations, forecast_visibility_mask, forecast_trajectory_type 73 | 74 | 75 | def 
window(iterable, size): 76 | iters = tee(iterable, size) 77 | for i in range(1, size): 78 | for each in iters[i:]: 79 | next(each, None) 80 | 81 | return zip(*iters) 82 | 83 | def get_time(nusc, src_token, dst_token): 84 | time_last = 1e-6 * nusc.get('sample', src_token)["timestamp"] 85 | time_first = 1e-6 * nusc.get('sample', dst_token)["timestamp"] 86 | time_diff = time_first - time_last 87 | 88 | return time_diff 89 | 90 | 91 | def center_distance(gt_box, pred_box) -> float: 92 | """ 93 | L2 distance between the box centers (xy only). 94 | :param gt_box: GT annotation sample. 95 | :param pred_box: Predicted sample. 96 | :return: L2 distance. 97 | """ 98 | return np.linalg.norm(np.array(pred_box.center[:2]) - np.array(gt_box.center[:2])) 99 | 100 | 101 | def trajectory_type(nusc, boxes, time, timesteps=7, past=False): 102 | target = boxes[-1] 103 | 104 | static_forecast = deepcopy(boxes[0]) 105 | 106 | linear_forecast = deepcopy(boxes[0]) 107 | vel = linear_forecast.velocity[:2] 108 | disp = np.sum(list(map(lambda x: np.array(list(vel) + [0]) * x, time)), axis=0) 109 | 110 | if past: 111 | linear_forecast.center = linear_forecast.center - disp 112 | 113 | else: 114 | linear_forecast.center = linear_forecast.center + disp 115 | 116 | if center_distance(target, static_forecast) < max(target.wlh[0], target.wlh[1]): 117 | # return "static" 118 | return 0 119 | 120 | elif center_distance(target, linear_forecast) < max(target.wlh[0], target.wlh[1]): 121 | # return "linear" 122 | return 1 123 | 124 | else: 125 | # return "nonlinear" 126 | return 2 -------------------------------------------------------------------------------- /tools/data_converter/sunrgbd_data_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import numpy as np 4 | from concurrent import futures as futures 5 | from os import path as osp 6 | from scipy import io as sio 7 | 8 | 9 | def random_sampling(points, num_points, replace=None, return_choices=False): 10 | """Random sampling. 11 | 12 | Sampling point cloud to a certain number of points. 13 | 14 | Args: 15 | points (ndarray): Point cloud. 16 | num_points (int): The number of samples. 17 | replace (bool): Whether the sample is with or without replacement. 18 | return_choices (bool): Whether to return choices. 19 | 20 | Returns: 21 | points (ndarray): Point cloud after sampling. 
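choices (ndarray): Indices of the sampled points. Only returned when return_choices is True.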
22 | """ 23 | 24 | if replace is None: 25 | replace = (points.shape[0] < num_points) 26 | choices = np.random.choice(points.shape[0], num_points, replace=replace) 27 | if return_choices: 28 | return points[choices], choices 29 | else: 30 | return points[choices] 31 | 32 | 33 | class SUNRGBDInstance(object): 34 | 35 | def __init__(self, line): 36 | data = line.split(' ') 37 | data[1:] = [float(x) for x in data[1:]] 38 | self.classname = data[0] 39 | self.xmin = data[1] 40 | self.ymin = data[2] 41 | self.xmax = data[1] + data[3] 42 | self.ymax = data[2] + data[4] 43 | self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax]) 44 | self.centroid = np.array([data[5], data[6], data[7]]) 45 | self.w = data[8] 46 | self.l = data[9] # noqa: E741 47 | self.h = data[10] 48 | self.orientation = np.zeros((3, )) 49 | self.orientation[0] = data[11] 50 | self.orientation[1] = data[12] 51 | self.heading_angle = -1 * np.arctan2(self.orientation[1], 52 | self.orientation[0]) 53 | self.box3d = np.concatenate([ 54 | self.centroid, 55 | np.array([self.l * 2, self.w * 2, self.h * 2, self.heading_angle]) 56 | ]) 57 | 58 | 59 | class SUNRGBDData(object): 60 | """SUNRGBD data. 61 | 62 | Generate scannet infos for sunrgbd_converter. 63 | 64 | Args: 65 | root_path (str): Root path of the raw data. 66 | split (str): Set split type of the data. Default: 'train'. 67 | use_v1 (bool): Whether to use v1. Default: False. 68 | """ 69 | 70 | def __init__(self, root_path, split='train', use_v1=False): 71 | self.root_dir = root_path 72 | self.split = split 73 | self.split_dir = osp.join(root_path, 'sunrgbd_trainval') 74 | self.classes = [ 75 | 'bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 76 | 'night_stand', 'bookshelf', 'bathtub' 77 | ] 78 | self.cat2label = {cat: self.classes.index(cat) for cat in self.classes} 79 | self.label2cat = { 80 | label: self.classes[label] 81 | for label in range(len(self.classes)) 82 | } 83 | assert split in ['train', 'val', 'test'] 84 | split_file = osp.join(self.split_dir, f'{split}_data_idx.txt') 85 | mmcv.check_file_exist(split_file) 86 | self.sample_id_list = map(int, mmcv.list_from_file(split_file)) 87 | self.image_dir = osp.join(self.split_dir, 'image') 88 | self.calib_dir = osp.join(self.split_dir, 'calib') 89 | self.depth_dir = osp.join(self.split_dir, 'depth') 90 | if use_v1: 91 | self.label_dir = osp.join(self.split_dir, 'label_v1') 92 | else: 93 | self.label_dir = osp.join(self.split_dir, 'label') 94 | 95 | def __len__(self): 96 | return len(self.sample_id_list) 97 | 98 | def get_image(self, idx): 99 | img_filename = osp.join(self.image_dir, f'{idx:06d}.jpg') 100 | return mmcv.imread(img_filename) 101 | 102 | def get_image_shape(self, idx): 103 | image = self.get_image(idx) 104 | return np.array(image.shape[:2], dtype=np.int32) 105 | 106 | def get_depth(self, idx): 107 | depth_filename = osp.join(self.depth_dir, f'{idx:06d}.mat') 108 | depth = sio.loadmat(depth_filename)['instance'] 109 | return depth 110 | 111 | def get_calibration(self, idx): 112 | calib_filepath = osp.join(self.calib_dir, f'{idx:06d}.txt') 113 | lines = [line.rstrip() for line in open(calib_filepath)] 114 | Rt = np.array([float(x) for x in lines[0].split(' ')]) 115 | Rt = np.reshape(Rt, (3, 3), order='F').astype(np.float32) 116 | K = np.array([float(x) for x in lines[1].split(' ')]) 117 | K = np.reshape(K, (3, 3), order='F').astype(np.float32) 118 | return K, Rt 119 | 120 | def get_label_objects(self, idx): 121 | label_filename = osp.join(self.label_dir, f'{idx:06d}.txt') 122 | lines = 
[line.rstrip() for line in open(label_filename)] 123 | objects = [SUNRGBDInstance(line) for line in lines] 124 | return objects 125 | 126 | def get_infos(self, num_workers=4, has_label=True, sample_id_list=None): 127 | """Get data infos. 128 | 129 | This method gets information from the raw data. 130 | 131 | Args: 132 | num_workers (int): Number of threads to be used. Default: 4. 133 | has_label (bool): Whether the data has label. Default: True. 134 | sample_id_list (list[int]): Index list of the sample. 135 | Default: None. 136 | 137 | Returns: 138 | infos (list[dict]): Information of the raw data. 139 | """ 140 | 141 | def process_single_scene(sample_idx): 142 | print(f'{self.split} sample_idx: {sample_idx}') 143 | # convert depth to points 144 | SAMPLE_NUM = 50000 145 | # TODO: Check whether can move the point 146 | # sampling process during training. 147 | pc_upright_depth = self.get_depth(sample_idx) 148 | pc_upright_depth_subsampled = random_sampling( 149 | pc_upright_depth, SAMPLE_NUM) 150 | 151 | info = dict() 152 | pc_info = {'num_features': 6, 'lidar_idx': sample_idx} 153 | info['point_cloud'] = pc_info 154 | 155 | mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points')) 156 | pc_upright_depth_subsampled.tofile( 157 | osp.join(self.root_dir, 'points', f'{sample_idx:06d}.bin')) 158 | 159 | info['pts_path'] = osp.join('points', f'{sample_idx:06d}.bin') 160 | img_path = osp.join('image', f'{sample_idx:06d}.jpg') 161 | image_info = { 162 | 'image_idx': sample_idx, 163 | 'image_shape': self.get_image_shape(sample_idx), 164 | 'image_path': img_path 165 | } 166 | info['image'] = image_info 167 | 168 | K, Rt = self.get_calibration(sample_idx) 169 | calib_info = {'K': K, 'Rt': Rt} 170 | info['calib'] = calib_info 171 | 172 | if has_label: 173 | obj_list = self.get_label_objects(sample_idx) 174 | annotations = {} 175 | annotations['gt_num'] = len([ 176 | obj.classname for obj in obj_list 177 | if obj.classname in self.cat2label.keys() 178 | ]) 179 | if annotations['gt_num'] != 0: 180 | annotations['name'] = np.array([ 181 | obj.classname for obj in obj_list 182 | if obj.classname in self.cat2label.keys() 183 | ]) 184 | annotations['bbox'] = np.concatenate([ 185 | obj.box2d.reshape(1, 4) for obj in obj_list 186 | if obj.classname in self.cat2label.keys() 187 | ], 188 | axis=0) 189 | annotations['location'] = np.concatenate([ 190 | obj.centroid.reshape(1, 3) for obj in obj_list 191 | if obj.classname in self.cat2label.keys() 192 | ], 193 | axis=0) 194 | annotations['dimensions'] = 2 * np.array([ 195 | [obj.l, obj.w, obj.h] for obj in obj_list 196 | if obj.classname in self.cat2label.keys() 197 | ]) # lwh (depth) format 198 | annotations['rotation_y'] = np.array([ 199 | obj.heading_angle for obj in obj_list 200 | if obj.classname in self.cat2label.keys() 201 | ]) 202 | annotations['index'] = np.arange( 203 | len(obj_list), dtype=np.int32) 204 | annotations['class'] = np.array([ 205 | self.cat2label[obj.classname] for obj in obj_list 206 | if obj.classname in self.cat2label.keys() 207 | ]) 208 | annotations['gt_boxes_upright_depth'] = np.stack( 209 | [ 210 | obj.box3d for obj in obj_list 211 | if obj.classname in self.cat2label.keys() 212 | ], 213 | axis=0) # (K,8) 214 | info['annos'] = annotations 215 | return info 216 | 217 | sample_id_list = sample_id_list if \ 218 | sample_id_list is not None else self.sample_id_list 219 | with futures.ThreadPoolExecutor(num_workers) as executor: 220 | infos = executor.map(process_single_scene, sample_id_list) 221 | return list(infos) 222 | 
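# Usage sketch (kept as comments; the root path is a placeholder, and the full
# info-generation pipeline lives in tools/data_converter/indoor_converter.py):
#   dataset = SUNRGBDData(root_path='./data/sunrgbd', split='train', use_v1=False)
#   infos = dataset.get_infos(num_workers=4, has_label=True)
#   mmcv.dump(infos, './data/sunrgbd/sunrgbd_infos_train.pkl', 'pkl')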
-------------------------------------------------------------------------------- /work_dirs/cam_visualization/fcbccedd61424f1b85dcbf8f897f9754/0_camera.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/work_dirs/cam_visualization/fcbccedd61424f1b85dcbf8f897f9754/0_camera.png -------------------------------------------------------------------------------- /work_dirs/cam_visualization/fcbccedd61424f1b85dcbf8f897f9754/1_camera.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/work_dirs/cam_visualization/fcbccedd61424f1b85dcbf8f897f9754/1_camera.png -------------------------------------------------------------------------------- /work_dirs/cam_visualization/fcbccedd61424f1b85dcbf8f897f9754/2_camera.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/work_dirs/cam_visualization/fcbccedd61424f1b85dcbf8f897f9754/2_camera.png -------------------------------------------------------------------------------- /work_dirs/cam_visualization/fcbccedd61424f1b85dcbf8f897f9754/video 00_00_00-00_00_30.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/work_dirs/cam_visualization/fcbccedd61424f1b85dcbf8f897f9754/video 00_00_00-00_00_30.gif -------------------------------------------------------------------------------- /work_dirs/jpg1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/work_dirs/jpg1.png -------------------------------------------------------------------------------- /work_dirs/tracking_visualization/fcbccedd61424f1b85dcbf8f897f9754/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/work_dirs/tracking_visualization/fcbccedd61424f1b85dcbf8f897f9754/0.png -------------------------------------------------------------------------------- /work_dirs/tracking_visualization/fcbccedd61424f1b85dcbf8f897f9754/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/work_dirs/tracking_visualization/fcbccedd61424f1b85dcbf8f897f9754/1.png -------------------------------------------------------------------------------- /work_dirs/tracking_visualization/fcbccedd61424f1b85dcbf8f897f9754/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/work_dirs/tracking_visualization/fcbccedd61424f1b85dcbf8f897f9754/2.png -------------------------------------------------------------------------------- /work_dirs/tracking_visualization/fcbccedd61424f1b85dcbf8f897f9754/videobev 00_00_00-00_00_30.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PrymceQ/nuScenes_TrajectoryVisualizationTool/ce76a2e0c85768eda1044f56d8bc25d4a5a91b1e/work_dirs/tracking_visualization/fcbccedd61424f1b85dcbf8f897f9754/videobev 00_00_00-00_00_30.gif --------------------------------------------------------------------------------