├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── assert └── Network.png ├── common ├── laserscan.py ├── laserscanvis.py ├── posslaserscan.py ├── posslaserscanvis.py └── sync_batchnorm │ ├── __init__.py │ ├── batchnorm.py │ ├── comm.py │ └── replicate.py ├── config ├── arch │ ├── CENet.yaml │ ├── CENet_nusc.yaml │ ├── CENet_poss.yaml │ ├── Fid.yaml │ ├── Fid_nusc.yaml │ ├── Fid_poss.yaml │ ├── LENet.yaml │ ├── LENet_nusc.yaml │ └── LENet_poss.yaml ├── data_preparing.yaml └── labels │ ├── semantic-kitti-all.yaml │ ├── semantic-kitti.yaml │ ├── semantic-nuscenes.yaml │ └── semantic-poss.yaml ├── dataset ├── kitti │ └── parser.py ├── nuscenes │ └── parser.py └── poss │ └── parser.py ├── environment.yaml ├── evaluate.py ├── infer.py ├── modules ├── PointRefine │ ├── PointMLP.py │ ├── spvcnn.py │ └── spvcnn_lite.py ├── __init__.py ├── loss │ ├── DiceLoss.py │ ├── Lovasz_Softmax.py │ └── boundary_loss.py ├── network │ ├── CENet.py │ ├── Fid.py │ └── LENet.py ├── scheduler │ ├── consine.py │ └── warmupLR.py ├── tariner_poss.py ├── trainer.py ├── trainer_nusc.py ├── user.py ├── user_nusc.py ├── user_poss.py ├── user_refine.py └── utils.py ├── postproc └── KNN.py ├── requirements.txt ├── train.py ├── train_nusc.py ├── train_poss.py ├── utils ├── auto_gen_residual_images.py ├── kitti_utils.py ├── np_ioueval.py ├── nuscenes2kitti.py ├── torch_ioueval.py ├── utils.py └── validate_submission.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .idea/ 131 | .vscode/ 132 | 133 | logs/* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 dingben 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RangeSeg 2 | > This is the official implementation of **RangeSeg: Efficient Lidar Semantic Segmentation on Range view** [[Paper](https://arxiv.org/pdf/2301.04275.pdf)]. [![arXiv](https://img.shields.io/badge/arxiv-2301.04275-b31b1b.svg)](https://arxiv.org/abs/2301.04275) 3 | ## Demo 4 |

5 | 6 | ## Environment 7 | ```sh 8 | # clone this repo 9 | git clone https://github.com/fengluodb/LENet.git 10 | 11 | # create a conda env with 12 | conda env create -f environment.yaml 13 | conda activate LENet 14 | ``` 15 | 16 | ## Datasets Preparation 17 | ### SemanticKITTI 18 | Download the SemanticKITTI dataset from [here](http://www.semantic-kitti.org/dataset.html#download). 19 | ``` 20 | dataset 21 | └── SemanticKITTI 22 | └── sequences 23 | ├── 00 24 | ├── ... 25 | └── 21 26 | ``` 27 | 28 | 29 | ### SemanticPOSS 30 | Download the SemanticPOSS dataset from [here](http://www.poss.pku.edu.cn./semanticposs.html). Unzip and arrange it as follows. 31 | ``` 32 | dataset 33 | └── SemanticPOSS 34 | └── sequences 35 | ├── 00 36 | ├── ... 37 | └── 05 38 | ``` 39 | 40 | ### Nuscenes 41 | Download the nuScenes dataset from [here](https://nuscenes.org/nuscenes). Use [nuscenes2kitti.py](/utils/nuscenes2kitti.py) to convert nuScenes into a SemanticKITTI-compatible format; you can follow the instructions [here](https://github.com/PRBonn/nuscenes2kitti): 42 | ```sh 43 | python3 utils/nuscenes2kitti.py --nuscenes_dir /path/to/nuscenes --output_dir /path/to/output 44 | ``` 45 | The final layout looks like this: 46 | ```sh 47 | dataset/Nuscene-KITTI/ 48 | └── sequences 49 | ├── 0001 50 | ├── 0002 51 | ├── ... 52 | ├── 1109 53 | └── 1110 54 | ``` 55 | 56 | ## Training 57 | 58 | ### SemanticKITTI 59 | To train a network (from scratch): 60 | ```sh 61 | python train.py -d DATAROOT -ac config/arch/LENet.yaml -dc config/labels/semantic-kitti.yaml -l logs/LENet-KITTI 62 | ``` 63 | 64 | To train a network (from a pretrained model): 65 | ```sh 66 | python train.py -d DATAROOT -ac config/arch/LENet.yaml -dc config/labels/semantic-kitti.yaml -l logs/LENet-KITTI -p "logs/LENet-KITTI/TIMESTAMP" 67 | ``` 68 | 69 | ### SemanticPOSS 70 | To train a network (from scratch): 71 | ```sh 72 | python train_poss.py -d DATAROOT -ac config/arch/LENet_poss.yaml -dc config/labels/semantic-poss.yaml -l logs/LENet-POSS 73 | ``` 74 | 75 | To train a network (from a pretrained model): 76 | ```sh 77 | python train_poss.py -d DATAROOT -ac config/arch/LENet_poss.yaml -dc config/labels/semantic-poss.yaml -l logs/LENet-POSS -p "logs/LENet-POSS/TIMESTAMP" 78 | ``` 79 | 80 | ### Nuscenes 81 | To train a network (from scratch): 82 | ```sh 83 | python train_nusc.py -d DATAROOT -ac config/arch/LENet_nusc.yaml -dc config/labels/semantic-nuscenes.yaml -l logs/LENet-Nusc 84 | ``` 85 | 86 | To train a network (from a pretrained model): 87 | ```sh 88 | python train_nusc.py -d DATAROOT -ac config/arch/LENet_nusc.yaml -dc config/labels/semantic-nuscenes.yaml -l logs/LENet-Nusc -p "logs/LENet-Nusc/TIMESTAMP" 89 | ``` 90 | 91 | ## Inference 92 | 93 | ### SemanticKITTI 94 | ```sh 95 | python infer.py -d DATAROOT -m "logs/LENet-KITTI/TIMESTAMP" -l /path/for/predictions -s valid/test 96 | ``` 97 | 98 | ### SemanticPOSS 99 | ```sh 100 | python infer.py -d DATAROOT -m "logs/LENet-POSS/TIMESTAMP" -l /path/for/predictions -s valid 101 | ``` 102 | 103 | ### Nuscenes 104 | ```sh 105 | python infer.py -d DATAROOT -m "logs/LENet-Nusc/TIMESTAMP" -l /path/for/predictions -s valid/test 106 | ``` 107 | 108 | > warning: when you run inference on the test split, the results are converted into the nuScenes submission format. However, the output still contains label 0 in the predictions, so the results cannot pass the nuScenes [submission validation script](/utils/validate_submission.py). I will find a way to solve this.
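As a stop-gap until that fix lands, one workaround is to remap any remaining label 0 to a valid class before packaging the submission. The sketch below is illustrative only and not part of this repo: it assumes the predictions are stored as per-point `uint8` `.bin` files in the nuScenes lidarseg submission layout, and `PRED_DIR` / `FALLBACK_CLASS` are hypothetical names you would adapt to your own output path and preferred fallback class (remapped points simply count as that class in the score).

```python
# Illustrative helper (hypothetical, not part of this repo): overwrite label 0
# with a valid nuScenes lidarseg class index so the submission passes format
# validation.
from pathlib import Path

import numpy as np

PRED_DIR = Path("/path/for/predictions/lidarseg/test")  # hypothetical location of the .bin predictions
FALLBACK_CLASS = 1  # any valid class index in 1..16; the choice here is arbitrary

for bin_file in sorted(PRED_DIR.glob("*.bin")):
    labels = np.fromfile(str(bin_file), dtype=np.uint8)  # one uint8 label per point
    if (labels == 0).any():
        labels[labels == 0] = FALLBACK_CLASS  # replace the invalid "ignore" label
        labels.tofile(str(bin_file))  # overwrite the prediction file in place
```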
109 | 110 | ## Evaluation 111 | 112 | ### SemanticKITTI 113 | ```sh 114 | python evaluate.py -d DATAROOT -p /path/for/predictions -dc config/labels/semantic-kitti.yaml 115 | ``` 116 | 117 | ### SemanticPOSS 118 | ```sh 119 | python evaluate.py -d DATAROOT -p /path/for/predictions -dc config/labels/semantic-poss.yaml 120 | ``` 121 | 122 | ### Nuscenes 123 | ```sh 124 | python evaluate.py -d DATAROOT -p /path/for/predictions -dc config/labels/semantic-nuscenes.yaml 125 | ``` 126 | 127 | ## Pretrained Models and Predictions 128 | 129 | | dataset | mIoU | Download | 130 | |---------------|:----:|:-----------:| 131 | | [SemanticKITTI(single)](config/arch/LENet.yaml) | 64.5(test) | [Model Weight And Predictions](https://drive.google.com/drive/folders/1ejoInYl8BVzg3t69_ig4tDUYstaz--Ns?usp=sharing) | 132 | | [SemanticKITTI(multi)](config/arch/LENet.yaml) | 53.0(test) | [Model Weight And Predictions](https://drive.google.com/drive/folders/1OfktGL85mFmdRALBb-_Zpc8VSmjXJVYU?usp=sharing) | 133 | | [SemanticPOSS](config/arch/LENet_poss.yaml) | 53.8(test) | [Model Weight And Predictions](https://drive.google.com/drive/folders/1oECv2GRCXZ1RIQVVum-mRwZbod8pxVA8) | 134 | | [Nuscenes](config/arch/LENet_nusc.yaml) | 64.0(valid) | [Model Weight And Predictions](https://drive.google.com/drive/folders/1gFng2Ob3Papqddh4jK6g6cJeh2KWpS0f) | 135 | ## Acknowledgment 136 | 137 | This repo is built upon [MotionSeg3D](https://github.com/haomo-ai/MotionSeg3D), [FIDNet](https://github.com/placeforyiming/IROS21-FIDNet-SemanticKITTI) and [CENet](https://github.com/huixiancheng/CENet). Thanks to the contributors of these repos! 138 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | TRAIN_PATH = "./" 4 | DEPLOY_PATH = "../../../deploy" 5 | sys.path.insert(0, TRAIN_PATH) 6 | -------------------------------------------------------------------------------- /assert/Network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fengluodb/RangeSeg/f61f703ff2fb3a8bc0a190cfd87daf18b057365a/assert/Network.png -------------------------------------------------------------------------------- /common/laserscanvis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 3 | 4 | import vispy 5 | from vispy.scene import visuals, SceneCanvas 6 | import numpy as np 7 | from matplotlib import pyplot as plt 8 | 9 | 10 | class LaserScanVis: 11 | """Class that creates and handles a visualizer for a pointcloud""" 12 | 13 | def __init__(self, scan, scan_names, label_names, offset=0, 14 | semantics=True, instances=False): 15 | self.scan = scan 16 | self.scan_names = scan_names 17 | self.label_names = label_names 18 | self.offset = offset 19 | self.semantics = semantics 20 | self.instances = instances 21 | # sanity check 22 | if not self.semantics and self.instances: 23 | print("Instances are only allowed when semantics=True") 24 | raise ValueError 25 | 26 | self.reset() 27 | self.update_scan() 28 | 29 | def reset(self): 30 | """ Reset.
""" 31 | # last key press (it should have a mutex, but visualization is not 32 | # safety critical, so let's do things wrong) 33 | self.action = "no" # no, next, back, quit are the possibilities 34 | 35 | # new canvas prepared for visualizing data 36 | self.canvas = SceneCanvas(keys='interactive', show=True) 37 | # interface (n next, b back, q quit, very simple) 38 | self.canvas.events.key_press.connect(self.key_press) 39 | self.canvas.events.draw.connect(self.draw) 40 | # grid 41 | self.grid = self.canvas.central_widget.add_grid() 42 | 43 | # laserscan part 44 | self.scan_view = vispy.scene.widgets.ViewBox( 45 | border_color='white', parent=self.canvas.scene) 46 | self.grid.add_widget(self.scan_view, 0, 0) 47 | self.scan_vis = visuals.Markers() 48 | self.scan_view.camera = 'turntable' 49 | self.scan_view.add(self.scan_vis) 50 | visuals.XYZAxis(parent=self.scan_view.scene) 51 | 52 | # add semantics 53 | if self.semantics: 54 | print("Using semantics in visualizer") 55 | self.sem_view = vispy.scene.widgets.ViewBox( 56 | border_color='white', parent=self.canvas.scene) 57 | self.grid.add_widget(self.sem_view, 0, 1) 58 | self.sem_vis = visuals.Markers() 59 | self.sem_view.camera = 'turntable' 60 | self.sem_view.add(self.sem_vis) 61 | visuals.XYZAxis(parent=self.sem_view.scene) 62 | # self.sem_view.camera.link(self.scan_view.camera) 63 | 64 | if self.instances: 65 | print("Using instances in visualizer") 66 | self.inst_view = vispy.scene.widgets.ViewBox( 67 | border_color='white', parent=self.canvas.scene) 68 | self.grid.add_widget(self.inst_view, 0, 2) 69 | self.inst_vis = visuals.Markers() 70 | self.inst_view.camera = 'turntable' 71 | self.inst_view.add(self.inst_vis) 72 | visuals.XYZAxis(parent=self.inst_view.scene) 73 | # self.inst_view.camera.link(self.scan_view.camera) 74 | 75 | # img canvas size 76 | self.multiplier = 1 77 | self.canvas_W = 1024 78 | self.canvas_H = 64 79 | if self.semantics: 80 | self.multiplier += 1 81 | if self.instances: 82 | self.multiplier += 1 83 | 84 | # new canvas for img 85 | self.img_canvas = SceneCanvas(keys='interactive', show=True, 86 | size=(self.canvas_W, self.canvas_H * self.multiplier)) 87 | # grid 88 | self.img_grid = self.img_canvas.central_widget.add_grid() 89 | # interface (n next, b back, q quit, very simple) 90 | self.img_canvas.events.key_press.connect(self.key_press) 91 | self.img_canvas.events.draw.connect(self.draw) 92 | 93 | # add a view for the depth 94 | self.img_view = vispy.scene.widgets.ViewBox( 95 | border_color='white', parent=self.img_canvas.scene) 96 | self.img_grid.add_widget(self.img_view, 0, 0) 97 | self.img_vis = visuals.Image(cmap='viridis') 98 | self.img_view.add(self.img_vis) 99 | 100 | # add semantics 101 | if self.semantics: 102 | self.sem_img_view = vispy.scene.widgets.ViewBox( 103 | border_color='white', parent=self.img_canvas.scene) 104 | self.img_grid.add_widget(self.sem_img_view, 1, 0) 105 | self.sem_img_vis = visuals.Image(cmap='viridis') 106 | self.sem_img_view.add(self.sem_img_vis) 107 | 108 | # add instances 109 | if self.instances: 110 | self.inst_img_view = vispy.scene.widgets.ViewBox( 111 | border_color='white', parent=self.img_canvas.scene) 112 | self.img_grid.add_widget(self.inst_img_view, 2, 0) 113 | self.inst_img_vis = visuals.Image(cmap='viridis') 114 | self.inst_img_view.add(self.inst_img_vis) 115 | 116 | def get_mpl_colormap(self, cmap_name): 117 | cmap = plt.get_cmap(cmap_name) 118 | 119 | # Initialize the matplotlib color map 120 | sm = plt.cm.ScalarMappable(cmap=cmap) 121 | 122 | # Obtain linear color 
range 123 | color_range = sm.to_rgba(np.linspace(0, 1, 256), bytes=True)[:, 2::-1] 124 | 125 | return color_range.reshape(256, 3).astype(np.float32) / 255.0 126 | 127 | def update_scan(self): 128 | # first open data 129 | self.scan.open_scan(self.scan_names[self.offset], None, None, False) 130 | if self.semantics: 131 | self.scan.open_label(self.label_names[self.offset]) 132 | self.scan.colorize() 133 | 134 | # then change names 135 | title = "scan " + str(self.offset) + " of " + \ 136 | str(len(self.scan_names)-1) 137 | self.canvas.title = title 138 | self.img_canvas.title = title 139 | 140 | # then do all the point cloud stuff 141 | 142 | # plot scan 143 | power = 16 144 | # print() 145 | range_data = np.copy(self.scan.unproj_range) 146 | # print(range_data.max(), range_data.min()) 147 | range_data = range_data**(1 / power) 148 | # print(range_data.max(), range_data.min()) 149 | viridis_range = ((range_data - range_data.min()) / 150 | (range_data.max() - range_data.min()) * 255).astype(np.uint8) 151 | viridis_map = self.get_mpl_colormap("viridis") 152 | viridis_colors = viridis_map[viridis_range] 153 | self.scan_vis.set_data(self.scan.points, 154 | face_color=viridis_colors[..., ::-1], 155 | edge_color=viridis_colors[..., ::-1], 156 | size=1) 157 | 158 | # plot semantics 159 | if self.semantics: 160 | self.sem_vis.set_data(self.scan.points, 161 | face_color=self.scan.sem_label_color[..., ::-1], 162 | edge_color=self.scan.sem_label_color[..., ::-1], 163 | size=1) 164 | 165 | # plot instances 166 | if self.instances: 167 | self.inst_vis.set_data(self.scan.points, 168 | face_color=self.scan.inst_label_color[..., ::-1], 169 | edge_color=self.scan.inst_label_color[..., ::-1], 170 | size=1) 171 | 172 | # now do all the range image stuff 173 | # plot range image 174 | data = np.copy(self.scan.proj_range) 175 | # print(data[data > 0].max(), data[data > 0].min()) 176 | data[data > 0] = data[data > 0]**(1 / power) 177 | data[data < 0] = data[data > 0].min() 178 | # print(data.max(), data.min()) 179 | data = (data - data[data > 0].min()) / \ 180 | (data.max() - data[data > 0].min()) 181 | # print(data.max(), data.min()) 182 | self.img_vis.set_data(data) 183 | self.img_vis.update() 184 | 185 | if self.semantics: 186 | self.sem_img_vis.set_data(self.scan.proj_sem_color[..., ::-1]) 187 | self.sem_img_vis.update() 188 | 189 | if self.instances: 190 | self.inst_img_vis.set_data(self.scan.proj_inst_color[..., ::-1]) 191 | self.inst_img_vis.update() 192 | 193 | # interface 194 | def key_press(self, event): 195 | self.canvas.events.key_press.block() 196 | self.img_canvas.events.key_press.block() 197 | if event.key == 'N': 198 | self.offset += 1 199 | if self.offset >= len(self.scan_names): 200 | self.offset = 0 201 | self.update_scan() 202 | elif event.key == 'B': 203 | self.offset -= 1 204 | if self.offset <= 0: 205 | self.offset = len(self.scan_names)-1 206 | self.update_scan() 207 | elif event.key == 'Q' or event.key == 'Escape': 208 | self.destroy() 209 | 210 | def draw(self, event): 211 | if self.canvas.events.key_press.blocked(): 212 | self.canvas.events.key_press.unblock() 213 | if self.img_canvas.events.key_press.blocked(): 214 | self.img_canvas.events.key_press.unblock() 215 | 216 | def destroy(self): 217 | # destroy the visualization 218 | self.canvas.close() 219 | self.img_canvas.close() 220 | vispy.app.quit() 221 | 222 | def run(self): 223 | vispy.app.run() 224 | -------------------------------------------------------------------------------- /common/posslaserscanvis.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 3 | 4 | import vispy 5 | from vispy.scene import visuals, SceneCanvas 6 | import numpy as np 7 | from matplotlib import pyplot as plt 8 | 9 | 10 | class LaserScanVis: 11 | """Class that creates and handles a visualizer for a pointcloud""" 12 | 13 | def __init__(self, scan, scan_names, tag_names, label_names, offset=0, 14 | semantics=True, instances=False): 15 | self.scan = scan 16 | self.scan_names = scan_names 17 | self.tag_names = tag_names 18 | self.label_names = label_names 19 | self.offset = offset 20 | self.semantics = semantics 21 | 22 | self.reset() 23 | self.update_scan() 24 | 25 | def reset(self): 26 | """ Reset. """ 27 | # last key press (it should have a mutex, but visualization is not 28 | # safety critical, so let's do things wrong) 29 | self.action = "no" # no, next, back, quit are the possibilities 30 | 31 | # new canvas prepared for visualizing data 32 | self.canvas = SceneCanvas(keys='interactive', show=True) 33 | # interface (n next, b back, q quit, very simple) 34 | self.canvas.events.key_press.connect(self.key_press) 35 | self.canvas.events.draw.connect(self.draw) 36 | # grid 37 | self.grid = self.canvas.central_widget.add_grid() 38 | 39 | # laserscan part 40 | self.scan_view = vispy.scene.widgets.ViewBox( 41 | border_color='white', parent=self.canvas.scene) 42 | self.grid.add_widget(self.scan_view, 0, 0) 43 | self.scan_vis = visuals.Markers() 44 | self.scan_view.camera = 'turntable' 45 | self.scan_view.add(self.scan_vis) 46 | visuals.XYZAxis(parent=self.scan_view.scene) 47 | # add semantics 48 | if self.semantics: 49 | print("Using semantics in visualizer") 50 | self.sem_view = vispy.scene.widgets.ViewBox( 51 | border_color='white', parent=self.canvas.scene) 52 | self.grid.add_widget(self.sem_view, 0, 1) 53 | self.sem_vis = visuals.Markers() 54 | self.sem_view.camera = 'turntable' 55 | self.sem_view.add(self.sem_vis) 56 | visuals.XYZAxis(parent=self.sem_view.scene) 57 | # self.sem_view.camera.link(self.scan_view.camera) 58 | 59 | # img canvas size 60 | self.multiplier = 1 61 | self.canvas_W = 1800 62 | self.canvas_H = 40 63 | if self.semantics: 64 | self.multiplier += 1 65 | 66 | # new canvas for img 67 | self.img_canvas = SceneCanvas(keys='interactive', show=True, 68 | size=(self.canvas_W, self.canvas_H * self.multiplier)) 69 | # grid 70 | self.img_grid = self.img_canvas.central_widget.add_grid() 71 | # interface (n next, b back, q quit, very simple) 72 | self.img_canvas.events.key_press.connect(self.key_press) 73 | self.img_canvas.events.draw.connect(self.draw) 74 | 75 | # add a view for the depth 76 | self.img_view = vispy.scene.widgets.ViewBox( 77 | border_color='white', parent=self.img_canvas.scene) 78 | self.img_grid.add_widget(self.img_view, 0, 0) 79 | self.img_vis = visuals.Image(cmap='viridis') 80 | self.img_view.add(self.img_vis) 81 | 82 | # add semantics 83 | if self.semantics: 84 | self.sem_img_view = vispy.scene.widgets.ViewBox( 85 | border_color='white', parent=self.img_canvas.scene) 86 | self.img_grid.add_widget(self.sem_img_view, 1, 0) 87 | self.sem_img_vis = visuals.Image(cmap='viridis') 88 | self.sem_img_view.add(self.sem_img_vis) 89 | 90 | def get_mpl_colormap(self, cmap_name): 91 | cmap = plt.get_cmap(cmap_name) 92 | 93 | # Initialize the matplotlib color map 94 | sm = plt.cm.ScalarMappable(cmap=cmap) 95 | 96 | # Obtain linear color range 97 | color_range = 
sm.to_rgba(np.linspace(0, 1, 256), bytes=True)[:, 2::-1] 98 | 99 | return color_range.reshape(256, 3).astype(np.float32) / 255.0 100 | 101 | def update_scan(self): 102 | # first open data 103 | self.scan.open_scan( 104 | self.scan_names[self.offset], self.tag_names[self.offset]) 105 | if self.semantics: 106 | self.scan.open_label( 107 | self.label_names[self.offset], self.tag_names[self.offset]) 108 | self.scan.colorize() 109 | 110 | # then change names 111 | title = "scan " + str(self.offset) + " of " + \ 112 | str(len(self.scan_names)-1) 113 | self.canvas.title = title 114 | self.img_canvas.title = title 115 | 116 | # then do all the point cloud stuff 117 | 118 | # plot scan 119 | power = 1 120 | # print() 121 | range_data = np.copy(self.scan.unproj_range) 122 | 123 | # print(range_data.max(), range_data.min()) 124 | # range_data = range_data**(1 / power) 125 | # print(range_data.max(), range_data.min()) 126 | viridis_range = ((range_data - range_data.min()) / 127 | (range_data.max() - range_data.min()) * 128 | 255).astype(np.uint8) 129 | viridis_map = self.get_mpl_colormap("viridis") 130 | viridis_colors = viridis_map[viridis_range] 131 | self.scan_vis.set_data(self.scan.points, 132 | face_color=viridis_colors[..., ::-1], 133 | edge_color=viridis_colors[..., ::-1], 134 | size=1) 135 | 136 | # plot semantics 137 | if self.semantics: 138 | self.sem_vis.set_data(self.scan.points, 139 | face_color=self.scan.sem_label_color[..., ::-1], 140 | edge_color=self.scan.sem_label_color[..., ::-1], 141 | size=1) 142 | 143 | # now do all the range image stuff 144 | # plot range image 145 | data = np.copy(self.scan.proj_range) 146 | # print(data[data > 0].max(), data[data > 0].min()) 147 | data[data > 0] = data[data > 0]**(1 / power) 148 | data[data < 0] = data[data > 0].min() 149 | # print(data.max(), data.min()) 150 | data = (data - data[data > 0].min()) / \ 151 | (data.max() - data[data > 0].min()) 152 | # print(data.max(), data.min()) 153 | self.img_vis.set_data(data) 154 | self.img_vis.update() 155 | 156 | if self.semantics: 157 | self.sem_img_vis.set_data(self.scan.proj_sem_color[..., ::-1]) 158 | self.sem_img_vis.update() 159 | 160 | # interface 161 | def key_press(self, event): 162 | self.canvas.events.key_press.block() 163 | self.img_canvas.events.key_press.block() 164 | if event.key == 'N': 165 | self.offset += 1 166 | if self.offset >= len(self.scan_names): 167 | self.offset = 0 168 | self.update_scan() 169 | elif event.key == 'B': 170 | self.offset -= 1 171 | if self.offset <= 0: 172 | self.offset = len(self.scan_names)-1 173 | self.update_scan() 174 | elif event.key == 'Q' or event.key == 'Escape': 175 | self.destroy() 176 | 177 | def draw(self, event): 178 | if self.canvas.events.key_press.blocked(): 179 | self.canvas.events.key_press.unblock() 180 | if self.img_canvas.events.key_press.blocked(): 181 | self.img_canvas.events.key_press.unblock() 182 | 183 | def destroy(self): 184 | # destroy the visualization 185 | self.canvas.close() 186 | self.img_canvas.close() 187 | vispy.app.quit() 188 | 189 | def run(self): 190 | vispy.app.run() 191 | -------------------------------------------------------------------------------- /common/sync_batchnorm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fengluodb/RangeSeg/f61f703ff2fb3a8bc0a190cfd87daf18b057365a/common/sync_batchnorm/__init__.py -------------------------------------------------------------------------------- /common/sync_batchnorm/comm.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : comm.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | import collections 12 | import queue 13 | import threading 14 | 15 | __all__ = ['FutureResult', 'SlavePipe', 'SyncMaster'] 16 | 17 | 18 | class FutureResult(object): 19 | """A thread-safe future implementation. Used only as one-to-one pipe.""" 20 | 21 | def __init__(self): 22 | self._result = None 23 | self._lock = threading.Lock() 24 | self._cond = threading.Condition(self._lock) 25 | 26 | def put(self, result): 27 | with self._lock: 28 | assert self._result is None, 'Previous result has\'t been fetched.' 29 | self._result = result 30 | self._cond.notify() 31 | 32 | def get(self): 33 | with self._lock: 34 | if self._result is None: 35 | self._cond.wait() 36 | 37 | res = self._result 38 | self._result = None 39 | return res 40 | 41 | 42 | _MasterRegistry = collections.namedtuple('MasterRegistry', ['result']) 43 | _SlavePipeBase = collections.namedtuple( 44 | '_SlavePipeBase', ['identifier', 'queue', 'result']) 45 | 46 | 47 | class SlavePipe(_SlavePipeBase): 48 | """Pipe for master-slave communication.""" 49 | 50 | def run_slave(self, msg): 51 | self.queue.put((self.identifier, msg)) 52 | ret = self.result.get() 53 | self.queue.put(True) 54 | return ret 55 | 56 | 57 | class SyncMaster(object): 58 | """An abstract `SyncMaster` object. 59 | 60 | - During the replication, as the data parallel will trigger an callback of each module, all slave devices should 61 | call `register(id)` and obtain an `SlavePipe` to communicate with the master. 62 | - During the forward pass, master device invokes `run_master`, all messages from slave devices will be collected, 63 | and passed to a registered callback. 64 | - After receiving the messages, the master device should gather the information and determine to message passed 65 | back to each slave devices. 66 | """ 67 | 68 | def __init__(self, master_callback): 69 | """ 70 | 71 | Args: 72 | master_callback: a callback to be invoked after having collected messages from slave devices. 73 | """ 74 | self._master_callback = master_callback 75 | self._queue = queue.Queue() 76 | self._registry = collections.OrderedDict() 77 | self._activated = False 78 | 79 | def __getstate__(self): 80 | return {'master_callback': self._master_callback} 81 | 82 | def __setstate__(self, state): 83 | self.__init__(state['master_callback']) 84 | 85 | def register_slave(self, identifier): 86 | """ 87 | Register an slave device. 88 | 89 | Args: 90 | identifier: an identifier, usually is the device id. 91 | 92 | Returns: a `SlavePipe` object which can be used to communicate with the master device. 93 | 94 | """ 95 | if self._activated: 96 | assert self._queue.empty(), 'Queue is not clean before next initialization.' 97 | self._activated = False 98 | self._registry.clear() 99 | future = FutureResult() 100 | self._registry[identifier] = _MasterRegistry(future) 101 | return SlavePipe(identifier, self._queue, future) 102 | 103 | def run_master(self, master_msg): 104 | """ 105 | Main entry for the master device in each forward pass. 
106 | The messages were first collected from each devices (including the master device), and then 107 | an callback will be invoked to compute the message to be sent back to each devices 108 | (including the master device). 109 | 110 | Args: 111 | master_msg: the message that the master want to send to itself. This will be placed as the first 112 | message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example. 113 | 114 | Returns: the message to be sent back to the master device. 115 | 116 | """ 117 | self._activated = True 118 | 119 | intermediates = [(0, master_msg)] 120 | for i in range(self.nr_slaves): 121 | intermediates.append(self._queue.get()) 122 | 123 | results = self._master_callback(intermediates) 124 | assert results[0][0] == 0, 'The first result should belongs to the master.' 125 | 126 | for i, res in results: 127 | if i == 0: 128 | continue 129 | self._registry[i].result.put(res) 130 | 131 | for i in range(self.nr_slaves): 132 | assert self._queue.get() is True 133 | 134 | return results[0][1] 135 | 136 | @property 137 | def nr_slaves(self): 138 | return len(self._registry) 139 | -------------------------------------------------------------------------------- /common/sync_batchnorm/replicate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : replicate.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | import functools 12 | 13 | from torch.nn.parallel.data_parallel import DataParallel 14 | 15 | __all__ = [ 16 | 'CallbackContext', 17 | 'execute_replication_callbacks', 18 | 'DataParallelWithCallback', 19 | 'patch_replication_callback' 20 | ] 21 | 22 | 23 | class CallbackContext(object): 24 | pass 25 | 26 | 27 | def execute_replication_callbacks(modules): 28 | """ 29 | Execute an replication callback `__data_parallel_replicate__` on each module created by original replication. 30 | 31 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` 32 | 33 | Note that, as all modules are isomorphism, we assign each sub-module with a context 34 | (shared among multiple copies of this module on different devices). 35 | Through this context, different copies can share some information. 36 | 37 | We guarantee that the callback on the master copy (the first copy) will be called ahead of calling the callback 38 | of any slave copies. 39 | """ 40 | master_copy = modules[0] 41 | nr_modules = len(list(master_copy.modules())) 42 | ctxs = [CallbackContext() for _ in range(nr_modules)] 43 | 44 | for i, module in enumerate(modules): 45 | for j, m in enumerate(module.modules()): 46 | if hasattr(m, '__data_parallel_replicate__'): 47 | m.__data_parallel_replicate__(ctxs[j], i) 48 | 49 | 50 | class DataParallelWithCallback(DataParallel): 51 | """ 52 | Data Parallel with a replication callback. 53 | 54 | An replication callback `__data_parallel_replicate__` of each module will be invoked after being created by 55 | original `replicate` function. 
56 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` 57 | 58 | Examples: 59 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 60 | > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 61 | # sync_bn.__data_parallel_replicate__ will be invoked. 62 | """ 63 | 64 | def replicate(self, module, device_ids): 65 | modules = super(DataParallelWithCallback, 66 | self).replicate(module, device_ids) 67 | execute_replication_callbacks(modules) 68 | return modules 69 | 70 | 71 | def patch_replication_callback(data_parallel): 72 | """ 73 | Monkey-patch an existing `DataParallel` object. Add the replication callback. 74 | Useful when you have customized `DataParallel` implementation. 75 | 76 | Examples: 77 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 78 | > sync_bn = DataParallel(sync_bn, device_ids=[0, 1]) 79 | > patch_replication_callback(sync_bn) 80 | # this is equivalent to 81 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 82 | > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 83 | """ 84 | 85 | assert isinstance(data_parallel, DataParallel) 86 | 87 | old_replicate = data_parallel.replicate 88 | 89 | @functools.wraps(old_replicate) 90 | def new_replicate(module, device_ids): 91 | modules = old_replicate(module, device_ids) 92 | execute_replication_callbacks(modules) 93 | return modules 94 | 95 | data_parallel.replicate = new_replicate 96 | -------------------------------------------------------------------------------- /config/arch/CENet.yaml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # training parameters 3 | ################################################################################ 4 | train: 5 | pipeline: "CENet" # model name 6 | loss: "xentropy" # must be either xentropy or iou 7 | max_epochs: 50 8 | batch_size: 6 # batch size 9 | report_batch: 10 # every x batches, report loss 10 | report_epoch: 1 # every x epochs, report validation set 11 | epsilon_w: 0.001 # class weight w = 1 / (content + epsilon_w) 12 | save_summary: False # Summary of weight histograms for tensorboard 13 | save_scans: False # False doesn't save anything, True saves some sample images 14 | # (one per batch of the last calculated batch) in log folder 15 | show_scans: False # show scans during training 16 | workers: 12 # number of threads to get data 17 | 18 | syncbn: True # sync batchnorm 19 | act: Hardswish # act layer, LeakyReLU, SiLU, Hardswish, GELU 20 | 21 | optimizer: "adam" # sgd or adam 22 | 23 | scheduler: "consine" # "consine" or "warmup" 24 | consine: 25 | min_lr: 0.00001 26 | max_lr: 0.00200 27 | first_cycle: 50 28 | cycle: 2 29 | wup_epochs: 1 30 | gamma: 1.0 31 | warmup: 32 | lr: 0.01 # learning rate 33 | wup_epochs: 1 # warmup during first XX epochs (can be float) 34 | lr_decay: 0.99 # learning rate decay per epoch after initial cycle (from min lr) 35 | momentum: 0.9 # sgd momentum 36 | 37 | aux_loss: 38 | use: True 39 | lamda: [1.0, 1.0, 1.0] 40 | 41 | # for mos 42 | residual: False # This needs to be the same as in the dataset params below! 43 | n_input_scans: 2 # This needs to be the same as in the dataset params below! 
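# Illustrative worked example of the class-weight rule noted at epsilon_w above
# (w = 1 / (content + epsilon_w), with epsilon_w = 0.001); the content values
# below are hypothetical class frequencies, the real ones come from the label config:
#   content = 0.03  ->  w = 1 / (0.03 + 0.001) ~= 32.3
#   content = 0.20  ->  w = 1 / (0.20 + 0.001) ~=  5.0
# so rare classes receive proportionally larger weights in the loss.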
44 | 45 | ################################################################################ 46 | # postproc parameters 47 | ################################################################################ 48 | post: 49 | CRF: 50 | use: False 51 | train: True 52 | params: False # this should be a dict when in use 53 | KNN: 54 | use: False # This parameter default is false 55 | params: 56 | knn: 7 57 | search: 7 58 | sigma: 1.0 59 | cutoff: 2.0 60 | 61 | ################################################################################ 62 | # classification head parameters 63 | ################################################################################ 64 | # dataset (to find parser) 65 | dataset: 66 | labels: "kitti" 67 | scans: "kitti" 68 | max_points: 150000 # max of any scan in dataset 69 | sensor: 70 | name: "HDL64" 71 | type: "spherical" # projective 72 | fov_up: 3 73 | fov_down: -25 74 | img_prop: 75 | width: 2048 76 | height: 64 77 | img_means: #range,x,y,z,signal 78 | - 11.71279 79 | - -0.1023471 80 | - 0.4952 81 | - -1.0545 82 | - 0.2877 83 | img_stds: #range,x,y,z,signal 84 | - 10.24 85 | - 12.295865 86 | - 9.4287 87 | - 0.8643 88 | - 0.1450 89 | 90 | # img_means: #range,x,y,z,signal 91 | # - 12.12 92 | # - 10.88 93 | # - 0.23 94 | # - -1.04 95 | # - 0.21 96 | # img_stds: #range,x,y,z,signal 97 | # - 12.32 98 | # - 11.47 99 | # - 6.91 100 | # - 0.86 101 | # - 0.16 102 | 103 | # for mos 104 | n_input_scans: 2 # This needs to be the same as in the backbone params above! 105 | residual: False # This needs to be the same as in the backbone params above! 106 | transform: False # tranform the last n_input_scans - 1 frames before concatenation 107 | use_normal: False # if use normal vector as channels of range image 108 | -------------------------------------------------------------------------------- /config/arch/CENet_nusc.yaml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # training parameters 3 | ################################################################################ 4 | train: 5 | pipeline: "CENet" # model name 6 | loss: "xentropy" # must be either xentropy or iou 7 | max_epochs: 50 8 | batch_size: 48 # batch size 9 | report_batch: 10 # every x batches, report loss 10 | report_epoch: 1 # every x epochs, report validation set 11 | epsilon_w: 0.001 # class weight w = 1 / (content + epsilon_w) 12 | save_summary: False # Summary of weight histograms for tensorboard 13 | save_scans: False # False doesn't save anything, True saves some sample images 14 | # (one per batch of the last calculated batch) in log folder 15 | show_scans: False # show scans during training 16 | workers: 12 # number of threads to get data 17 | 18 | syncbn: True # sync batchnorm 19 | act: Hardswish # act layer, LeakyReLU, SiLU, Hardswish, GELU 20 | 21 | optimizer: "adam" # sgd or adam 22 | 23 | scheduler: "consine" # "consine" or "warmup" 24 | consine: 25 | min_lr: 0.00001 26 | max_lr: 0.00200 27 | first_cycle: 50 28 | cycle: 2 29 | wup_epochs: 1 30 | gamma: 1.0 31 | warmup: 32 | lr: 0.01 # learning rate 33 | wup_epochs: 1 # warmup during first XX epochs (can be float) 34 | lr_decay: 0.99 # learning rate decay per epoch after initial cycle (from min lr) 35 | momentum: 0.9 # sgd momentum 36 | 37 | aux_loss: 38 | use: True 39 | lamda: [1.0, 1.0, 1.0] 40 | 41 | # for mos 42 | residual: False # This needs to be the same as in the dataset params below! 
43 | n_input_scans: 2 # This needs to be the same as in the dataset params below! 44 | 45 | ################################################################################ 46 | # postproc parameters 47 | ################################################################################ 48 | post: 49 | CRF: 50 | use: False 51 | train: True 52 | params: False # this should be a dict when in use 53 | KNN: 54 | use: False # This parameter default is false 55 | params: 56 | knn: 7 57 | search: 7 58 | sigma: 1.0 59 | cutoff: 2.0 60 | 61 | ################################################################################ 62 | # classification head parameters 63 | ################################################################################ 64 | # dataset (to find parser) 65 | dataset: 66 | labels: "kitti" 67 | scans: "kitti" 68 | max_points: 35000 # max of any scan in dataset 69 | sensor: 70 | name: "HDL32" 71 | type: "spherical" # projective 72 | fov_up: 10 73 | fov_down: -30 74 | img_prop: 75 | width: 1024 76 | height: 32 77 | img_means: #range,x,y,z,signal 78 | - 1.7835 79 | - 0.0325 80 | - 0.1707 81 | - 0.0283 82 | - 2.7328 83 | img_stds: #range,x,y,z,signal 84 | - 9.5803 85 | - 6.7944 86 | - 8.1486 87 | - 1.1270 88 | - 17.4203 89 | # img_means: #range,x,y,z,signal 90 | # - 12.12 91 | # - 10.88 92 | # - 0.23 93 | # - -1.04 94 | # - 0.21 95 | # img_stds: #range,x,y,z,signal 96 | # - 12.32 97 | # - 11.47 98 | # - 6.91 99 | # - 0.86 100 | # - 0.16 101 | 102 | # for mos 103 | n_input_scans: 2 # This needs to be the same as in the backbone params above! 104 | residual: False # This needs to be the same as in the backbone params above! 105 | transform: False # tranform the last n_input_scans - 1 frames before concatenation 106 | use_normal: False # if use normal vector as channels of range image 107 | -------------------------------------------------------------------------------- /config/arch/CENet_poss.yaml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # training parameters 3 | ################################################################################ 4 | train: 5 | pipeline: "CENet" # model name 6 | loss: "xentropy" # must be either xentropy or iou 7 | max_epochs: 60 8 | batch_size: 6 # batch size 9 | report_batch: 10 # every x batches, report loss 10 | report_epoch: 1 # every x epochs, report validation set 11 | epsilon_w: 0.001 # class weight w = 1 / (content + epsilon_w) 12 | save_summary: False # Summary of weight histograms for tensorboard 13 | save_scans: False # False doesn't save anything, True saves some sample images 14 | # (one per batch of the last calculated batch) in log folder 15 | show_scans: False # show scans during training 16 | workers: 12 # number of threads to get data 17 | 18 | syncbn: True # sync batchnorm 19 | act: Hardswish # act layer, LeakyReLU, SiLU, Hardswish, GELU 20 | 21 | optimizer: "adam" # sgd or adam 22 | 23 | scheduler: "consine" # "consine" or "warmup" 24 | consine: 25 | min_lr: 0.00001 26 | max_lr: 0.00200 27 | first_cycle: 30 28 | cycle: 1 29 | wup_epochs: 1 30 | gamma: 1.0 31 | warmup: 32 | lr: 0.01 # learning rate 33 | wup_epochs: 1 # warmup during first XX epochs (can be float) 34 | lr_decay: 0.99 # learning rate decay per epoch after initial cycle (from min lr) 35 | momentum: 0.9 # sgd momentum 36 | 37 | aux_loss: 38 | use: True 39 | lamda: [1.0, 1.0, 1.0] 40 | 41 | # for mos 42 | residual: False # This needs to be the same 
as in the dataset params below! 43 | n_input_scans: 2 # This needs to be the same as in the dataset params below! 44 | 45 | ################################################################################ 46 | # postproc parameters 47 | ################################################################################ 48 | post: 49 | CRF: 50 | use: False 51 | train: True 52 | params: False # this should be a dict when in use 53 | KNN: 54 | use: False # This parameter default is false 55 | params: 56 | knn: 7 57 | search: 7 58 | sigma: 1.0 59 | cutoff: 2.0 60 | 61 | ################################################################################ 62 | # classification head parameters 63 | ################################################################################ 64 | # dataset (to find parser) 65 | dataset: 66 | labels: "poss" 67 | scans: "poss" 68 | max_points: 72000 # max of any scan in dataset 69 | sensor: 70 | name: "Pandora" 71 | type: "spherical" # projective 72 | fov_up: 7 73 | fov_down: -16 74 | img_prop: 75 | width: 1800 76 | height: 40 77 | img_means: #range,x,y,z,signal (40, 1800) 78 | - 22.26779 79 | - 0.51144063 80 | - 1.5727469 81 | - -0.6350901 82 | - 13.875261 83 | img_stds: #range,x,y,z,signal 84 | - 17.735949 85 | - 17.422485 86 | - 22.217215 87 | - 1.6433295 88 | - 14.0925865 89 | 90 | # for mos 91 | n_input_scans: 2 # This needs to be the same as in the backbone params above! 92 | residual: False # This needs to be the same as in the backbone params above! 93 | transform: False # tranform the last n_input_scans - 1 frames before concatenation 94 | use_normal: False # if use normal vector as channels of range image -------------------------------------------------------------------------------- /config/arch/Fid.yaml: -------------------------------------------------------------------------------- 1 | # training parameters 2 | ################################################################################ 3 | train: 4 | pipeline: "Fid" # model name 5 | loss: "xentropy" # must be either xentropy or iou 6 | max_epochs: 50 7 | batch_size: 6 # batch size 8 | report_batch: 10 # every x batches, report loss 9 | report_epoch: 1 # every x epochs, report validation set 10 | epsilon_w: 0.001 # class weight w = 1 / (content + epsilon_w) 11 | save_summary: False # Summary of weight histograms for tensorboard 12 | save_scans: False # False doesn't save anything, True saves some sample images 13 | # (one per batch of the last calculated batch) in log folder 14 | show_scans: False # show scans during training 15 | workers: 12 # number of threads to get data 16 | 17 | syncbn: True # sync batchnorm 18 | act: LeakyReLU # act layer 19 | 20 | optimizer: "adam" # sgd or adam 21 | sgd: 22 | momentum: 0.9 # sgd momentum 23 | w_decay: 0.0001 # weight decay 24 | 25 | scheduler: "consine" # "consine" or "warmup" 26 | consine: 27 | min_lr: 0.00001 28 | max_lr: 0.00200 29 | first_cycle: 50 30 | cycle: 2 31 | wup_epochs: 1 32 | gamma: 1.0 33 | warmup: 34 | lr: 0.01 # learning rate 35 | wup_epochs: 1 # warmup during first XX epochs (can be float) 36 | lr_decay: 0.99 # learning rate decay per epoch after initial cycle (from min lr) 37 | momentum: 0.9 # sgd momentum 38 | 39 | aux_loss: 40 | use: False 41 | lamda: [0.5, 1.0, 1.0] 42 | 43 | # for mos 44 | residual: False # This needs to be the same as in the dataset params below! 45 | n_input_scans: 8 # This needs to be the same as in the dataset params below! 
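# Illustrative note on the sensor settings further below (fov_up, fov_down,
# img_prop): assuming the standard RangeNet++-style spherical projection used
# by range-view methods (as in common/laserscan.py), a point with range r,
# yaw = -atan2(y, x) and pitch = asin(z / r) maps, with angles in radians, to
# the range-image pixel
#   u = 0.5 * (yaw / pi + 1.0) * width
#   v = (1.0 - (pitch + |fov_down|) / (|fov_up| + |fov_down|)) * height
# so fov_up / fov_down bound the vertical field of view stretched over the
# image height.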
46 | 47 | ################################################################################ 48 | # postproc parameters 49 | ################################################################################ 50 | post: 51 | CRF: 52 | use: False 53 | train: True 54 | params: False # this should be a dict when in use 55 | KNN: 56 | use: True # This parameter default is false 57 | params: 58 | knn: 7 59 | search: 7 60 | sigma: 1.0 61 | cutoff: 2.0 62 | 63 | ################################################################################ 64 | # classification head parameters 65 | ################################################################################ 66 | # dataset (to find parser) 67 | dataset: 68 | labels: "kitti" 69 | scans: "kitti" 70 | max_points: 150000 # max of any scan in dataset 71 | sensor: 72 | name: "HDL64" 73 | type: "spherical" # projective 74 | fov_up: 3 75 | fov_down: -25 76 | img_prop: 77 | width: 512 78 | height: 64 79 | img_means: #range,x,y,z,signal 80 | - 11.71279 81 | - -0.1023471 82 | - 0.4952 83 | - -1.0545 84 | - 0.2877 85 | img_stds: #range,x,y,z,signal 86 | - 10.24 87 | - 12.295865 88 | - 9.4287 89 | - 0.8643 90 | - 0.1450 91 | 92 | # img_means: #range,x,y,z,signal 93 | # - 12.12 94 | # - 10.88 95 | # - 0.23 96 | # - -1.04 97 | # - 0.21 98 | # img_stds: #range,x,y,z,signal 99 | # - 12.32 100 | # - 11.47 101 | # - 6.91 102 | # - 0.86 103 | # - 0.16 104 | 105 | # for mos 106 | n_input_scans: 8 # This needs to be the same as in the backbone params above! 107 | residual: False # This needs to be the same as in the backbone params above! 108 | transform: False # tranform the last n_input_scans - 1 frames before concatenation 109 | use_normal: False # if use normal vector as channels of range image 110 | -------------------------------------------------------------------------------- /config/arch/Fid_nusc.yaml: -------------------------------------------------------------------------------- 1 | # training parameters 2 | ################################################################################ 3 | train: 4 | pipeline: "Fid" # model name 5 | loss: "xentropy" # must be either xentropy or iou 6 | max_epochs: 50 7 | batch_size: 6 # batch size 8 | report_batch: 10 # every x batches, report loss 9 | report_epoch: 1 # every x epochs, report validation set 10 | epsilon_w: 0.001 # class weight w = 1 / (content + epsilon_w) 11 | save_summary: False # Summary of weight histograms for tensorboard 12 | save_scans: False # False doesn't save anything, True saves some sample images 13 | # (one per batch of the last calculated batch) in log folder 14 | show_scans: False # show scans during training 15 | workers: 12 # number of threads to get data 16 | 17 | syncbn: True # sync batchnorm 18 | act: LeakyReLU # act layer 19 | 20 | optimizer: "adam" # sgd or adam 21 | sgd: 22 | momentum: 0.9 # sgd momentum 23 | w_decay: 0.0001 # weight decay 24 | 25 | scheduler: "consine" # "consine" or "warmup" 26 | consine: 27 | min_lr: 0.00001 28 | max_lr: 0.00200 29 | first_cycle: 50 30 | cycle: 2 31 | wup_epochs: 1 32 | gamma: 1.0 33 | warmup: 34 | lr: 0.01 # learning rate 35 | wup_epochs: 1 # warmup during first XX epochs (can be float) 36 | lr_decay: 0.99 # learning rate decay per epoch after initial cycle (from min lr) 37 | momentum: 0.9 # sgd momentum 38 | 39 | aux_loss: 40 | use: False 41 | lamda: [0.5, 1.0, 1.0] 42 | 43 | # for mos 44 | residual: False # This needs to be the same as in the dataset params below! 
45 | n_input_scans: 8 # This needs to be the same as in the dataset params below! 46 | 47 | ################################################################################ 48 | # postproc parameters 49 | ################################################################################ 50 | post: 51 | CRF: 52 | use: False 53 | train: True 54 | params: False # this should be a dict when in use 55 | KNN: 56 | use: True # This parameter default is false 57 | params: 58 | knn: 7 59 | search: 7 60 | sigma: 1.0 61 | cutoff: 2.0 62 | 63 | ################################################################################ 64 | # classification head parameters 65 | ################################################################################ 66 | # dataset (to find parser) 67 | dataset: 68 | labels: "kitti" 69 | scans: "kitti" 70 | max_points: 35000 # max of any scan in dataset 71 | sensor: 72 | name: "HDL32" 73 | type: "spherical" # projective 74 | fov_up: 10 75 | fov_down: -30 76 | img_prop: 77 | width: 1024 78 | height: 32 79 | img_means: #range,x,y,z,signal 80 | - 1.7835 81 | - 0.0325 82 | - 0.1707 83 | - 0.0283 84 | - 2.7328 85 | img_stds: #range,x,y,z,signal 86 | - 9.5803 87 | - 6.7944 88 | - 8.1486 89 | - 1.1270 90 | - 17.4203 91 | # img_means: #range,x,y,z,signal 92 | # - 12.12 93 | # - 10.88 94 | # - 0.23 95 | # - -1.04 96 | # - 0.21 97 | # img_stds: #range,x,y,z,signal 98 | # - 12.32 99 | # - 11.47 100 | # - 6.91 101 | # - 0.86 102 | # - 0.16 103 | 104 | # for mos 105 | n_input_scans: 8 # This needs to be the same as in the backbone params above! 106 | residual: False # This needs to be the same as in the backbone params above! 107 | transform: False # tranform the last n_input_scans - 1 frames before concatenation 108 | use_normal: False # if use normal vector as channels of range image 109 | -------------------------------------------------------------------------------- /config/arch/Fid_poss.yaml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # training parameters 3 | ################################################################################ 4 | train: 5 | pipeline: "Fid" # model name 6 | loss: "xentropy" # must be either xentropy or iou 7 | max_epochs: 50 8 | batch_size: 6 # batch size 9 | report_batch: 10 # every x batches, report loss 10 | report_epoch: 1 # every x epochs, report validation set 11 | epsilon_w: 0.001 # class weight w = 1 / (content + epsilon_w) 12 | save_summary: False # Summary of weight histograms for tensorboard 13 | save_scans: False # False doesn't save anything, True saves some sample images 14 | # (one per batch of the last calculated batch) in log folder 15 | show_scans: False # show scans during training 16 | workers: 12 # number of threads to get data 17 | 18 | syncbn: True # sync batchnorm 19 | act: LeakyReLU # act layer 20 | 21 | optimizer: "adam" # sgd or adam 22 | 23 | scheduler: "consine" # "consine" or "warmup" 24 | consine: 25 | min_lr: 0.00001 26 | max_lr: 0.00200 27 | first_cycle: 50 28 | cycle: 1 29 | wup_epochs: 1 30 | gamma: 1.0 31 | warmup: 32 | lr: 0.01 # learning rate 33 | wup_epochs: 1 # warmup during first XX epochs (can be float) 34 | lr_decay: 0.99 # learning rate decay per epoch after initial cycle (from min lr) 35 | momentum: 0.9 # sgd momentum 36 | 37 | aux_loss: 38 | use: False 39 | lamda: [1.0, 1.0, 1.0] 40 | 41 | # for mos 42 | residual: False # This needs to be the same as in the dataset params below! 
43 | n_input_scans: 2 # This needs to be the same as in the dataset params below! 44 | 45 | ################################################################################ 46 | # postproc parameters 47 | ################################################################################ 48 | post: 49 | CRF: 50 | use: False 51 | train: True 52 | params: False # this should be a dict when in use 53 | KNN: 54 | use: True # This parameter default is false 55 | params: 56 | knn: 7 57 | search: 7 58 | sigma: 1.0 59 | cutoff: 2.0 60 | 61 | ################################################################################ 62 | # classification head parameters 63 | ################################################################################ 64 | # dataset (to find parser) 65 | dataset: 66 | labels: "poss" 67 | scans: "poss" 68 | max_points: 72000 # max of any scan in dataset 69 | sensor: 70 | name: "Pandora" 71 | type: "spherical" # projective 72 | fov_up: 7 73 | fov_down: -16 74 | img_prop: 75 | width: 1800 76 | height: 40 77 | img_means: #range,x,y,z,signal (40, 1800) 78 | - 22.26779 79 | - 0.51144063 80 | - 1.5727469 81 | - -0.6350901 82 | - 13.875261 83 | img_stds: #range,x,y,z,signal 84 | - 17.735949 85 | - 17.422485 86 | - 22.217215 87 | - 1.6433295 88 | - 14.0925865 89 | 90 | # for mos 91 | n_input_scans: 2 # This needs to be the same as in the backbone params above! 92 | residual: False # This needs to be the same as in the backbone params above! 93 | transform: False # tranform the last n_input_scans - 1 frames before concatenation 94 | use_normal: False # if use normal vector as channels of range image -------------------------------------------------------------------------------- /config/arch/LENet.yaml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # training parameters 3 | ################################################################################ 4 | train: 5 | pipeline: "LENet" # model name 6 | loss: "xentropy" # must be either xentropy or iou 7 | max_epochs: 50 8 | batch_size: 6 # batch size 9 | report_batch: 10 # every x batches, report loss 10 | report_epoch: 1 # every x epochs, report validation set 11 | epsilon_w: 0.001 # class weight w = 1 / (content + epsilon_w) 12 | save_summary: False # Summary of weight histograms for tensorboard 13 | save_scans: False # False doesn't save anything, True saves some sample images 14 | # (one per batch of the last calculated batch) in log folder 15 | show_scans: False # show scans during training 16 | workers: 12 # number of threads to get data 17 | 18 | syncbn: True # sync batchnorm 19 | act: SiLU # act layer, LeakyReLU, SiLU, Hardswish, GELU 20 | 21 | optimizer: "adam" # sgd or adam 22 | 23 | scheduler: "consine" # "consine" or "warmup" 24 | consine: 25 | min_lr: 0.00001 26 | max_lr: 0.00200 27 | first_cycle: 50 28 | cycle: 2 29 | wup_epochs: 1 30 | gamma: 1.0 31 | warmup: 32 | lr: 0.01 # learning rate 33 | wup_epochs: 1 # warmup during first XX epochs (can be float) 34 | lr_decay: 0.99 # learning rate decay per epoch after initial cycle (from min lr) 35 | momentum: 0.9 # sgd momentum 36 | 37 | aux_loss: 38 | use: True 39 | lamda: [0.5, 1.0, 1.0] 40 | 41 | # for mos 42 | residual: False # This needs to be the same as in the dataset params below! 43 | n_input_scans: 2 # This needs to be the same as in the dataset params below! 
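# Illustrative note on the "consine" scheduler block above, assuming the common
# cosine-annealing-with-warm-restarts formulation (the exact behaviour is defined
# by modules/scheduler/consine.py): within the first cycle of first_cycle epochs,
# the learning rate roughly follows
#   lr(t) = min_lr + (max_lr - min_lr) * t / wup_epochs,                    0 <= t < wup_epochs
#   lr(t) = min_lr + 0.5 * (max_lr - min_lr)
#                  * (1 + cos(pi * (t - wup_epochs) / (first_cycle - wup_epochs))),
#                                                                           wup_epochs <= t < first_cycle
# after which the cycle restarts with its length scaled by `cycle` and its peak
# learning rate scaled by `gamma`.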
44 | 45 | ################################################################################ 46 | # postproc parameters 47 | ################################################################################ 48 | post: 49 | CRF: 50 | use: False 51 | train: True 52 | params: False # this should be a dict when in use 53 | KNN: 54 | use: True # This parameter default is false 55 | params: 56 | knn: 7 57 | search: 7 58 | sigma: 1.0 59 | cutoff: 2.0 60 | 61 | ################################################################################ 62 | # classification head parameters 63 | ################################################################################ 64 | # dataset (to find parser) 65 | dataset: 66 | labels: "kitti" 67 | scans: "kitti" 68 | max_points: 150000 # max of any scan in dataset 69 | sensor: 70 | name: "HDL64" 71 | type: "spherical" # projective 72 | fov_up: 3 73 | fov_down: -25 74 | img_prop: 75 | width: 2048 76 | height: 64 77 | img_means: #range,x,y,z,signal 78 | - 11.71279 79 | - -0.1023471 80 | - 0.4952 81 | - -1.0545 82 | - 0.2877 83 | img_stds: #range,x,y,z,signal 84 | - 10.24 85 | - 12.295865 86 | - 9.4287 87 | - 0.8643 88 | - 0.1450 89 | 90 | # img_means: #range,x,y,z,signal 91 | # - 12.12 92 | # - 10.88 93 | # - 0.23 94 | # - -1.04 95 | # - 0.21 96 | # img_stds: #range,x,y,z,signal 97 | # - 12.32 98 | # - 11.47 99 | # - 6.91 100 | # - 0.86 101 | # - 0.16 102 | 103 | # for mos 104 | n_input_scans: 2 # This needs to be the same as in the backbone params above! 105 | residual: False # This needs to be the same as in the backbone params above! 106 | transform: False # tranform the last n_input_scans - 1 frames before concatenation 107 | use_normal: False # if use normal vector as channels of range image 108 | -------------------------------------------------------------------------------- /config/arch/LENet_nusc.yaml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # training parameters 3 | ################################################################################ 4 | train: 5 | pipeline: "LENet" # model name 6 | loss: "xentropy" # must be either xentropy or iou 7 | max_epochs: 50 8 | batch_size: 32 # batch size 9 | report_batch: 10 # every x batches, report loss 10 | report_epoch: 1 # every x epochs, report validation set 11 | epsilon_w: 0.001 # class weight w = 1 / (content + epsilon_w) 12 | save_summary: False # Summary of weight histograms for tensorboard 13 | save_scans: False # False doesn't save anything, True saves some sample images 14 | # (one per batch of the last calculated batch) in log folder 15 | show_scans: False # show scans during training 16 | workers: 12 # number of threads to get data 17 | 18 | syncbn: True # sync batchnorm 19 | act: SiLU # act layer, LeakyReLU, SiLU, Hardswish, GELU 20 | 21 | optimizer: "adam" # sgd or adam 22 | 23 | scheduler: "consine" # "consine" or "warmup" 24 | consine: 25 | min_lr: 0.00001 26 | max_lr: 0.00200 27 | first_cycle: 50 28 | cycle: 2 29 | wup_epochs: 1 30 | gamma: 1.0 31 | warmup: 32 | lr: 0.01 # learning rate 33 | wup_epochs: 1 # warmup during first XX epochs (can be float) 34 | lr_decay: 0.99 # learning rate decay per epoch after initial cycle (from min lr) 35 | momentum: 0.9 # sgd momentum 36 | 37 | aux_loss: 38 | use: True 39 | lamda: [1.0, 1.0, 1.0] 40 | 41 | # for mos 42 | residual: False # This needs to be the same as in the dataset params below! 
43 | n_input_scans: 2 # This needs to be the same as in the dataset params below! 44 | 45 | ################################################################################ 46 | # postproc parameters 47 | ################################################################################ 48 | post: 49 | CRF: 50 | use: False 51 | train: True 52 | params: False # this should be a dict when in use 53 | KNN: 54 | use: True # This parameter default is false 55 | params: 56 | knn: 7 57 | search: 7 58 | sigma: 1.0 59 | cutoff: 2.0 60 | 61 | ################################################################################ 62 | # classification head parameters 63 | ################################################################################ 64 | # dataset (to find parser) 65 | dataset: 66 | labels: "kitti" 67 | scans: "kitti" 68 | max_points: 35000 # max of any scan in dataset 69 | sensor: 70 | name: "HDL32" 71 | type: "spherical" # projective 72 | fov_up: 10 73 | fov_down: -30 74 | img_prop: 75 | width: 1024 76 | height: 32 77 | img_means: #range,x,y,z,signal 78 | - 1.7835 79 | - 0.0325 80 | - 0.1707 81 | - 0.0283 82 | - 2.7328 83 | img_stds: #range,x,y,z,signal 84 | - 9.5803 85 | - 6.7944 86 | - 8.1486 87 | - 1.1270 88 | - 17.4203 89 | # img_means: #range,x,y,z,signal 90 | # - 12.12 91 | # - 10.88 92 | # - 0.23 93 | # - -1.04 94 | # - 0.21 95 | # img_stds: #range,x,y,z,signal 96 | # - 12.32 97 | # - 11.47 98 | # - 6.91 99 | # - 0.86 100 | # - 0.16 101 | 102 | # for mos 103 | n_input_scans: 2 # This needs to be the same as in the backbone params above! 104 | residual: False # This needs to be the same as in the backbone params above! 105 | transform: False # tranform the last n_input_scans - 1 frames before concatenation 106 | use_normal: False # if use normal vector as channels of range image 107 | -------------------------------------------------------------------------------- /config/arch/LENet_poss.yaml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # training parameters 3 | ################################################################################ 4 | train: 5 | pipeline: "LENet" # model name 6 | loss: "xentropy" # must be either xentropy or iou 7 | max_epochs: 60 8 | batch_size: 6 # batch size 9 | report_batch: 10 # every x batches, report loss 10 | report_epoch: 1 # every x epochs, report validation set 11 | epsilon_w: 0.001 # class weight w = 1 / (content + epsilon_w) 12 | save_summary: False # Summary of weight histograms for tensorboard 13 | save_scans: False # False doesn't save anything, True saves some sample images 14 | # (one per batch of the last calculated batch) in log folder 15 | show_scans: False # show scans during training 16 | workers: 12 # number of threads to get data 17 | 18 | syncbn: True # sync batchnorm 19 | act: SiLU # act layer, LeakyReLU, SiLU, Hardswish, GELU 20 | 21 | optimizer: "adam" # sgd or adam 22 | 23 | scheduler: "consine" # "consine" or "warmup" 24 | consine: 25 | min_lr: 0.00001 26 | max_lr: 0.00200 27 | first_cycle: 30 28 | cycle: 1 29 | wup_epochs: 1 30 | gamma: 1.0 31 | warmup: 32 | lr: 0.01 # learning rate 33 | wup_epochs: 1 # warmup during first XX epochs (can be float) 34 | lr_decay: 0.99 # learning rate decay per epoch after initial cycle (from min lr) 35 | momentum: 0.9 # sgd momentum 36 | 37 | aux_loss: 38 | use: True 39 | lamda: [0.5, 1.0, 1.0] 40 | 41 | # for mos 42 | residual: False # This needs to be the same as in 
the dataset params below! 43 | n_input_scans: 2 # This needs to be the same as in the dataset params below! 44 | 45 | ################################################################################ 46 | # postproc parameters 47 | ################################################################################ 48 | post: 49 | CRF: 50 | use: False 51 | train: True 52 | params: False # this should be a dict when in use 53 | KNN: 54 | use: True # This parameter default is false 55 | params: 56 | knn: 7 57 | search: 7 58 | sigma: 1.0 59 | cutoff: 2.0 60 | 61 | ################################################################################ 62 | # classification head parameters 63 | ################################################################################ 64 | # dataset (to find parser) 65 | dataset: 66 | labels: "poss" 67 | scans: "poss" 68 | max_points: 72000 # max of any scan in dataset 69 | sensor: 70 | name: "Pandora" 71 | type: "spherical" # projective 72 | fov_up: 7 73 | fov_down: -16 74 | img_prop: 75 | width: 1800 76 | height: 40 77 | img_means: #range,x,y,z,signal (40, 1800) 78 | - 22.26779 79 | - 0.51144063 80 | - 1.5727469 81 | - -0.6350901 82 | - 13.875261 83 | img_stds: #range,x,y,z,signal 84 | - 17.735949 85 | - 17.422485 86 | - 22.217215 87 | - 1.6433295 88 | - 14.0925865 89 | 90 | # for mos 91 | n_input_scans: 2 # This needs to be the same as in the backbone params above! 92 | residual: False # This needs to be the same as in the backbone params above! 93 | transform: False # tranform the last n_input_scans - 1 frames before concatenation 94 | use_normal: False # if use normal vector as channels of range image -------------------------------------------------------------------------------- /config/data_preparing.yaml: -------------------------------------------------------------------------------- 1 | # This file is covered by the LICENSE file in the root of this project. 2 | # Developed by: Xieyuanli Chen 3 | # Configuration for preparing residual images (specifying all the paths) 4 | # -------------------------------------------------------------------- 5 | 6 | # General parameters 7 | # number of frames for training, -1 uses all frames 8 | num_frames: -1 9 | # plot images 10 | debug: False 11 | # normalize/scale the difference with corresponding range value 12 | normalize: True 13 | # use the last n frame to calculate the difference image 14 | num_last_n: 8 15 | 16 | # Inputs 17 | # the folder of raw LiDAR scans 18 | scan_folder: 'data/sequences/08/velodyne' 19 | # ground truth poses file 20 | pose_file: 'data/sequences/08/poses.txt' 21 | # calibration file 22 | calib_file: 'data/sequences/08/calib.txt' 23 | 24 | # Outputs 25 | # the suffix should be the same as num_last_n! 26 | residual_image_folder: 'data/sequences/08/residual_images_8' 27 | visualize: True 28 | visualization_folder: 'data/sequences/08/visualization_8' 29 | 30 | # range image parameters 31 | range_image: 32 | height: 64 33 | width: 2048 34 | fov_up: 3.0 35 | fov_down: -25.0 36 | max_range: 50.0 37 | min_range: 2.0 38 | 39 | -------------------------------------------------------------------------------- /config/labels/semantic-kitti-all.yaml: -------------------------------------------------------------------------------- 1 | # This file is covered by the LICENSE file in the root of this project. 
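The `range_image` block in `config/data_preparing.yaml` above (and the `sensor` blocks in the arch configs) parameterize the spherical projection that turns a scan into an H x W range image. A minimal sketch of that projection under those assumptions; it mirrors the standard SemanticKITTI range-view recipe and is not a verbatim copy of `common/laserscan.py`:

```python
# Sketch: spherical projection of one LiDAR scan into a (H, W) range image,
# using fov_up / fov_down / height / width values like the ones configured above.
import numpy as np

def project_scan(points, fov_up_deg=3.0, fov_down_deg=-25.0, H=64, W=2048):
    fov_up = np.radians(fov_up_deg)
    fov_down = np.radians(fov_down_deg)
    fov = abs(fov_down) + abs(fov_up)

    depth = np.linalg.norm(points, axis=1)            # per-point range
    yaw = -np.arctan2(points[:, 1], points[:, 0])     # horizontal angle
    pitch = np.arcsin(points[:, 2] / depth)           # vertical angle

    u = 0.5 * (yaw / np.pi + 1.0) * W                 # column in [0, W)
    v = (1.0 - (pitch + abs(fov_down)) / fov) * H     # row in [0, H)
    u = np.clip(np.floor(u), 0, W - 1).astype(np.int32)
    v = np.clip(np.floor(v), 0, H - 1).astype(np.int32)

    range_image = np.full((H, W), -1.0, dtype=np.float32)
    order = np.argsort(depth)[::-1]                   # write far points first,
    range_image[v[order], u[order]] = depth[order]    # so the closest point wins
    return range_image

img = project_scan(np.random.uniform(-50.0, 50.0, size=(100000, 3)))
```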
2 | name: "kitti" 3 | labels: 4 | 0: "unlabeled" 5 | 1: "outlier" 6 | 10: "car" 7 | 11: "bicycle" 8 | 13: "bus" 9 | 15: "motorcycle" 10 | 16: "on-rails" 11 | 18: "truck" 12 | 20: "other-vehicle" 13 | 30: "person" 14 | 31: "bicyclist" 15 | 32: "motorcyclist" 16 | 40: "road" 17 | 44: "parking" 18 | 48: "sidewalk" 19 | 49: "other-ground" 20 | 50: "building" 21 | 51: "fence" 22 | 52: "other-structure" 23 | 60: "lane-marking" 24 | 70: "vegetation" 25 | 71: "trunk" 26 | 72: "terrain" 27 | 80: "pole" 28 | 81: "traffic-sign" 29 | 99: "other-object" 30 | 252: "moving-car" 31 | 253: "moving-bicyclist" 32 | 254: "moving-person" 33 | 255: "moving-motorcyclist" 34 | 256: "moving-on-rails" 35 | 257: "moving-bus" 36 | 258: "moving-truck" 37 | 259: "moving-other-vehicle" 38 | color_map: # bgr 39 | 0: [0, 0, 0] 40 | 1: [0, 0, 255] 41 | 10: [245, 150, 100] 42 | 11: [245, 230, 100] 43 | 13: [250, 80, 100] 44 | 15: [150, 60, 30] 45 | 16: [255, 0, 0] 46 | 18: [180, 30, 80] 47 | 20: [255, 0, 0] 48 | 30: [30, 30, 255] 49 | 31: [200, 40, 255] 50 | 32: [90, 30, 150] 51 | 40: [255, 0, 255] 52 | 44: [255, 150, 255] 53 | 48: [75, 0, 75] 54 | 49: [75, 0, 175] 55 | 50: [0, 200, 255] 56 | 51: [50, 120, 255] 57 | 52: [0, 150, 255] 58 | 60: [170, 255, 150] 59 | 70: [0, 175, 0] 60 | 71: [0, 60, 135] 61 | 72: [80, 240, 150] 62 | 80: [150, 240, 255] 63 | 81: [0, 0, 255] 64 | 99: [255, 255, 50] 65 | 252: [245, 150, 100] 66 | 256: [255, 0, 0] 67 | 253: [200, 40, 255] 68 | 254: [30, 30, 255] 69 | 255: [90, 30, 150] 70 | 257: [250, 80, 100] 71 | 258: [180, 30, 80] 72 | 259: [255, 0, 0] 73 | content: # as a ratio with the total number of points 74 | 0: 0.018889854628292943 75 | 1: 0.0002937197336781505 76 | 10: 0.040818519255974316 77 | 11: 0.00016609538710764618 78 | 13: 2.7879693665067774e-05 79 | 15: 0.00039838616015114444 80 | 16: 0.0 81 | 18: 0.0020633612104619787 82 | 20: 0.0016218197275284021 83 | 30: 0.00017698551338515307 84 | 31: 1.1065903904919655e-08 85 | 32: 5.532951952459828e-09 86 | 40: 0.1987493871255525 87 | 44: 0.014717169549888214 88 | 48: 0.14392298360372 89 | 49: 0.0039048553037472045 90 | 50: 0.1326861944777486 91 | 51: 0.0723592229456223 92 | 52: 0.002395131480328884 93 | 60: 4.7084144280367186e-05 94 | 70: 0.26681502148037506 95 | 71: 0.006035012012626033 96 | 72: 0.07814222006271769 97 | 80: 0.002855498193863172 98 | 81: 0.0006155958086189918 99 | 99: 0.009923127583046915 100 | 252: 0.001789309418528068 101 | 253: 0.00012709999297008662 102 | 254: 0.00016059776092534436 103 | 255: 3.745553104802113e-05 104 | 256: 0.0 105 | 257: 0.00011351574470342043 106 | 258: 0.00010157861367183268 107 | 259: 4.3840131989471124e-05 108 | # classes that are indistinguishable from single scan or inconsistent in 109 | # ground truth are mapped to their closest equivalent 110 | learning_map: 111 | 0: 0 # "unlabeled" 112 | 1: 0 # "outlier" mapped to "unlabeled" --------------------------mapped 113 | 10: 1 # "car" 114 | 11: 2 # "bicycle" 115 | 13: 5 # "bus" mapped to "other-vehicle" --------------------------mapped 116 | 15: 3 # "motorcycle" 117 | 16: 5 # "on-rails" mapped to "other-vehicle" ---------------------mapped 118 | 18: 4 # "truck" 119 | 20: 5 # "other-vehicle" 120 | 30: 6 # "person" 121 | 31: 7 # "bicyclist" 122 | 32: 8 # "motorcyclist" 123 | 40: 9 # "road" 124 | 44: 10 # "parking" 125 | 48: 11 # "sidewalk" 126 | 49: 12 # "other-ground" 127 | 50: 13 # "building" 128 | 51: 14 # "fence" 129 | 52: 0 # "other-structure" mapped to "unlabeled" ------------------mapped 130 | 60: 9 # "lane-marking" to "road" 
---------------------------------mapped 131 | 70: 15 # "vegetation" 132 | 71: 16 # "trunk" 133 | 72: 17 # "terrain" 134 | 80: 18 # "pole" 135 | 81: 19 # "traffic-sign" 136 | 99: 0 # "other-object" to "unlabeled" ----------------------------mapped 137 | 252: 20 # "moving-car" 138 | 253: 21 # "moving-bicyclist" 139 | 254: 22 # "moving-person" 140 | 255: 23 # "moving-motorcyclist" 141 | 256: 24 # "moving-on-rails" mapped to "moving-other-vehicle" ------mapped 142 | 257: 24 # "moving-bus" mapped to "moving-other-vehicle" -----------mapped 143 | 258: 25 # "moving-truck" 144 | 259: 24 # "moving-other-vehicle" 145 | learning_map_inv: # inverse of previous map 146 | 0: 0 # "unlabeled", and others ignored 147 | 1: 10 # "car" 148 | 2: 11 # "bicycle" 149 | 3: 15 # "motorcycle" 150 | 4: 18 # "truck" 151 | 5: 20 # "other-vehicle" 152 | 6: 30 # "person" 153 | 7: 31 # "bicyclist" 154 | 8: 32 # "motorcyclist" 155 | 9: 40 # "road" 156 | 10: 44 # "parking" 157 | 11: 48 # "sidewalk" 158 | 12: 49 # "other-ground" 159 | 13: 50 # "building" 160 | 14: 51 # "fence" 161 | 15: 70 # "vegetation" 162 | 16: 71 # "trunk" 163 | 17: 72 # "terrain" 164 | 18: 80 # "pole" 165 | 19: 81 # "traffic-sign" 166 | 20: 252 # "moving-car" 167 | 21: 253 # "moving-bicyclist" 168 | 22: 254 # "moving-person" 169 | 23: 255 # "moving-motorcyclist" 170 | 24: 259 # "moving-other-vehicle" 171 | 25: 258 # "moving-truck" 172 | learning_ignore: # Ignore classes 173 | 0: True # "unlabeled", and others ignored 174 | 1: False # "car" 175 | 2: False # "bicycle" 176 | 3: False # "motorcycle" 177 | 4: False # "truck" 178 | 5: False # "other-vehicle" 179 | 6: False # "person" 180 | 7: False # "bicyclist" 181 | 8: False # "motorcyclist" 182 | 9: False # "road" 183 | 10: False # "parking" 184 | 11: False # "sidewalk" 185 | 12: False # "other-ground" 186 | 13: False # "building" 187 | 14: False # "fence" 188 | 15: False # "vegetation" 189 | 16: False # "trunk" 190 | 17: False # "terrain" 191 | 18: False # "pole" 192 | 19: False # "traffic-sign" 193 | 20: False # "moving-car" 194 | 21: False # "moving-bicyclist" 195 | 22: False # "moving-person" 196 | 23: False # "moving-motorcyclist" 197 | 24: False # "moving-other-vehicle" 198 | 25: False # "moving-truck" 199 | split: # sequence numbers 200 | train: 201 | - 0 202 | - 1 203 | - 2 204 | - 3 205 | - 4 206 | - 5 207 | - 6 208 | - 7 209 | - 9 210 | - 10 211 | valid: 212 | - 8 213 | test: 214 | - 11 215 | - 12 216 | - 13 217 | - 14 218 | - 15 219 | - 16 220 | - 17 221 | - 18 222 | - 19 223 | - 20 224 | - 21 225 | -------------------------------------------------------------------------------- /config/labels/semantic-kitti.yaml: -------------------------------------------------------------------------------- 1 | # This file is covered by the LICENSE file in the root of this project. 
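The `learning_map` / `learning_map_inv` blocks above define how raw SemanticKITTI ids are remapped to contiguous training ids and back. A minimal sketch of the usual lookup-table remapping (illustration only; `learning_map` is abbreviated here, and the actual code lives in the dataset parsers):

```python
# Sketch: remap raw SemanticKITTI label ids to training ids with a lookup table.
import numpy as np

learning_map = {0: 0, 1: 0, 10: 1, 11: 2, 13: 5, 252: 20}   # abbreviated excerpt
lut = np.zeros(max(learning_map) + 1, dtype=np.int32)
for raw_id, train_id in learning_map.items():
    lut[raw_id] = train_id

raw_labels = np.array([10, 252, 13, 0, 1])
train_labels = lut[raw_labels]            # -> [1, 20, 5, 0, 0]

# SemanticKITTI .label files pack the instance id in the upper 16 bits,
# so the semantic id is masked out before the lookup:
packed = np.uint32((3 << 16) | 10)
semantic_id = packed & 0xFFFF             # -> 10
```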
2 | name: "kitti" 3 | labels: 4 | 0: "unlabeled" 5 | 1: "outlier" 6 | 10: "car" 7 | 11: "bicycle" 8 | 13: "bus" 9 | 15: "motorcycle" 10 | 16: "on-rails" 11 | 18: "truck" 12 | 20: "other-vehicle" 13 | 30: "person" 14 | 31: "bicyclist" 15 | 32: "motorcyclist" 16 | 40: "road" 17 | 44: "parking" 18 | 48: "sidewalk" 19 | 49: "other-ground" 20 | 50: "building" 21 | 51: "fence" 22 | 52: "other-structure" 23 | 60: "lane-marking" 24 | 70: "vegetation" 25 | 71: "trunk" 26 | 72: "terrain" 27 | 80: "pole" 28 | 81: "traffic-sign" 29 | 99: "other-object" 30 | 252: "moving-car" 31 | 253: "moving-bicyclist" 32 | 254: "moving-person" 33 | 255: "moving-motorcyclist" 34 | 256: "moving-on-rails" 35 | 257: "moving-bus" 36 | 258: "moving-truck" 37 | 259: "moving-other-vehicle" 38 | color_map: # bgr 39 | 0: [0, 0, 0] 40 | 1: [0, 0, 255] 41 | 10: [245, 150, 100] 42 | 11: [245, 230, 100] 43 | 13: [250, 80, 100] 44 | 15: [150, 60, 30] 45 | 16: [255, 0, 0] 46 | 18: [180, 30, 80] 47 | 20: [255, 0, 0] 48 | 30: [30, 30, 255] 49 | 31: [200, 40, 255] 50 | 32: [90, 30, 150] 51 | 40: [255, 0, 255] 52 | 44: [255, 150, 255] 53 | 48: [75, 0, 75] 54 | 49: [75, 0, 175] 55 | 50: [0, 200, 255] 56 | 51: [50, 120, 255] 57 | 52: [0, 150, 255] 58 | 60: [170, 255, 150] 59 | 70: [0, 175, 0] 60 | 71: [0, 60, 135] 61 | 72: [80, 240, 150] 62 | 80: [150, 240, 255] 63 | 81: [0, 0, 255] 64 | 99: [255, 255, 50] 65 | 252: [245, 150, 100] 66 | 256: [255, 0, 0] 67 | 253: [200, 40, 255] 68 | 254: [30, 30, 255] 69 | 255: [90, 30, 150] 70 | 257: [250, 80, 100] 71 | 258: [180, 30, 80] 72 | 259: [255, 0, 0] 73 | content: # as a ratio with the total number of points 74 | 0: 0.018889854628292943 75 | 1: 0.0002937197336781505 76 | 10: 0.040818519255974316 77 | 11: 0.00016609538710764618 78 | 13: 2.7879693665067774e-05 79 | 15: 0.00039838616015114444 80 | 16: 0.0 81 | 18: 0.0020633612104619787 82 | 20: 0.0016218197275284021 83 | 30: 0.00017698551338515307 84 | 31: 1.1065903904919655e-08 85 | 32: 5.532951952459828e-09 86 | 40: 0.1987493871255525 87 | 44: 0.014717169549888214 88 | 48: 0.14392298360372 89 | 49: 0.0039048553037472045 90 | 50: 0.1326861944777486 91 | 51: 0.0723592229456223 92 | 52: 0.002395131480328884 93 | 60: 4.7084144280367186e-05 94 | 70: 0.26681502148037506 95 | 71: 0.006035012012626033 96 | 72: 0.07814222006271769 97 | 80: 0.002855498193863172 98 | 81: 0.0006155958086189918 99 | 99: 0.009923127583046915 100 | 252: 0.001789309418528068 101 | 253: 0.00012709999297008662 102 | 254: 0.00016059776092534436 103 | 255: 3.745553104802113e-05 104 | 256: 0.0 105 | 257: 0.00011351574470342043 106 | 258: 0.00010157861367183268 107 | 259: 4.3840131989471124e-05 108 | # classes that are indistinguishable from single scan or inconsistent in 109 | # ground truth are mapped to their closest equivalent 110 | learning_map: 111 | 0: 0 # "unlabeled" 112 | 1: 0 # "outlier" mapped to "unlabeled" --------------------------mapped 113 | 10: 1 # "car" 114 | 11: 2 # "bicycle" 115 | 13: 5 # "bus" mapped to "other-vehicle" --------------------------mapped 116 | 15: 3 # "motorcycle" 117 | 16: 5 # "on-rails" mapped to "other-vehicle" ---------------------mapped 118 | 18: 4 # "truck" 119 | 20: 5 # "other-vehicle" 120 | 30: 6 # "person" 121 | 31: 7 # "bicyclist" 122 | 32: 8 # "motorcyclist" 123 | 40: 9 # "road" 124 | 44: 10 # "parking" 125 | 48: 11 # "sidewalk" 126 | 49: 12 # "other-ground" 127 | 50: 13 # "building" 128 | 51: 14 # "fence" 129 | 52: 0 # "other-structure" mapped to "unlabeled" ------------------mapped 130 | 60: 9 # "lane-marking" to "road" 
---------------------------------mapped 131 | 70: 15 # "vegetation" 132 | 71: 16 # "trunk" 133 | 72: 17 # "terrain" 134 | 80: 18 # "pole" 135 | 81: 19 # "traffic-sign" 136 | 99: 0 # "other-object" to "unlabeled" ----------------------------mapped 137 | 252: 1 # "moving-car" to "car" ------------------------------------mapped 138 | 253: 7 # "moving-bicyclist" to "bicyclist" ------------------------mapped 139 | 254: 6 # "moving-person" to "person" ------------------------------mapped 140 | 255: 8 # "moving-motorcyclist" to "motorcyclist" ------------------mapped 141 | 256: 5 # "moving-on-rails" mapped to "other-vehicle" --------------mapped 142 | 257: 5 # "moving-bus" mapped to "other-vehicle" -------------------mapped 143 | 258: 4 # "moving-truck" to "truck" --------------------------------mapped 144 | 259: 5 # "moving-other"-vehicle to "other-vehicle" ----------------mapped 145 | learning_map_inv: # inverse of previous map 146 | 0: 0 # "unlabeled", and others ignored 147 | 1: 10 # "car" 148 | 2: 11 # "bicycle" 149 | 3: 15 # "motorcycle" 150 | 4: 18 # "truck" 151 | 5: 20 # "other-vehicle" 152 | 6: 30 # "person" 153 | 7: 31 # "bicyclist" 154 | 8: 32 # "motorcyclist" 155 | 9: 40 # "road" 156 | 10: 44 # "parking" 157 | 11: 48 # "sidewalk" 158 | 12: 49 # "other-ground" 159 | 13: 50 # "building" 160 | 14: 51 # "fence" 161 | 15: 70 # "vegetation" 162 | 16: 71 # "trunk" 163 | 17: 72 # "terrain" 164 | 18: 80 # "pole" 165 | 19: 81 # "traffic-sign" 166 | learning_ignore: # Ignore classes 167 | 0: True # "unlabeled", and others ignored 168 | 1: False # "car" 169 | 2: False # "bicycle" 170 | 3: False # "motorcycle" 171 | 4: False # "truck" 172 | 5: False # "other-vehicle" 173 | 6: False # "person" 174 | 7: False # "bicyclist" 175 | 8: False # "motorcyclist" 176 | 9: False # "road" 177 | 10: False # "parking" 178 | 11: False # "sidewalk" 179 | 12: False # "other-ground" 180 | 13: False # "building" 181 | 14: False # "fence" 182 | 15: False # "vegetation" 183 | 16: False # "trunk" 184 | 17: False # "terrain" 185 | 18: False # "pole" 186 | 19: False # "traffic-sign" 187 | split: # sequence numbers 188 | train: 189 | - 0 190 | - 1 191 | - 2 192 | - 3 193 | - 4 194 | - 5 195 | - 6 196 | - 7 197 | - 9 198 | - 10 199 | valid: 200 | - 8 201 | test: 202 | - 11 203 | - 12 204 | - 13 205 | - 14 206 | - 15 207 | - 16 208 | - 17 209 | - 18 210 | - 19 211 | - 20 212 | - 21 213 | -------------------------------------------------------------------------------- /config/labels/semantic-nuscenes.yaml: -------------------------------------------------------------------------------- 1 | name: nusc 2 | orig_labels: 3 | 0: 'noise' 4 | 1: 'animal' 5 | 2: 'human.pedestrian.adult' 6 | 3: 'human.pedestrian.child' 7 | 4: 'human.pedestrian.construction_worker' 8 | 5: 'human.pedestrian.personal_mobility' 9 | 6: 'human.pedestrian.police_officer' 10 | 7: 'human.pedestrian.stroller' 11 | 8: 'human.pedestrian.wheelchair' 12 | 9: 'movable_object.barrier' 13 | 10: 'movable_object.debris' 14 | 11: 'movable_object.pushable_pullable' 15 | 12: 'movable_object.trafficcone' 16 | 13: 'static_object.bicycle_rack' 17 | 14: 'vehicle.bicycle' 18 | 15: 'vehicle.bus.bendy' 19 | 16: 'vehicle.bus.rigid' 20 | 17: 'vehicle.car' 21 | 18: 'vehicle.construction' 22 | 19: 'vehicle.emergency.ambulance' 23 | 20: 'vehicle.emergency.police' 24 | 21: 'vehicle.motorcycle' 25 | 22: 'vehicle.trailer' 26 | 23: 'vehicle.truck' 27 | 24: 'flat.driveable_surface' 28 | 25: 'flat.other' 29 | 26: 'flat.sidewalk' 30 | 27: 'flat.terrain' 31 | 28: 'static.manmade' 32 | 29: 
'static.other' 33 | 30: 'static.vegetation' 34 | 31: 'vehicle.ego' 35 | labels: 36 | 0: 'noise' 37 | 1: 'animal' 38 | 2: 'pedestrian' 39 | 3: 'human.pedestrian.child' 40 | 4: 'human.pedestrian.construction_worker' 41 | 5: 'human.pedestrian.personal_mobility' 42 | 6: 'human.pedestrian.police_officer' 43 | 7: 'human.pedestrian.stroller' 44 | 8: 'human.pedestrian.wheelchair' 45 | 9: 'barrier' 46 | 10: 'movable_object.debris' 47 | 11: 'movable_object.pushable_pullable' 48 | 12: 'traffic_cone' 49 | 13: 'static_object.bicycle_rack' 50 | 14: 'bicycle' 51 | 15: 'vehicle.bus.bendy' 52 | 16: 'bus' 53 | 17: 'car' 54 | 18: 'construction_vehicle' 55 | 19: 'vehicle.emergency.ambulance' 56 | 20: 'vehicle.emergency.police' 57 | 21: 'motorcycle' 58 | 22: 'trailer' 59 | 23: 'truck' 60 | 24: 'driveable_surface' 61 | 25: 'other_flat' 62 | 26: 'sidewalk' 63 | 27: 'terrain' 64 | 28: 'manmade' 65 | 29: 'static.other' 66 | 30: 'vegetation' 67 | 31: 'vehicle.ego' 68 | labels_16: 69 | 0: 'noise' 70 | 1: 'barrier' 71 | 2: 'bicycle' 72 | 3: 'bus' 73 | 4: 'car' 74 | 5: 'construction_vehicle' 75 | 6: 'motorcycle' 76 | 7: 'pedestrian' 77 | 8: 'traffic_cone' 78 | 9: 'trailer' 79 | 10: 'truck' 80 | 11: 'driveable_surface' 81 | 12: 'other_flat' 82 | 13: 'sidewalk' 83 | 14: 'terrain' 84 | 15: 'manmade' 85 | 16: 'vegetation' 86 | color_map: # bgr 87 | 0 : [0, 0, 0] 88 | 1 : [0, 0, 255] 89 | 2: [245, 150, 100] 90 | 3: [245, 230, 100] 91 | 4: [250, 80, 100] 92 | 5: [150, 60, 30] 93 | 6: [255, 0, 0] 94 | 7: [180, 30, 80] 95 | 8: [255, 0, 0] 96 | 9: [30, 30, 255] 97 | 10: [200, 40, 255] 98 | 11: [90, 30, 150] 99 | 12: [255, 0, 255] 100 | 13: [255, 150, 255] 101 | 14: [75, 0, 75] 102 | 15: [75, 0, 175] 103 | 16: [0, 200, 255] 104 | 17: [50, 120, 255] 105 | 18: [0, 150, 255] 106 | 19: [170, 255, 150] 107 | 20: [0, 175, 0] 108 | 21: [0, 60, 135] 109 | 22: [80, 240, 150] 110 | 23: [150, 240, 255] 111 | 24: [0, 0, 255] 112 | 25: [255, 255, 50] 113 | 26: [245, 150, 100] 114 | 27: [255, 0, 0] 115 | 28: [200, 40, 255] 116 | 29: [30, 30, 255] 117 | 30: [90, 30, 150] 118 | 31: [250, 80, 100] 119 | learning_map: 120 | 1: 0 #noise 121 | 5: 0 #noise 122 | 7: 0 #noise 123 | 8: 0 #noise 124 | 10: 0 #noise 125 | 11: 0 #noise 126 | 13: 0 #noise 127 | 19: 0 #noise 128 | 20: 0 #noise 129 | 0: 0 #noise 130 | 29: 0 #noise 131 | 31: 0 #noise 132 | 9: 1 #barrier 133 | 14: 2 #bicycle 134 | 15: 3 #bus 135 | 16: 3 #bus 136 | 17: 4 #car 137 | 18: 5 #construction vehicle 138 | 21: 6 #motorcycle 139 | 2: 7 #pedestrian 140 | 3: 7 #pedestrian 141 | 4: 7 #pedestrian 142 | 6: 7 #pedestrian 143 | 12: 8 #trafic cone 144 | 22: 9 #trailer 145 | 23: 10 #truck 146 | 24: 11 #driveble surface 147 | 25: 12 #other_flat 148 | 26: 13 #sidewalk 149 | 27: 14 #terrain 150 | 28: 15 #manmade 151 | 30: 16 #vegetation 152 | learning_map_inv: 153 | 0: 0 154 | 1: 9 155 | 2: 14 156 | 3: 16 157 | 4: 17 158 | 5: 18 159 | 6: 21 160 | 7: 2 161 | 8: 12 162 | 9: 22 163 | 10: 23 164 | 11: 24 165 | 12: 25 166 | 13: 26 167 | 14: 27 168 | 15: 28 169 | 16: 30 170 | learning_ignore: 171 | 0: True 172 | 1: False 173 | 2: False 174 | 3: False 175 | 4: False 176 | 5: False 177 | 6: False 178 | 7: False 179 | 8: False 180 | 9: False 181 | 10: False 182 | 11: False 183 | 12: False 184 | 13: False 185 | 14: False 186 | 15: False 187 | 16: False 188 | color_map_learning : { 189 | 0: [0, 0, 0], # noise 190 | 1: [50, 120, 255], # barrier 191 | 2: [245, 230, 100], # bicycle 192 | 3: [255, 0, 0], # bus 193 | 4: [245, 150, 100], # car 194 | 5: [255, 0, 0], # construction_vehicle 195 | 6: [150, 60, 30], # 
motorcycle 196 | 7: [30, 30, 255], # pedestrian 197 | 8: [0, 0, 255], # traffic cone 198 | 9: [255, 0, 0], # trailer 199 | 10: [180, 30, 80], # truck 200 | 11: [255, 0, 255], # driveable_surface 201 | 12: [75, 0, 175], # other_flat 202 | 13: [75, 0, 75], # sidewalk 203 | 14: [80, 240, 150], # terrain 204 | 15: [0, 200, 255], # manmade 205 | 16: [0, 175, 0], # vegetation 206 | } 207 | split: # sequence numbers 208 | mini_train: [61, 553, 655, 757, 796, 1077, 1094, 1100] 209 | mini_valid: [103, 916] 210 | train: [1,2,4,5,6,7,8,9,10,11,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,138,139,149,150,151,152,154,155,157,158,159,160,161,162,163,164,165,166,167,168,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,187,188,190,191,192,193,194,195,196,199,200,202,203,204,206,207,208,209,210,211,212,213,214,218,219,220,222,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,315,316,317,318,321,323,324,328,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,405,406,407,408,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,461,462,463,464,465,467,468,469,471,472,474,475,476,477,478,479,480,499,500,501,502,504,505,506,507,508,509,510,511,512,513,514,515,517,518,525,526,527,528,529,530,531,532,533,534,535,536,537,538,539,541,542,543,544,545,546,566,568,570,571,572,573,574,575,576,577,578,580,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,681,683,684,685,686,687,688,689,695,696,697,698,700,701,703,704,705,706,707,708,709,710,711,72,713,714,715,716,717,718,719,726,727,728,730,731,733,734,735,736,737,738,739,740,741,744,746,747,749,750,751,752,757,758,759,760,761,762,763,764,765,767,768,769,786,787,789,790,791,792,803,804,805,806,808,809,810,811,812,813,815,816,817,819,820,821,822,847,848,849,850,851,852,853,854,855,856,858,860,861,862,863,864,865,866,868,869,870,871,872,873,875,876,877,878,880,882,883,884,885,886,887,888,889,890,891,892,893,894,895,896,897,898,899,900,901,902,903,945,947,949,952,953,955,956,957,958,959,960,961,975,976,977,978,979,980,981,982,983,984,988,989,990,991,992,994,995,996,997,998,999,1000,1001,1002,1003,1004,1005,1006,1007,1008,1009,1010,1011,1012,1013,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023,1024,1025,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1104,1105,1106,1107,1108,1109,1110] 211 | valid: 
[3,12,13,14,15,16,17,18,35,36,38,39,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,221,268,269,270,271,272,273,274,275,276,277,278,329,330,331,332,344,345,346,519,520,521,522,523,524,552,553,554,555,556,557,558,559,560,561,562,563,564,565,625,626,627,629,630,632,633,634,635,636,637,638,770,771,775,777,778,780,781,782,783,784,794,795,796,797,798,799,800,802,904,905,906,907,908,909,910,911,912,913,914,915,916,917,919,920,921,922,923,924,925,926,927,928,929,930,931,962,963,966,967,968,969,971,972,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073] 212 | test: [77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,111,112,113,114,115,116,117,118,119,140,142,143,144,145,146,147,148,265,266,279,280,281,282,307,308,309,310,311,312,313,314,333,334,335,336,337,338,339,340,341,342,343,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,547,548,549,550,551,601,602,603,604,606,607,608,609,610,611,612,613,614,615,616,617,618,619,620,621,622,623,624,827,828,829,830,831,833,834,835,836,837,838,839,840,841,842,844,845,846,932,933,935,936,937,938,939,940,941,942,943,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1037,1038,1039,1040,1041,1042,1043] 213 | -------------------------------------------------------------------------------- /config/labels/semantic-poss.yaml: -------------------------------------------------------------------------------- 1 | # This file is covered by the LICENSE file in the root of this project. 2 | name: "poss" 3 | labels: 4 | 0: "unlabeled" 5 | 1: "not know" 6 | 4: "1 person" 7 | 5: "2+ person" 8 | 6: "rider" 9 | 7: "car" 10 | 8: "trunk" 11 | 9: "plants" 12 | 10: "traffic sign 1" # standing sign 13 | 11: "traffic sign 2" # hanging sign 14 | 12: "traffic sign 3" # high/big hanging sign 15 | 13: "pole" 16 | 14: "trashcan" 17 | 15: "building" 18 | 16: "cone/stone" 19 | 17: "fence" 20 | 18: "not know" 21 | 19: "not know" 22 | 20: "not know" 23 | 21: "bike" 24 | 22: "ground" # class definition 25 | color_map: # bgr 26 | 0: [0, 0, 0] 27 | 1: [0, 0, 0] 28 | 4: [30, 30, 255] 29 | 5: [30, 30, 255] 30 | 6: [200, 40, 255] 31 | 7: [245, 150, 100] 32 | 8: [0, 60, 135] 33 | 9: [0, 175, 0] 34 | 10: [0, 0, 255]# standing sign 35 | 11: [0, 0, 255]# hanging sign 36 | 12: [0, 0, 255]# high/big hanging sign 37 | 13: [150, 240, 255] 38 | 14: [0, 255, 125] 39 | 15: [0, 200, 255] 40 | 16: [255, 255, 50] 41 | 17: [50, 120, 255] 42 | 18: [0, 0, 0] 43 | 19: [0, 0, 0] 44 | 20: [0, 0, 0] 45 | 21: [245, 230, 100] 46 | 22: [128, 128, 128] # class definition 47 | 48 | content: # as a ratio with the total number of points 49 | 0: 0.020862830428742977 50 | 1: 2.469082128891203e-08 51 | 4: 0.015402000989588365 52 | 5: 0.0030953055529133635 53 | 6: 0.004473705218516682 54 | 7: 0.08265452422115742 55 | 8: 0.012670218398511653 56 | 9: 0.3590911520036982 57 | 10: 0.0032106116883325827 58 | 11: 0.0012968804500358467 59 | 12: 0.00163708540024725 60 | 13: 0.00486631890599593 61 | 14: 0.0008666823943906168 62 | 15: 0.21372203059566083 63 | 16: 0.0009603445558679757 64 | 17: 0.015234582406756767 65 | 18: 0.0007389617140273326 66 | 19: 3.6991788455048e-05 67 | 20: 0.0005476522925165844 68 | 21: 0.05418175057745164 69 | 22: 0.20445034572631166 70 | 71 | ## classes that are indistinguishable from single scan or inconsistent in 72 | 73 | # ground truth are mapped to their closest equivalent 74 | learning_map: 75 | 0: 0 76 | 1: 0 77 | 4: 1 78 | 5: 1 79 | 6: 2 80 | 7: 3 81 | 8: 4 82 | 9: 5 83 | 10: 6 # standing sign 84 | 11: 6 # hanging sign 85 | 12: 6 # high/big 
hanging sign 86 | 13: 7 87 | 14: 8 88 | 15: 9 89 | 16: 10 90 | 17: 11 91 | 18: 0 92 | 19: 0 93 | 20: 0 94 | 21: 12 95 | 22: 13 # class definition 96 | learning_map_inv: # inverse of previous map 97 | 0: 0 # "unlabeled", and others ignored 98 | 1: 4 # "car" 99 | 2: 6 # "bicycle" 100 | 3: 7 # "motorcycle" 101 | 4: 8 # "truck" 102 | 5: 9 # "other-vehicle" 103 | 6: 10 # "person" 104 | 7: 13 # "bicyclist" 105 | 8: 14 # "motorcyclist" 106 | 9: 15 # "road" 107 | 10: 16 # "parking" 108 | 11: 17 # "sidewalk" 109 | 12: 21 # "other-ground" 110 | 13: 22 # "building" 111 | learning_ignore: # Ignore classes 112 | 0: True # "unlabeled", and others ignored 113 | 1: False # "car" 114 | 2: False # "bicycle" 115 | 3: False # "motorcycle" 116 | 4: False # "truck" 117 | 5: False # "other-vehicle" 118 | 6: False # "person" 119 | 7: False # "bicyclist" 120 | 8: False # "motorcyclist" 121 | 9: False # "road" 122 | 10: False # "parking" 123 | 11: False # "sidewalk" 124 | 12: False # "other-ground" 125 | 13: False # "building" 126 | 127 | split: # sequence numbers 128 | train: 129 | - 0 130 | - 1 131 | - 3 132 | - 4 133 | - 5 134 | valid: 135 | - 2 136 | test: 137 | - 2 -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: LENet 2 | channels: 3 | - pytorch 4 | - defaults 5 | # - anaconda 6 | # You can use the TUNA mirror to speed up the installation if you are in mainland China. 7 | # - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ 8 | # - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ 9 | # - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/ 10 | # - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/ 11 | # - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch-lts/ 12 | dependencies: 13 | - python=3.8.16 14 | - pytorch=1.8.2 15 | - torchvision=0.9.2 16 | - cudatoolkit=11.1 17 | - ipython 18 | - tqdm 19 | - numba 20 | - sparsehash # dependency for torchsparse 21 | - pyqt 22 | - pip 23 | - pip: 24 | - -r requirements.txt 25 | # You can use the TUNA mirror to speed up the installation if you are in mainland China. 26 | # -i https://pypi.tuna.tsinghua.edu.cn/simpl 27 | # - git+http://github.com/mit-han-lab/torchsparse.git@v1.4.0 -------------------------------------------------------------------------------- /infer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 3 | 4 | import os 5 | from utils.utils import * 6 | from modules.user import * 7 | from modules.user_refine import * 8 | from modules.user_poss import * 9 | from modules.user_nusc import * 10 | 11 | 12 | if __name__ == '__main__': 13 | 14 | parser = get_args(flags="infer") 15 | FLAGS, unparsed = parser.parse_known_args() 16 | 17 | print("----------") 18 | print("INTERFACE:") 19 | print(" dataset", FLAGS.dataset) 20 | print(" log", FLAGS.log) 21 | print(" model", FLAGS.model) 22 | print(" infering", FLAGS.split) 23 | print(" pointrefine", FLAGS.pointrefine) 24 | print("----------\n") 25 | 26 | # open arch / data config file 27 | ARCH = load_yaml(FLAGS.model + "/arch_cfg.yaml") 28 | DATA = load_yaml(FLAGS.model + "/data_cfg.yaml") 29 | 30 | make_predictions_dir(FLAGS, DATA) # create predictions file folder 31 | check_model_dir(FLAGS.model) # does model folder exist? 
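`infer.py` reads the `arch_cfg.yaml` / `data_cfg.yaml` files saved next to a trained model through the `load_yaml` helper imported from `utils.utils`. A minimal sketch of what such a helper might look like (the real implementation in `utils/utils.py` may differ):

```python
# Sketch of a YAML-loading helper equivalent to the load_yaml calls above.
# The real helper lives in utils/utils.py and may handle errors differently.
import yaml

def load_yaml(path):
    try:
        with open(path, "r") as f:
            return yaml.safe_load(f)
    except Exception as e:
        print("Error opening {}: {}".format(path, e))
        raise

# Usage mirroring infer.py:
#   ARCH = load_yaml(FLAGS.model + "/arch_cfg.yaml")
#   DATA = load_yaml(FLAGS.model + "/data_cfg.yaml")
```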
32 | 33 | # create user and infer dataset 34 | if not FLAGS.pointrefine: 35 | if DATA["name"] == "kitti": 36 | user = User(ARCH, DATA, datadir=FLAGS.dataset, outputdir=FLAGS.log, 37 | modeldir=FLAGS.model, split=FLAGS.split) 38 | elif DATA["name"] == "poss": 39 | user = UserPoss(ARCH, DATA, datadir=FLAGS.dataset, outputdir=FLAGS.log, 40 | modeldir=FLAGS.model, split=FLAGS.split) 41 | elif DATA["name"] == "nusc": 42 | user = UserNusc(ARCH, DATA, datadir=FLAGS.dataset, outputdir=FLAGS.log, 43 | modeldir=FLAGS.model, split=FLAGS.split) 44 | else: 45 | raise ValueError("unsupported dataset {}".format(DATA["name"])) 46 | else: 47 | user = UserRefine(ARCH, DATA, datadir=FLAGS.dataset, outputdir=FLAGS.log, 48 | modeldir=FLAGS.model, split=FLAGS.split) 49 | user.infer() 50 | -------------------------------------------------------------------------------- /modules/PointRefine/PointMLP.py: -------------------------------------------------------------------------------- 1 | # A simple MLP network structure for point clouds, 2 | # 3 | # Added by Jiadai Sun 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class PointRefine(nn.Module): 11 | 12 | def __init__(self, n_class=3, 13 | in_fea_dim=35, 14 | out_point_fea_dim=64): 15 | super(PointRefine, self).__init__() 16 | 17 | self.n_class = n_class 18 | self.PPmodel = nn.Sequential( 19 | nn.BatchNorm1d(in_fea_dim), 20 | 21 | nn.Linear(in_fea_dim, 64), 22 | nn.BatchNorm1d(64), 23 | nn.ReLU(), 24 | 25 | nn.Linear(64, 128), 26 | nn.BatchNorm1d(128), 27 | nn.ReLU(), 28 | 29 | nn.Linear(128, 256), 30 | nn.BatchNorm1d(256), 31 | nn.ReLU(), 32 | 33 | nn.Linear(256, out_point_fea_dim) 34 | ) 35 | 36 | self.logits = nn.Sequential( 37 | nn.Linear(out_point_fea_dim, self.n_class) 38 | ) 39 | 40 | def forward(self, point_fea): 41 | # the point_fea need with size (b, N, c) e.g. torch.Size([1, 121722, 35]) 42 | # process feature 43 | # torch.Size([124668, 9]) --> torch.Size([124668, 256]) 44 | processed_point_fea = self.PPmodel(point_fea) 45 | logits = self.logits(processed_point_fea) 46 | point_predict = F.softmax(logits, dim=1) 47 | return point_predict 48 | 49 | 50 | if __name__ == '__main__': 51 | 52 | import time 53 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 54 | model = PointRefine() 55 | model.train() 56 | 57 | # t0 = time.time() 58 | # pred = model(cloud) 59 | # t1 = time.time() 60 | # print(t1-t0) 61 | 62 | total = sum([param.nelement() for param in model.parameters()]) 63 | print("Number of PointRefine parameter: %.2fM" % (total/1e6)) 64 | # Number of PointRefine parameter: 0.04M 65 | -------------------------------------------------------------------------------- /modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fengluodb/RangeSeg/f61f703ff2fb3a8bc0a190cfd87daf18b057365a/modules/__init__.py -------------------------------------------------------------------------------- /modules/loss/DiceLoss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 
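A short usage sketch for the `PointRefine` MLP defined in `modules/PointRefine/PointMLP.py` above, run on a random batch of per-point features; the feature dimension follows the `in_fea_dim=35` default, and the point count and class count are illustrative only:

```python
# Usage sketch only: run the PointRefine MLP on random per-point features.
import torch
from modules.PointRefine.PointMLP import PointRefine

model = PointRefine(n_class=20, in_fea_dim=35)
model.eval()                                  # use running BatchNorm statistics

point_fea = torch.randn(4096, 35)             # (num_points, feature_dim)
with torch.no_grad():
    probs = model(point_fea)                  # (num_points, n_class), softmax scores
print(probs.shape, probs.sum(dim=1)[:3])      # each row sums to ~1
```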
3 | 4 | import numpy 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | # PyTorch 10 | # class DiceLoss(nn.Module): 11 | # def __init__(self, weight=None, size_average=True): 12 | # super(DiceLoss, self).__init__() 13 | 14 | # def forward(self, inputs, targets, smooth=1): 15 | 16 | # #comment out if your model contains a sigmoid or equivalent activation layer 17 | # inputs = F.sigmoid(inputs) 18 | 19 | # #flatten label and prediction tensors 20 | # inputs = inputs.view(-1) 21 | # targets = targets.view(-1) 22 | 23 | # intersection = (inputs * targets).sum() 24 | # dice = (2.*intersection + smooth)/(inputs.sum() + targets.sum() + smooth) 25 | 26 | # return 1 - dice 27 | 28 | # https://smp.readthedocs.io/en/latest/losses.html 29 | # https://github.com/pytorch/pytorch/issues/1249 30 | # https://www.kaggle.com/bigironsphere/loss-function-library-keras-pytorch#Dice-Loss 31 | # https://kornia.readthedocs.io/en/v0.1.2/_modules/torchgeometry/losses/dice.html 32 | 33 | 34 | # based on: 35 | # https://github.com/kevinzakka/pytorch-goodies/blob/master/losses.py 36 | 37 | class DiceLoss(nn.Module): 38 | r"""Criterion that computes Sørensen-Dice Coefficient loss. 39 | 40 | According to [1], we compute the Sørensen-Dice Coefficient as follows: 41 | 42 | .. math:: 43 | 44 | \text{Dice}(x, class) = \frac{2 |X| \cap |Y|}{|X| + |Y|} 45 | 46 | where: 47 | - :math:`X` expects to be the scores of each class. 48 | - :math:`Y` expects to be the one-hot tensor with the class labels. 49 | 50 | the loss, is finally computed as: 51 | 52 | .. math:: 53 | 54 | \text{loss}(x, class) = 1 - \text{Dice}(x, class) 55 | 56 | [1] https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient 57 | 58 | Shape: 59 | - Input: :math:`(N, C, H, W)` where C = number of classes. 60 | - Target: :math:`(N, H, W)` where each value is 61 | :math:`0 ≤ targets[i] ≤ C−1`. 62 | 63 | Examples: 64 | >>> N = 5 # num_classes 65 | >>> loss = tgm.losses.DiceLoss() 66 | >>> input = torch.randn(1, N, 3, 5, requires_grad=True) 67 | >>> target = torch.empty(1, 3, 5, dtype=torch.long).random_(N) 68 | >>> output = loss(input, target) 69 | >>> output.backward() 70 | """ 71 | 72 | def __init__(self) -> None: 73 | super(DiceLoss, self).__init__() 74 | self.eps: float = 1e-6 75 | 76 | def forward(self, input: torch.Tensor, 77 | target: torch.Tensor) -> torch.Tensor: 78 | if not torch.is_tensor(input): 79 | raise TypeError("Input type is not a torch.Tensor. Got {}" 80 | .format(type(input))) 81 | if not len(input.shape) == 4: 82 | raise ValueError("Invalid input shape, we expect BxNxHxW. Got: {}" 83 | .format(input.shape)) 84 | if not input.shape[-2:] == target.shape[-2:]: 85 | raise ValueError("input and target shapes must be the same. Got: {}" 86 | .format(input.shape, input.shape)) 87 | if not input.device == target.device: 88 | raise ValueError( 89 | "input and target must be in the same device. 
Got: {}" .format( 90 | input.device, target.device)) 91 | # compute softmax over the classes axis 92 | # input_soft = F.softmax(input, dim=1) # have done is network last layer 93 | 94 | # create the labels one hot tensor 95 | # target_one_hot = one_hot(target, num_classes=input.shape[1], 96 | # device=input.device, dtype=input.dtype) 97 | target_one_hot = F.one_hot( 98 | target, num_classes=input.shape[1]).permute(0, 3, 1, 2) 99 | 100 | # compute the actual dice score 101 | dims = (1, 2, 3) 102 | # intersection = torch.sum(input_soft * target_one_hot, dims) 103 | # cardinality = torch.sum(input_soft + target_one_hot, dims) 104 | 105 | # if we need to ignore the class=0 106 | input_filter = input[:, 1:, :, :] 107 | target_one_hot_filter = input[:, 1:, :, :] 108 | intersection = torch.sum(input_filter * target_one_hot_filter, dims) 109 | cardinality = torch.sum(input_filter + target_one_hot_filter, dims) 110 | 111 | dice_score = 2. * intersection / (cardinality + self.eps) 112 | return torch.mean(1. - dice_score) 113 | 114 | 115 | ###################### 116 | # functional interface 117 | ###################### 118 | 119 | 120 | def dice_loss(input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: 121 | r"""Function that computes Sørensen-Dice Coefficient loss. 122 | 123 | See :class:`~torchgeometry.losses.DiceLoss` for details. 124 | """ 125 | return DiceLoss()(input, target) 126 | -------------------------------------------------------------------------------- /modules/loss/Lovasz_Softmax.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | MIT License 4 | 5 | Copyright (c) 2018 Maxim Berman 6 | Copyright (c) 2020 Tiago Cortinhal, George Tzelepis and Eren Erdal Aksoy 7 | 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | """ 20 | import torch 21 | import torch.nn as nn 22 | from torch.autograd import Variable 23 | 24 | 25 | try: 26 | from itertools import ifilterfalse 27 | except ImportError: 28 | from itertools import filterfalse as ifilterfalse 29 | 30 | 31 | def isnan(x): 32 | return x != x 33 | 34 | 35 | def mean(l, ignore_nan=False, empty=0): 36 | """ 37 | nanmean compatible with generators. 38 | """ 39 | l = iter(l) 40 | if ignore_nan: 41 | l = ifilterfalse(isnan, l) 42 | try: 43 | n = 1 44 | acc = next(l) 45 | except StopIteration: 46 | if empty == 'raise': 47 | raise ValueError('Empty mean') 48 | return empty 49 | for n, v in enumerate(l, 2): 50 | acc += v 51 | if n == 1: 52 | return acc 53 | return acc / n 54 | 55 | 56 | def lovasz_grad(gt_sorted): 57 | """ 58 | Computes gradient of the Lovasz extension w.r.t sorted errors 59 | See Alg. 1 in paper 60 | """ 61 | p = len(gt_sorted) 62 | gts = gt_sorted.sum() 63 | intersection = gts - gt_sorted.float().cumsum(0) 64 | union = gts + (1 - gt_sorted).float().cumsum(0) 65 | jaccard = 1. 
- intersection / union 66 | if p > 1: # cover 1-pixel case 67 | jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] 68 | return jaccard 69 | 70 | 71 | def lovasz_softmax(probas, labels, classes='present', per_image=False, ignore=None): 72 | """ 73 | Multi-class Lovasz-Softmax loss 74 | probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). 75 | Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. 76 | labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) 77 | classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. 78 | per_image: compute the loss per image instead of per batch 79 | ignore: void class labels 80 | """ 81 | if per_image: 82 | loss = mean(lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), classes=classes) 83 | for prob, lab in zip(probas, labels)) 84 | else: 85 | loss = lovasz_softmax_flat( 86 | *flatten_probas(probas, labels, ignore), classes=classes) 87 | return loss 88 | 89 | 90 | def lovasz_softmax_flat(probas, labels, classes='present'): 91 | """ 92 | Multi-class Lovasz-Softmax loss 93 | probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) 94 | labels: [P] Tensor, ground truth labels (between 0 and C - 1) 95 | classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. 96 | """ 97 | if probas.numel() == 0: 98 | # only void pixels, the gradients should be 0 99 | return probas * 0. 100 | C = probas.size(1) 101 | losses = [] 102 | class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes 103 | for c in class_to_sum: 104 | fg = (labels == c).float() # foreground for class c 105 | if (classes == 'present' and fg.sum() == 0): 106 | continue 107 | if C == 1: 108 | if len(classes) > 1: 109 | raise ValueError('Sigmoid output possible only with 1 class') 110 | class_pred = probas[:, 0] 111 | else: 112 | class_pred = probas[:, c] 113 | errors = (Variable(fg) - class_pred).abs() 114 | errors_sorted, perm = torch.sort(errors, 0, descending=True) 115 | perm = perm.data 116 | fg_sorted = fg[perm] 117 | losses.append( 118 | torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) 119 | return mean(losses) 120 | 121 | 122 | def flatten_probas(probas, labels, ignore=None): 123 | """ 124 | Flattens predictions in the batch 125 | """ 126 | if probas.dim() == 3: 127 | # assumes output of a sigmoid layer 128 | B, H, W = probas.size() 129 | probas = probas.view(B, 1, H, W) 130 | B, C, H, W = probas.size() 131 | probas = probas.permute(0, 2, 3, 1).contiguous( 132 | ).view(-1, C) # B * H * W, C = P, C 133 | labels = labels.view(-1) 134 | if ignore is None: 135 | return probas, labels 136 | valid = (labels != ignore) 137 | vprobas = probas[valid.nonzero(as_tuple=False).squeeze()] 138 | vlabels = labels[valid] 139 | return vprobas, vlabels 140 | 141 | 142 | class Lovasz_softmax(nn.Module): 143 | def __init__(self, classes='present', per_image=False, ignore=None): 144 | super(Lovasz_softmax, self).__init__() 145 | self.classes = classes 146 | self.per_image = per_image 147 | self.ignore = ignore 148 | 149 | def forward(self, probas, labels): 150 | return lovasz_softmax(probas, labels, self.classes, self.per_image, self.ignore) 151 | 152 | 153 | # Used to calculate Lovasz Loss with point cloud as input 154 | # Add by Jiadai Sun 155 | class Lovasz_softmax_PointCloud(nn.Module): 156 | def __init__(self, classes='present', ignore=None): 157 | super(Lovasz_softmax_PointCloud, 
self).__init__() 158 | self.classes = classes 159 | self.ignore = ignore 160 | 161 | def forward(self, probas, labels): 162 | 163 | B, C, N = probas.size() 164 | probas = probas.permute(0, 2, 1).contiguous().view(-1, C) 165 | labels = labels.view(-1) 166 | if self.ignore is not None: 167 | valid = (labels != self.ignore) 168 | vprobas = probas[valid.nonzero(as_tuple=False).squeeze()] 169 | vlabels = labels[valid] 170 | return lovasz_softmax_flat(vprobas, vlabels, classes=self.classes) 171 | else: 172 | return lovasz_softmax_flat(probas, labels, classes=self.classes) 173 | -------------------------------------------------------------------------------- /modules/loss/boundary_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import cv2 5 | import numpy as np 6 | 7 | 8 | def one_hot(label, n_classes, requires_grad=True): 9 | """Return One Hot Label""" 10 | divce = label.device 11 | one_hot_label = torch.eye( 12 | n_classes, device='cuda', requires_grad=requires_grad)[label] 13 | one_hot_label = one_hot_label.transpose(1, 3).transpose(2, 3) 14 | 15 | return one_hot_label 16 | 17 | 18 | class BoundaryLoss(nn.Module): 19 | """Boundary Loss proposed in: 20 | Alexey Bokhovkin et al., Boundary Loss for Remote Sensing Imagery Semantic Segmentation 21 | https://arxiv.org/abs/1905.07852 22 | """ 23 | 24 | def __init__(self, theta0=3, theta=5): 25 | super().__init__() 26 | 27 | self.theta0 = theta0 28 | self.theta = theta 29 | 30 | def forward(self, pred, gt): 31 | """ 32 | Input: 33 | - pred: the output from model (before softmax) 34 | shape (N, C, H, W) 35 | - gt: ground truth map 36 | shape (N, H, w) 37 | Return: 38 | - boundary loss, averaged over mini-bathc 39 | """ 40 | 41 | n, c, _, _ = pred.shape 42 | 43 | # softmax so that predicted map can be distributed in [0, 1] 44 | # pred = torch.softmax(pred, dim=1) 45 | 46 | # one-hot vector of ground truth 47 | one_hot_gt = one_hot(gt, c) 48 | 49 | # boundary map 50 | gt_b = F.max_pool2d( 51 | 1 - one_hot_gt, kernel_size=self.theta0, stride=1, padding=(self.theta0 - 1) // 2) 52 | gt_b -= 1 - one_hot_gt 53 | 54 | pred_b = F.max_pool2d( 55 | 1 - pred, kernel_size=self.theta0, stride=1, padding=(self.theta0 - 1) // 2) 56 | pred_b -= 1 - pred 57 | 58 | # Visualization Boundary 59 | # for i in range(c): 60 | # gt_bv = gt_b.detach().cpu().numpy() 61 | # # cv2.imshow('gt_b_cls.png'.format(i), gt_bv[0][i]) 62 | # cv2.imwrite('gt_b_cls{}.png'.format(i), gt_bv[0][i]*255) 63 | # 64 | # pred_bv = pred_b.detach().cpu().numpy() 65 | # #cv2.imshow('pred_b_cls{}'.format(i), pred_bv[0][i]) 66 | # cv2.imwrite('pred_b_cls{}.png'.format(i), pred_bv[0][i]*255) 67 | 68 | # reshape 69 | # gt_b = gt_b[:, 1:, :, :] 70 | # pred_b = pred_b[:, 1:, :, :] 71 | # c = c-1 72 | 73 | gt_b = gt_b.view(n, c, -1) 74 | pred_b = pred_b.view(n, c, -1) 75 | 76 | # Precision, Recall 77 | P = torch.sum(pred_b * gt_b, dim=2) / (torch.sum(pred_b, dim=2) + 1e-7) 78 | R = torch.sum(pred_b * gt_b, dim=2) / (torch.sum(gt_b, dim=2) + 1e-7) 79 | 80 | # Boundary F1 Score 81 | BF1 = 2 * P * R / (P + R + 1e-7) 82 | 83 | # summing BF1 Score for each class and average over mini-batch 84 | loss = torch.mean(1 - BF1) 85 | 86 | return loss 87 | -------------------------------------------------------------------------------- /modules/network/CENet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | from torch.nn 
import functional as F 4 | import numpy as np 5 | 6 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 7 | """3x3 convolution with padding""" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 9 | padding=dilation, groups=groups, bias=False, dilation=dilation) 10 | 11 | def conv1x1(in_planes, out_planes, stride=1): 12 | """1x1 convolution""" 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 14 | 15 | 16 | class BasicConv2d(nn.Module): 17 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, relu=True): 18 | super(BasicConv2d, self).__init__() 19 | self.relu = relu 20 | self.conv = nn.Conv2d(in_planes, out_planes, 21 | kernel_size=kernel_size, stride=stride, 22 | padding=padding, dilation=dilation, bias=False) 23 | self.bn = nn.BatchNorm2d(out_planes) 24 | if self.relu: 25 | self.relu = nn.LeakyReLU() 26 | 27 | def forward(self, x): 28 | x = self.conv(x) 29 | x = self.bn(x) 30 | if self.relu: 31 | x = self.relu(x) 32 | return x 33 | 34 | class Final_Model(nn.Module): 35 | 36 | def __init__(self, backbone_net, semantic_head): 37 | super(Final_Model, self).__init__() 38 | self.backend = backbone_net 39 | self.semantic_head = semantic_head 40 | 41 | def forward(self, x): 42 | middle_feature_maps = self.backend(x) 43 | 44 | semantic_output = self.semantic_head(middle_feature_maps) 45 | 46 | return semantic_output 47 | 48 | 49 | class BasicBlock(nn.Module): 50 | expansion = 1 51 | 52 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 53 | base_width=64, dilation=1, if_BN=None): 54 | super(BasicBlock, self).__init__() 55 | self.if_BN = if_BN 56 | if self.if_BN: 57 | norm_layer = nn.BatchNorm2d 58 | if groups != 1 or base_width != 64: 59 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 60 | if dilation > 1: 61 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 62 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 63 | self.conv1 = conv3x3(inplanes, planes, stride) 64 | if self.if_BN: 65 | self.bn1 = norm_layer(planes) 66 | self.relu = nn.LeakyReLU() 67 | self.conv2 = conv3x3(planes, planes) 68 | if self.if_BN: 69 | self.bn2 = norm_layer(planes) 70 | self.downsample = downsample 71 | self.stride = stride 72 | 73 | def forward(self, x): 74 | identity = x 75 | 76 | out = self.conv1(x) 77 | if self.if_BN: 78 | out = self.bn1(out) 79 | out = self.relu(out) 80 | 81 | out = self.conv2(out) 82 | if self.if_BN: 83 | out = self.bn2(out) 84 | if self.downsample is not None: 85 | identity = self.downsample(x) 86 | out += identity 87 | out = self.relu(out) 88 | return out 89 | 90 | 91 | class ResNet_34(nn.Module): 92 | def __init__(self, nclasses, params, block=BasicBlock, layers=[3, 4, 6, 3], if_BN=True, zero_init_residual=False, 93 | norm_layer=None, groups=1, width_per_group=64): 94 | super(ResNet_34, self).__init__() 95 | if norm_layer is None: 96 | norm_layer = nn.BatchNorm2d 97 | self._norm_layer = norm_layer 98 | self.if_BN = if_BN 99 | self.dilation = 1 100 | self.aux = params["train"]["aux_loss"]["use"] 101 | 102 | self.groups = groups 103 | self.base_width = width_per_group 104 | 105 | self.conv1 = BasicConv2d(5, 64, kernel_size=3, padding=1) 106 | self.conv2 = BasicConv2d(64, 128, kernel_size=3, padding=1) 107 | self.conv3 = BasicConv2d(128, 128, kernel_size=3, padding=1) 108 | 109 | self.inplanes = 128 110 | 111 | self.layer1 = self._make_layer(block, 128, layers[0]) 112 | 
self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 113 | self.layer3 = self._make_layer(block, 128, layers[2], stride=2) 114 | self.layer4 = self._make_layer(block, 128, layers[3], stride=2) 115 | 116 | self.conv_1 = BasicConv2d(640, 256, kernel_size=3, padding=1) 117 | self.conv_2 = BasicConv2d(256, 128, kernel_size=3, padding=1) 118 | self.semantic_output = nn.Conv2d(128, nclasses, 1) 119 | 120 | if self.aux: 121 | self.aux_head1 = nn.Conv2d(128, nclasses, 1) 122 | self.aux_head2 = nn.Conv2d(128, nclasses, 1) 123 | self.aux_head3 = nn.Conv2d(128, nclasses, 1) 124 | 125 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 126 | norm_layer = self._norm_layer 127 | downsample = None 128 | previous_dilation = self.dilation 129 | if dilate: 130 | self.dilation *= stride 131 | stride = 1 132 | if stride != 1 or self.inplanes != planes * block.expansion: 133 | if self.if_BN: 134 | downsample = nn.Sequential( 135 | conv1x1(self.inplanes, planes * block.expansion, stride), 136 | norm_layer(planes * block.expansion), 137 | ) 138 | else: 139 | downsample = nn.Sequential( 140 | conv1x1(self.inplanes, planes * block.expansion, stride) 141 | ) 142 | 143 | layers = [] 144 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 145 | self.base_width, previous_dilation, if_BN=self.if_BN)) 146 | self.inplanes = planes * block.expansion 147 | for _ in range(1, blocks): 148 | layers.append(block(self.inplanes, planes, groups=self.groups, 149 | base_width=self.base_width, dilation=self.dilation, 150 | if_BN=self.if_BN)) 151 | 152 | return nn.Sequential(*layers) 153 | 154 | def forward(self, x): 155 | 156 | x = self.conv1(x) 157 | x = self.conv2(x) 158 | x = self.conv3(x) 159 | 160 | x_1 = self.layer1(x) # 1 161 | x_2 = self.layer2(x_1) # 1/2 162 | x_3 = self.layer3(x_2) # 1/4 163 | x_4 = self.layer4(x_3) # 1/8 164 | 165 | res_2 = F.interpolate(x_2, size=x.size()[2:], mode='bilinear', align_corners=True) 166 | res_3 = F.interpolate(x_3, size=x.size()[2:], mode='bilinear', align_corners=True) 167 | res_4 = F.interpolate(x_4, size=x.size()[2:], mode='bilinear', align_corners=True) 168 | res = [x, x_1, res_2, res_3, res_4] 169 | 170 | out = torch.cat(res, dim=1) 171 | out = self.conv_1(out) 172 | out = self.conv_2(out) 173 | out = self.semantic_output(out) 174 | out = F.softmax(out, dim=1) 175 | 176 | if self.aux: 177 | res_2 = self.aux_head1(res_2) 178 | res_2 = F.softmax(res_2, dim=1) 179 | 180 | res_3 = self.aux_head2(res_3) 181 | res_3 = F.softmax(res_3, dim=1) 182 | 183 | res_4 = self.aux_head3(res_4) 184 | res_4 = F.softmax(res_4, dim=1) 185 | 186 | # res_2 = self.aux_head1(x_2) 187 | # res_2 = F.softmax(x_2, dim=1) 188 | 189 | # res_3 = self.aux_head2(x_3) 190 | # res_3 = F.softmax(x_3, dim=1) 191 | 192 | # res_4 = self.aux_head3(x_4) 193 | # res_4 = F.softmax(x_4, dim=1) 194 | 195 | if self.aux: 196 | return [out, res_2, res_3, res_4] 197 | else: 198 | return out 199 | 200 | 201 | 202 | 203 | 204 | if __name__ == "__main__": 205 | import time 206 | model = ResNet_34(20).cuda() 207 | pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad) 208 | print("Number of parameters: ", pytorch_total_params / 1000000, "M") 209 | time_train = [] 210 | for i in range(20): 211 | inputs = torch.randn(1, 5, 64, 2048).cuda() 212 | model.eval() 213 | with torch.no_grad(): 214 | start_time = time.time() 215 | outputs = model(inputs) 216 | torch.cuda.synchronize() # wait for cuda to finish (cuda is asynchronous!) 
217 | fwt = time.time() - start_time 218 | time_train.append(fwt) 219 | print ("Forward time per img: %.3f (Mean: %.3f)" % ( 220 | fwt / 1, sum(time_train) / len(time_train) / 1)) 221 | time.sleep(0.15) 222 | 223 | 224 | 225 | -------------------------------------------------------------------------------- /modules/network/Fid.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | from torch.nn import functional as F 4 | import numpy as np 5 | 6 | 7 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 8 | """3x3 convolution with padding""" 9 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 10 | padding=dilation, groups=groups, bias=False, dilation=dilation) 11 | 12 | 13 | def conv1x1(in_planes, out_planes, stride=1): 14 | """1x1 convolution""" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 16 | 17 | 18 | class BasicConv2d(nn.Module): 19 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, relu=True): 20 | super(BasicConv2d, self).__init__() 21 | self.relu = relu 22 | self.conv = nn.Conv2d(in_planes, out_planes, 23 | kernel_size=kernel_size, stride=stride, 24 | padding=padding, dilation=dilation, bias=False) 25 | self.bn = nn.BatchNorm2d(out_planes) 26 | if self.relu: 27 | self.relu = nn.LeakyReLU() 28 | 29 | def forward(self, x): 30 | x = self.conv(x) 31 | x = self.bn(x) 32 | if self.relu: 33 | x = self.relu(x) 34 | return x 35 | 36 | 37 | class Final_Model(nn.Module): 38 | 39 | def __init__(self, backbone_net, semantic_head): 40 | super(Final_Model, self).__init__() 41 | self.backend = backbone_net 42 | self.semantic_head = semantic_head 43 | 44 | def forward(self, x): 45 | middle_feature_maps = self.backend(x) 46 | 47 | semantic_output = self.semantic_head(middle_feature_maps) 48 | 49 | return semantic_output 50 | 51 | 52 | class BasicBlock(nn.Module): 53 | expansion = 1 54 | 55 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 56 | base_width=64, dilation=1, if_BN=None): 57 | super(BasicBlock, self).__init__() 58 | self.if_BN = if_BN 59 | if self.if_BN: 60 | norm_layer = nn.BatchNorm2d 61 | if groups != 1 or base_width != 64: 62 | raise ValueError( 63 | 'BasicBlock only supports groups=1 and base_width=64') 64 | if dilation > 1: 65 | raise NotImplementedError( 66 | "Dilation > 1 not supported in BasicBlock") 67 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 68 | self.conv1 = conv3x3(inplanes, planes, stride) 69 | if self.if_BN: 70 | self.bn1 = norm_layer(planes) 71 | self.relu = nn.LeakyReLU() 72 | self.conv2 = conv3x3(planes, planes) 73 | if self.if_BN: 74 | self.bn2 = norm_layer(planes) 75 | self.downsample = downsample 76 | self.stride = stride 77 | 78 | def forward(self, x): 79 | identity = x 80 | 81 | out = self.conv1(x) 82 | if self.if_BN: 83 | out = self.bn1(out) 84 | out = self.relu(out) 85 | 86 | out = self.conv2(out) 87 | if self.if_BN: 88 | out = self.bn2(out) 89 | 90 | if self.downsample is not None: 91 | identity = self.downsample(x) 92 | out += identity 93 | out = self.relu(out) 94 | 95 | return out 96 | 97 | 98 | class ResNet_34(nn.Module): 99 | 100 | def __init__(self, nclasses, params, block=BasicBlock, layers=[3, 4, 6, 3], if_BN=True, zero_init_residual=False, 101 | norm_layer=None, groups=1, width_per_group=64): 102 | super(ResNet_34, self).__init__() 103 | if norm_layer is None: 104 | norm_layer = nn.BatchNorm2d 
105 | self._norm_layer = norm_layer 106 | self.if_BN = if_BN 107 | # self.aux = params["train"]["aux_loss"]["use"] 108 | self.aux = False 109 | 110 | self.dilation = 1 111 | 112 | self.groups = groups 113 | self.base_width = width_per_group 114 | 115 | self.conv1 = BasicConv2d(5, 64, kernel_size=1) 116 | self.conv2 = BasicConv2d(64, 128, kernel_size=1) 117 | self.conv3 = BasicConv2d(128, 256, kernel_size=1) 118 | self.conv4 = BasicConv2d(256, 512, kernel_size=1) 119 | self.inplanes = 512 120 | 121 | self.layer1 = self._make_layer(block, 128, layers[0]) 122 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 123 | self.layer3 = self._make_layer(block, 128, layers[2], stride=2) 124 | self.layer4 = self._make_layer(block, 128, layers[3], stride=2) 125 | 126 | self.conv_1 = BasicConv2d(1024, 512, kernel_size=1) 127 | self.conv_2 = BasicConv2d(512, 128, kernel_size=1) 128 | self.semantic_output = nn.Conv2d(128, nclasses, 1) 129 | 130 | if self.aux: 131 | self.aux_head1 = nn.Conv2d(128, nclasses, 1) 132 | self.aux_head2 = nn.Conv2d(128, nclasses, 1) 133 | self.aux_head3 = nn.Conv2d(128, nclasses, 1) 134 | 135 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 136 | norm_layer = self._norm_layer 137 | downsample = None 138 | previous_dilation = self.dilation 139 | if dilate: 140 | self.dilation *= stride 141 | stride = 1 142 | if stride != 1 or self.inplanes != planes * block.expansion: 143 | if self.if_BN: 144 | downsample = nn.Sequential( 145 | conv1x1(self.inplanes, planes * block.expansion, stride), 146 | norm_layer(planes * block.expansion), 147 | ) 148 | else: 149 | downsample = nn.Sequential( 150 | conv1x1(self.inplanes, planes * block.expansion, stride) 151 | ) 152 | 153 | layers = [] 154 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 155 | self.base_width, previous_dilation, if_BN=self.if_BN)) 156 | self.inplanes = planes * block.expansion 157 | for _ in range(1, blocks): 158 | layers.append(block(self.inplanes, planes, groups=self.groups, 159 | base_width=self.base_width, dilation=self.dilation, 160 | if_BN=self.if_BN)) 161 | 162 | return nn.Sequential(*layers) 163 | 164 | def forward(self, x): 165 | 166 | x = self.conv1(x) 167 | x = self.conv2(x) 168 | x = self.conv3(x) 169 | x = self.conv4(x) 170 | 171 | x_1 = self.layer1(x) # 1 172 | x_2 = self.layer2(x_1) # 1/2 173 | x_3 = self.layer3(x_2) # 1/4 174 | x_4 = self.layer4(x_3) # 1/8 175 | 176 | res_2 = F.interpolate( 177 | x_2, size=x.size()[2:], mode='bilinear', align_corners=True) 178 | res_3 = F.interpolate( 179 | x_3, size=x.size()[2:], mode='bilinear', align_corners=True) 180 | res_4 = F.interpolate( 181 | x_4, size=x.size()[2:], mode='bilinear', align_corners=True) 182 | res = [x, x_1, res_2, res_3, res_4] 183 | out = torch.cat(res, dim=1) 184 | 185 | out = self.conv_1(out) 186 | out = self.conv_2(out) 187 | out = self.semantic_output(out) 188 | 189 | out = F.softmax(out, dim=1) 190 | 191 | if self.aux: 192 | res_2 = self.aux_head1(res_2) 193 | res_2 = F.softmax(res_2, dim=1) 194 | 195 | res_3 = self.aux_head2(res_3) 196 | res_3 = F.softmax(res_3, dim=1) 197 | 198 | res_4 = self.aux_head3(res_4) 199 | res_4 = F.softmax(res_4, dim=1) 200 | 201 | if self.aux: 202 | return [out, res_2, res_3, res_4] 203 | else: 204 | return out, None 205 | -------------------------------------------------------------------------------- /modules/network/LENet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 
3 | from torch.nn import functional as F 4 | import numpy as np 5 | 6 | 7 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 8 | """3x3 convolution with padding""" 9 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 10 | padding=dilation, groups=groups, bias=False, dilation=dilation) 11 | 12 | 13 | def conv1x1(in_planes, out_planes, stride=1): 14 | """1x1 convolution""" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 16 | 17 | 18 | class BasicConv2d(nn.Module): 19 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, relu=True): 20 | super(BasicConv2d, self).__init__() 21 | self.relu = relu 22 | self.conv = nn.Conv2d(in_planes, out_planes, 23 | kernel_size=kernel_size, stride=stride, 24 | padding=padding, dilation=dilation, bias=False) 25 | self.bn = nn.BatchNorm2d(out_planes) 26 | if self.relu: 27 | self.relu = nn.LeakyReLU() 28 | 29 | def forward(self, x): 30 | x = self.conv(x) 31 | x = self.bn(x) 32 | if self.relu: 33 | x = self.relu(x) 34 | return x 35 | 36 | 37 | class AttentionModule(nn.Module): 38 | def __init__(self, dim): 39 | super().__init__() 40 | self.conv0 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim) 41 | 42 | self.conv1_1 = nn.Conv2d(dim, dim, (1, 3), padding=(0, 1), groups=dim) 43 | self.conv1_2 = nn.Conv2d(dim, dim, (3, 1), padding=(1, 0), groups=dim) 44 | 45 | self.conv2_1 = nn.Conv2d(dim, dim, (1, 5), padding=(0, 2), groups=dim) 46 | self.conv2_2 = nn.Conv2d(dim, dim, (5, 1), padding=(2, 0), groups=dim) 47 | 48 | self.conv3_1 = nn.Conv2d(dim, dim, (1, 7), padding=(0, 3), groups=dim) 49 | self.conv3_2 = nn.Conv2d(dim, dim, (7, 1), padding=(3, 0), groups=dim) 50 | 51 | self.conv4 = nn.Conv2d(dim, dim, 1) 52 | 53 | def forward(self, x): 54 | attn = self.conv0(x) 55 | 56 | attn_0 = self.conv1_1(attn) 57 | attn_0 = self.conv1_2(attn_0) 58 | 59 | attn_1 = self.conv2_1(attn) 60 | attn_1 = self.conv2_2(attn_1) 61 | 62 | attn_2 = self.conv3_1(attn) 63 | attn_2 = self.conv3_2(attn_2) 64 | 65 | attn = attn + attn_0 + attn_1 + attn_2 66 | 67 | attn = self.conv4(attn) 68 | 69 | return attn * x 70 | 71 | 72 | class SpatialAttention(nn.Module): 73 | def __init__(self, dim): 74 | super().__init__() 75 | self.proj_1 = nn.Conv2d(dim, dim, 1) 76 | self.act = nn.LeakyReLU() 77 | self.spatial_gating_unit = AttentionModule(dim) 78 | self.proj_2 = nn.Conv2d(dim, dim, 1) 79 | 80 | def forward(self, x): 81 | shortcut = x.clone() 82 | x = self.proj_1(x) 83 | x = self.act(x) 84 | x = self.spatial_gating_unit(x) 85 | x = self.proj_2(x) 86 | x = x + shortcut 87 | return x 88 | 89 | 90 | class BasicBlock(nn.Module): 91 | expansion = 1 92 | 93 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 94 | base_width=64, dilation=1, if_BN=None): 95 | super(BasicBlock, self).__init__() 96 | if groups != 1 or base_width != 64: 97 | raise ValueError( 98 | 'BasicBlock only supports groups=1 and base_width=64') 99 | if dilation > 1: 100 | raise NotImplementedError( 101 | "Dilation > 1 not supported in BasicBlock") 102 | 103 | self.downsample = downsample 104 | 105 | self.conv = BasicConv2d(planes, planes, 3, padding=1) 106 | 107 | self.attn = SpatialAttention(planes) 108 | self.dropout = nn.Dropout2d(p=0.2) 109 | 110 | def forward(self, x): 111 | if self.downsample is not None: 112 | x = self.downsample(x) 113 | 114 | out = self.conv(x) 115 | out = out + self.dropout(self.attn(out)) 116 | 117 | out += x 118 | 119 | return out 120 | 121 | 122 | class ResNet_34(nn.Module): 
123 | def __init__(self, nclasses, params, block=BasicBlock, layers=[3, 4, 6, 3], if_BN=True, zero_init_residual=False, 124 | norm_layer=None, groups=1, width_per_group=64): 125 | super(ResNet_34, self).__init__() 126 | self.nclasses = nclasses 127 | 128 | # mos modification 129 | if params['train']['residual']: 130 | self.input_size = 5 + params['train']['n_input_scans'] 131 | else: 132 | self.input_size = 5 133 | 134 | print("Depth of backbone input = ", self.input_size) 135 | ### 136 | 137 | if norm_layer is None: 138 | norm_layer = nn.BatchNorm2d 139 | self._norm_layer = norm_layer 140 | self.if_BN = if_BN 141 | self.dilation = 1 142 | self.aux = params["train"]["aux_loss"]["use"] 143 | 144 | self.groups = groups 145 | self.base_width = width_per_group 146 | 147 | self.conv1 = BasicConv2d(5, 64, kernel_size=3, padding=1) 148 | self.conv2 = BasicConv2d(64, 128, kernel_size=3, padding=1) 149 | self.conv3 = BasicConv2d(128, 128, kernel_size=3, padding=1) 150 | 151 | self.inplanes = 128 152 | 153 | self.layer1 = self._make_layer(block, 128, layers[0]) 154 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 155 | self.layer3 = self._make_layer(block, 128, layers[2], stride=2) 156 | self.layer4 = self._make_layer(block, 128, layers[3], stride=2) 157 | 158 | self.decoder1 = BasicConv2d(256, 128, 3, padding=1) 159 | self.decoder2 = BasicConv2d(256, 128, 3, padding=1) 160 | self.decoder3 = BasicConv2d(256, 128, 3, padding=1) 161 | self.decoder4 = BasicConv2d(256, 128, 3, padding=1) 162 | 163 | self.fusion_conv = BasicConv2d(128 * 3, 128, kernel_size=1) 164 | self.semantic_output = nn.Conv2d(128, nclasses, 1) 165 | 166 | if self.aux: 167 | self.aux_head1 = nn.Conv2d(128, nclasses, 1) 168 | self.aux_head2 = nn.Conv2d(128, nclasses, 1) 169 | self.aux_head3 = nn.Conv2d(128, nclasses, 1) 170 | 171 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 172 | norm_layer = self._norm_layer 173 | downsample = None 174 | previous_dilation = self.dilation 175 | if dilate: 176 | self.dilation *= stride 177 | stride = 1 178 | if stride != 1 or self.inplanes != planes * block.expansion: 179 | if self.if_BN: 180 | downsample = nn.Sequential( 181 | # conv1x1(self.inplanes, planes * block.expansion, stride), 182 | nn.AvgPool2d(kernel_size=(3, 3), stride=2, padding=1), 183 | # SoftPool2d(kernel_size=(2, 2), stride=(2, 2)), 184 | norm_layer(planes * block.expansion), 185 | ) 186 | else: 187 | downsample = nn.Sequential( 188 | # conv1x1(self.inplanes, planes * block.expansion, stride) 189 | # SoftPool2d(kernel_size=(2, 2), stride=(2, 2)) 190 | # nn.AvgPool2d(kernel_size=(3, 3), stride=2, padding=1), 191 | ) 192 | 193 | layers = [] 194 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 195 | self.base_width, previous_dilation, if_BN=self.if_BN)) 196 | self.inplanes = planes * block.expansion 197 | for _ in range(1, blocks): 198 | layers.append(block(planes, planes, groups=self.groups, 199 | base_width=self.base_width, dilation=self.dilation, 200 | if_BN=self.if_BN)) 201 | 202 | return nn.Sequential(*layers) 203 | 204 | def forward(self, x): 205 | 206 | x = self.conv1(x) 207 | x = self.conv2(x) 208 | x = self.conv3(x) 209 | 210 | x_1 = self.layer1(x) # 1 211 | x_2 = self.layer2(x_1) # 1/2 212 | x_3 = self.layer3(x_2) # 1/4 213 | x_4 = self.layer4(x_3) # 1/8 214 | 215 | res_1 = self.decoder1(torch.cat((x, x_1), dim=1)) 216 | 217 | res_2 = F.interpolate( 218 | x_2, size=x.size()[2:], mode='bilinear', align_corners=True) 219 | res_2 = 
self.decoder2(torch.cat((res_1, res_2), dim=1)) 220 | 221 | res_3 = F.interpolate( 222 | x_3, size=x.size()[2:], mode='bilinear', align_corners=True) 223 | res_3 = self.decoder3(torch.cat((res_2, res_3), dim=1)) 224 | 225 | res_4 = F.interpolate( 226 | x_4, size=x.size()[2:], mode='bilinear', align_corners=True) 227 | res_4 = self.decoder4(torch.cat((res_3, res_4), dim=1)) 228 | res = [res_2, res_3, res_4] 229 | 230 | out = torch.cat(res, dim=1) 231 | out = self.fusion_conv(out) 232 | out = self.semantic_output(out) 233 | logits = F.softmax(out, dim=1) 234 | 235 | if self.aux: 236 | res_2 = self.aux_head1(res_2) 237 | res_2 = F.softmax(res_2, dim=1) 238 | 239 | res_3 = self.aux_head2(res_3) 240 | res_3 = F.softmax(res_3, dim=1) 241 | 242 | res_4 = self.aux_head3(res_4) 243 | res_4 = F.softmax(res_4, dim=1) 244 | 245 | if self.aux: 246 | return [logits, res_2, res_3, res_4] 247 | else: 248 | return logits, out 249 | -------------------------------------------------------------------------------- /modules/scheduler/consine.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from torch.optim.lr_scheduler import _LRScheduler 4 | 5 | 6 | class CosineAnnealingWarmUpRestarts(_LRScheduler): 7 | def __init__( 8 | self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1.0, last_epoch=-1 9 | ): 10 | if T_0 <= 0 or not isinstance(T_0, int): 11 | raise ValueError( 12 | "Expected positive integer T_0, but got {}".format(T_0)) 13 | if T_mult < 1 or not isinstance(T_mult, int): 14 | raise ValueError( 15 | "Expected integer T_mult >= 1, but got {}".format(T_mult)) 16 | if T_up < 0 or not isinstance(T_up, int): 17 | raise ValueError( 18 | "Expected positive integer T_up, but got {}".format(T_up)) 19 | self.T_0 = T_0 20 | self.T_mult = T_mult 21 | self.base_eta_max = eta_max 22 | self.eta_max = eta_max 23 | self.T_up = T_up 24 | self.T_i = T_0 25 | self.gamma = gamma 26 | self.cycle = 0 27 | self.T_cur = last_epoch 28 | super(CosineAnnealingWarmUpRestarts, self).__init__( 29 | optimizer, last_epoch) 30 | 31 | def get_lr(self): 32 | if self.T_cur == -1: 33 | return self.base_lrs 34 | elif self.T_cur < self.T_up: 35 | return [ 36 | (self.eta_max - base_lr) * self.T_cur / self.T_up + base_lr 37 | for base_lr in self.base_lrs 38 | ] 39 | else: 40 | return [ 41 | base_lr 42 | + (self.eta_max - base_lr) 43 | * ( 44 | 1 45 | + math.cos( 46 | math.pi * (self.T_cur - self.T_up) / 47 | (self.T_i - self.T_up) 48 | ) 49 | ) 50 | / 2 51 | for base_lr in self.base_lrs 52 | ] 53 | 54 | def step(self, epoch=None): 55 | if epoch is None: 56 | epoch = self.last_epoch + 1 57 | self.T_cur = self.T_cur + 1 58 | if self.T_cur >= self.T_i: 59 | self.cycle += 1 60 | self.T_cur = self.T_cur - self.T_i 61 | self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up 62 | else: 63 | if epoch >= self.T_0: 64 | if self.T_mult == 1: 65 | self.T_cur = epoch % self.T_0 66 | self.cycle = epoch // self.T_0 67 | else: 68 | n = int( 69 | math.log( 70 | (epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult 71 | ) 72 | ) 73 | self.cycle = n 74 | self.T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / ( 75 | self.T_mult - 1 76 | ) 77 | self.T_i = self.T_0 * self.T_mult ** (n) 78 | else: 79 | self.T_i = self.T_0 80 | self.T_cur = epoch 81 | 82 | self.eta_max = self.base_eta_max * (self.gamma ** self.cycle) 83 | self.last_epoch = math.floor(epoch) 84 | for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()): 85 | param_group["lr"] = lr 86 | 
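The scheduler above ramps the learning rate up linearly during the first `T_up` steps of each cycle, follows a cosine decay for the remaining `T_i - T_up` steps, then restarts with the cycle length scaled by `T_mult` and the peak rate scaled by `gamma`. A minimal usage sketch follows; the model, step counts and rates are placeholders, not values taken from this repository's configs:

```python
import torch
from modules.scheduler.consine import CosineAnnealingWarmUpRestarts

model = torch.nn.Linear(10, 2)                      # stand-in model
# base lr acts as the floor; the scheduler ramps it up to eta_max during warm-up
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6, momentum=0.9)
scheduler = CosineAnnealingWarmUpRestarts(
    optimizer, T_0=1000, T_mult=2, eta_max=1e-2, T_up=100, gamma=0.5)

for step in range(3000):
    # ... forward / backward / optimizer.step() would go here ...
    scheduler.step()                                # stepped once per iteration
```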
-------------------------------------------------------------------------------- /modules/scheduler/warmupLR.py: -------------------------------------------------------------------------------- 1 | # This file is covered by the LICENSE file in the root of this project. 2 | 3 | import torch.optim.lr_scheduler as toptim 4 | 5 | 6 | class warmupLR(toptim._LRScheduler): 7 | """ Warmup learning rate scheduler. 8 | Initially, increases the learning rate from 0 to the final value, in a 9 | certain number of steps. After this number of steps, each step decreases 10 | LR exponentially. 11 | """ 12 | 13 | def __init__(self, optimizer, lr, warmup_steps, momentum, decay): 14 | # cyclic params 15 | self.optimizer = optimizer 16 | self.lr = lr 17 | self.warmup_steps = warmup_steps 18 | self.momentum = momentum 19 | self.decay = decay 20 | 21 | # cap to one 22 | if self.warmup_steps < 1: 23 | self.warmup_steps = 1 24 | 25 | # cyclic lr 26 | self.initial_scheduler = toptim.CyclicLR(self.optimizer, 27 | base_lr=0, 28 | max_lr=self.lr, 29 | step_size_up=self.warmup_steps, 30 | step_size_down=self.warmup_steps, 31 | cycle_momentum=False, 32 | base_momentum=self.momentum, 33 | max_momentum=self.momentum) 34 | 35 | # our params 36 | self.last_epoch = -1 # fix for pytorch 1.1 and below 37 | self.finished = False # am i done 38 | super().__init__(optimizer) 39 | 40 | def get_lr(self): 41 | return [self.lr * (self.decay ** self.last_epoch) for lr in self.base_lrs] 42 | 43 | def step(self, epoch=None): 44 | if self.finished or self.initial_scheduler.last_epoch >= self.warmup_steps: 45 | if not self.finished: 46 | self.base_lrs = [self.lr for lr in self.base_lrs] 47 | self.finished = True 48 | return super(warmupLR, self).step(epoch) 49 | else: 50 | return self.initial_scheduler.step(epoch) 51 | -------------------------------------------------------------------------------- /modules/user_nusc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 
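For comparison, the `warmupLR` scheduler defined in the file above increases the learning rate from zero to `lr` over `warmup_steps` iterations (via a one-sided `CyclicLR`) and then decays it exponentially by `decay` per step, as its docstring describes. A small sketch with placeholder values:

```python
import torch
from modules.scheduler.warmupLR import warmupLR

model = torch.nn.Linear(10, 2)                      # stand-in model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = warmupLR(optimizer, lr=0.01, warmup_steps=2500,
                     momentum=0.9, decay=0.99)

for step in range(10000):
    # ... training step ...
    scheduler.step()        # warm-up phase first, exponential decay afterwards
```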
3 | 4 | import os 5 | import imp 6 | import time 7 | import numpy as np 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | import torch.backends.cudnn as cudnn 13 | import __init__ as booger 14 | 15 | from tqdm import tqdm 16 | from modules.user import User 17 | from dataset.nuscenes.parser import Parser 18 | from utils.utils import * 19 | 20 | 21 | class UserNusc(User): 22 | def __init__(self, ARCH, DATA, datadir, outputdir, modeldir, split, point_refine=False): 23 | # parameters 24 | self.ARCH = ARCH 25 | self.DATA = DATA 26 | self.datadir = datadir 27 | self.outputdir = outputdir 28 | self.modeldir = modeldir 29 | self.split = split 30 | self.post = None 31 | self.infer_batch_size = 1 32 | self.point_refine = point_refine 33 | self.pipeline = self.ARCH["train"]["pipeline"] 34 | # get the data 35 | self.parser = Parser(root=self.datadir, 36 | train_sequences=self.DATA["split"]["train"], 37 | valid_sequences=self.DATA["split"]["valid"], 38 | test_sequences=self.DATA["split"]["test"], 39 | split=self.split, 40 | labels=self.DATA["labels"], 41 | color_map=self.DATA["color_map"], 42 | learning_map=self.DATA["learning_map"], 43 | learning_map_inv=self.DATA["learning_map_inv"], 44 | sensor=self.ARCH["dataset"]["sensor"], 45 | max_points=self.ARCH["dataset"]["max_points"], 46 | batch_size=self.infer_batch_size, 47 | workers=2, # self.ARCH["train"]["workers"], 48 | gt=True, 49 | shuffle_train=False) 50 | 51 | with torch.no_grad(): 52 | torch.nn.Module.dump_patches = True 53 | if not point_refine: 54 | self.set_model() 55 | checkpoint = self.pipeline + "_valid_best" 56 | w_dict = torch.load( 57 | f"{self.modeldir}/{checkpoint}", map_location=lambda storage, loc: storage) 58 | try: 59 | self.model = nn.DataParallel(self.model) 60 | self.model.load_state_dict( 61 | w_dict['state_dict'], strict=True) 62 | except: 63 | self.set_model() 64 | self.model.load_state_dict( 65 | w_dict['state_dict'], strict=True) 66 | self.set_knn_post() 67 | else: 68 | from modules.PointRefine.spvcnn import SPVCNN 69 | self.set_model() 70 | self.model = nn.DataParallel(self.model) 71 | checkpoint = self.pipeline + "_refine_module_valid_best" 72 | w_dict = torch.load( 73 | f"{self.modeldir}/{checkpoint}", map_location=lambda storage, loc: storage) 74 | # self.model.load_state_dict(w_dict['main_state_dict'], strict=True) 75 | self.model.load_state_dict( 76 | {f"module.{k}": v for k, v in w_dict['main_state_dict'].items()}, strict=True) 77 | 78 | net_config = {'num_classes': self.parser.get_n_classes(), 79 | 'cr': 1.0, 'pres': 0.05, 'vres': 0.05} 80 | self.refine_module = SPVCNN(num_classes=net_config['num_classes'], 81 | cr=net_config['cr'], 82 | pres=net_config['pres'], 83 | vres=net_config['vres']) 84 | self.refine_module = nn.DataParallel(self.refine_module) 85 | w_dict = torch.load( 86 | f"{modeldir}/{checkpoint}", map_location=lambda storage, loc: storage) 87 | # self.refine_module.load_state_dict(w_dict['state_dict'], strict=True) 88 | self.refine_module.load_state_dict( 89 | {f"module.{k}": v for k, v in w_dict['refine_state_dict'].items()}, strict=True) 90 | 91 | self.set_gpu_cuda() 92 | 93 | def infer_subset(self, loader, to_orig_fn, cnn, knn): 94 | # switch to evaluate mode 95 | self.model.eval() 96 | 97 | # empty the cache to infer in high res 98 | if self.gpu: 99 | torch.cuda.empty_cache() 100 | 101 | with torch.no_grad(): 102 | 103 | end = time.time() 104 | 105 | for i, (proj_in, proj_mask, _, _, path_seq, path_name, 106 | p_x, p_y, proj_range, unproj_range, _, _, _, _, 
npoints, lidar_token)\ 107 | in enumerate(tqdm(loader, ncols=80)): 108 | # first cut to rela size (batch size one allows it) 109 | p_x = p_x[0, :npoints] 110 | p_y = p_y[0, :npoints] 111 | proj_range = proj_range[0, :npoints] 112 | unproj_range = unproj_range[0, :npoints] 113 | path_seq = path_seq[0] 114 | path_name = path_name[0] 115 | 116 | if self.gpu: 117 | proj_in = proj_in.cuda() 118 | p_x = p_x.cuda() 119 | p_y = p_y.cuda() 120 | if self.post: 121 | proj_range = proj_range.cuda() 122 | unproj_range = unproj_range.cuda() 123 | 124 | end = time.time() 125 | # compute output 126 | if self.ARCH["train"]["aux_loss"]["use"]: 127 | proj_output, _, _, _ = self.model(proj_in) 128 | else: 129 | proj_output, _ = self.model(proj_in) 130 | proj_argmax = proj_output[0].argmax(dim=0) 131 | 132 | if torch.cuda.is_available(): 133 | torch.cuda.synchronize() 134 | res = time.time() - end 135 | cnn.append(res) 136 | end = time.time() 137 | # print(f"Network seq {path_seq} scan {path_name} in {res} sec") 138 | 139 | # if knn --> use knn to postprocess 140 | # else put in original pointcloud using indexes 141 | if self.post: 142 | unproj_argmax = self.post(proj_range, unproj_range, 143 | proj_argmax, p_x, p_y) 144 | else: 145 | unproj_argmax = proj_argmax[p_y, p_x] 146 | 147 | # measure elapsed time 148 | if torch.cuda.is_available(): 149 | torch.cuda.synchronize() 150 | res = time.time() - end 151 | knn.append(res) 152 | # print(f"KNN Infered seq {path_seq} scan {path_name} in {res} sec") 153 | 154 | # save scan # get the first scan in batch and project scan 155 | pred_np = unproj_argmax.cpu().numpy() 156 | pred_np = pred_np.reshape((-1)).astype(np.int32) 157 | 158 | # map to original label 159 | pred_np = to_orig_fn(pred_np) 160 | 161 | if self.split == "test": 162 | path = os.path.join(self.outputdir, "v1.0-test", "{}_lidarseg.bin".format(lidar_token[0])) 163 | else: 164 | path = os.path.join(self.outputdir, "sequences", 165 | path_seq, "predictions", path_name) 166 | pred_np.tofile(path) 167 | -------------------------------------------------------------------------------- /modules/user_poss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 
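`infer_subset` above writes each scan's predictions as a flat binary array of int32 labels, one label per point in the original point order, after `to_orig_fn` maps them back to the dataset's raw label ids; on the nuScenes test split the file is named `<lidar_token>_lidarseg.bin`, otherwise it is placed under `sequences/<seq>/predictions/`. A quick sanity check on one saved file; the path below is a placeholder for whatever the inference run produced:

```python
import numpy as np

pred_path = "output/sequences/08/predictions/000000.label"   # placeholder path
pred = np.fromfile(pred_path, dtype=np.int32)                 # matches pred_np.tofile(...) above
print(pred.shape)          # one entry per point of the corresponding scan
print(np.unique(pred))     # raw dataset label ids after to_orig_fn
```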
3 | 4 | import os 5 | import imp 6 | import time 7 | import numpy as np 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | import torch.backends.cudnn as cudnn 13 | import __init__ as booger 14 | 15 | from tqdm import tqdm 16 | from modules.user import User 17 | from dataset.poss.parser import Parser 18 | 19 | 20 | class UserPoss(User): 21 | def __init__(self, ARCH, DATA, datadir, outputdir, modeldir, split, point_refine=False): 22 | super().__init__(ARCH, DATA, datadir, outputdir, modeldir, split, point_refine) 23 | 24 | self.parser = Parser(root=self.datadir, 25 | train_sequences=self.DATA["split"]["train"], 26 | valid_sequences=self.DATA["split"]["valid"], 27 | test_sequences=None, 28 | labels=self.DATA["labels"], 29 | color_map=self.DATA["color_map"], 30 | learning_map=self.DATA["learning_map"], 31 | learning_map_inv=self.DATA["learning_map_inv"], 32 | sensor=self.ARCH["dataset"]["sensor"], 33 | max_points=self.ARCH["dataset"]["max_points"], 34 | batch_size=self.infer_batch_size, 35 | workers=2, 36 | gt=True, 37 | shuffle_train=False) 38 | 39 | def infer_subset(self, loader, to_orig_fn, cnn, knn): 40 | 41 | # switch to evaluate mode 42 | self.model.eval() 43 | 44 | # empty the cache to infer in high res 45 | if self.gpu: 46 | torch.cuda.empty_cache() 47 | 48 | with torch.no_grad(): 49 | 50 | end = time.time() 51 | 52 | proj_y = torch.full([40, 1800], 0, dtype=torch.long) 53 | proj_x = torch.full([40, 1800], 0, dtype=torch.long) 54 | for i in range(proj_y.size(0)): 55 | proj_y[i, :] = i 56 | for i in range(proj_x.size(1)): 57 | proj_x[:, i] = i 58 | 59 | proj_y = proj_y.reshape([40 * 1800]) 60 | proj_x = proj_x.reshape([40 * 1800]) 61 | proj_x = proj_x.cuda() 62 | proj_y = proj_y.cuda() 63 | 64 | for i, (proj_in, proj_labels, tags, unlabels, path_seq, path_name, proj_range, unresizerange, unproj_range, _, _)\ 65 | in enumerate(tqdm(loader, ncols=80)): 66 | # first cut to rela size (batch size one allows it) 67 | path_seq = path_seq[0] 68 | path_name = path_name[0] 69 | 70 | if self.gpu: 71 | proj_in = proj_in.cuda() 72 | unlabels = unlabels.cuda() 73 | if self.post: 74 | proj_range = proj_range[0].cuda() 75 | unproj_range = unproj_range[0].cuda() 76 | 77 | end = time.time() 78 | # compute output 79 | if self.ARCH["train"]["aux_loss"]["use"]: 80 | proj_output, _, _, _ = self.model(proj_in) 81 | else: 82 | proj_output, _ = self.model(proj_in) 83 | proj_argmax = proj_output[0].argmax(dim=0) 84 | 85 | if torch.cuda.is_available(): 86 | torch.cuda.synchronize() 87 | res = time.time() - end 88 | cnn.append(res) 89 | end = time.time() 90 | # print(f"Network seq {path_seq} scan {path_name} in {res} sec") 91 | 92 | # if knn --> use knn to postprocess 93 | # else put in original pointcloud using indexes 94 | if self.post: 95 | unproj_argmax = self.post(proj_range, unproj_range, 96 | proj_argmax, proj_x, proj_y) 97 | else: 98 | unproj_argmax = proj_argmax[proj_y, proj_x] 99 | 100 | unproj_argmax = unproj_argmax[tags.squeeze()] 101 | # measure elapsed time 102 | if torch.cuda.is_available(): 103 | torch.cuda.synchronize() 104 | res = time.time() - end 105 | knn.append(res) 106 | # print(f"KNN Infered seq {path_seq} scan {path_name} in {res} sec") 107 | 108 | # save scan # get the first scan in batch and project scan 109 | pred_np = unproj_argmax.cpu().numpy() 110 | pred_np = pred_np.reshape((-1)).astype(np.int32) 111 | 112 | # map to original label 113 | pred_np = to_orig_fn(pred_np) 114 | 115 | path = os.path.join(self.outputdir, "sequences", 116 | path_seq, 
"predictions", path_name) 117 | pred_np.tofile(path) 118 | -------------------------------------------------------------------------------- /modules/user_refine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 3 | 4 | import os 5 | import imp 6 | import time 7 | import numpy as np 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | import torch.backends.cudnn as cudnn 13 | import __init__ as booger 14 | 15 | from tqdm import tqdm 16 | from modules.user import User 17 | 18 | # from modules.PointRefine.spvcnn import SPVCNN 19 | # from modules.PointRefine.spvcnn_lite import SPVCNN 20 | from torchsparse.utils.quantize import sparse_quantize 21 | from torchsparse.utils.collate import sparse_collate 22 | from torchsparse import SparseTensor 23 | 24 | 25 | class UserRefine(User): 26 | def __init__(self, ARCH, DATA, datadir, outputdir, modeldir, split): 27 | 28 | super(UserRefine, self).__init__(ARCH, DATA, datadir, 29 | outputdir, modeldir, split, point_refine=True) 30 | 31 | def infer(self): 32 | coarse, reproj, refine = [], [], [] 33 | 34 | if self.split == 'valid': 35 | self.infer_subset(loader=self.parser.get_valid_set(), 36 | to_orig_fn=self.parser.to_original, 37 | coarse=coarse, reproj=reproj, refine=refine) 38 | elif self.split == 'train': 39 | self.infer_subset(loader=self.parser.get_train_set(), 40 | to_orig_fn=self.parser.to_original, 41 | coarse=coarse, reproj=reproj, refine=refine) 42 | elif self.split == 'test': 43 | self.infer_subset(loader=self.parser.get_test_set(), 44 | to_orig_fn=self.parser.to_original, 45 | coarse=coarse, reproj=reproj, refine=refine) 46 | elif self.split == None: 47 | self.infer_subset(loader=self.parser.get_train_set(), 48 | to_orig_fn=self.parser.to_original, 49 | coarse=coarse, reproj=reproj, refine=refine) 50 | self.infer_subset(loader=self.parser.get_valid_set(), 51 | to_orig_fn=self.parser.to_original, 52 | coarse=coarse, reproj=reproj, refine=refine) 53 | self.infer_subset(loader=self.parser.get_test_set(), 54 | to_orig_fn=self.parser.to_original, 55 | coarse=coarse, reproj=reproj, refine=refine) 56 | else: 57 | raise NotImplementedError 58 | 59 | print( 60 | f"Mean Coarse inference time:{'%.8f'%np.mean(coarse)}\t std:{'%.8f'%np.std(coarse)}") 61 | print( 62 | f"Mean Reproject inference time:{'%.8f'%np.mean(reproj)}\t std:{'%.8f'%np.std(reproj)}") 63 | print( 64 | f"Mean Refine inference time:{'%.8f'%np.mean(refine)}\t std:{'%.8f'%np.std(refine)}") 65 | print(f"Total Frames: {len(coarse)}") 66 | print("Finished Infering") 67 | 68 | return 69 | 70 | def infer_subset(self, loader, to_orig_fn, coarse, reproj, refine): 71 | 72 | # switch to evaluate mode 73 | self.model.eval() 74 | self.refine_module.eval() 75 | 76 | # empty the cache to infer in high res 77 | if self.gpu: 78 | torch.cuda.empty_cache() 79 | 80 | with torch.no_grad(): 81 | 82 | end = time.time() 83 | 84 | for i, (proj_in, proj_mask, _, _, path_seq, path_name, 85 | p_x, p_y, proj_range, unproj_range, _, unproj_xyz, _, _, npoints)\ 86 | in enumerate(tqdm(loader, ncols=80)): 87 | 88 | # first cut to rela size (batch size one allows it) 89 | p_x = p_x[0, :npoints] 90 | p_y = p_y[0, :npoints] 91 | proj_range = proj_range[0, :npoints] 92 | unproj_range = unproj_range[0, :npoints] 93 | path_seq = path_seq[0] 94 | path_name = path_name[0] 95 | points_xyz = unproj_xyz[0, :npoints] 96 | 97 | if self.gpu: 98 | proj_in = 
proj_in.cuda() 99 | p_x = p_x.cuda() 100 | p_y = p_y.cuda() 101 | if self.post: 102 | proj_range = proj_range.cuda() 103 | unproj_range = unproj_range.cuda() 104 | 105 | end = time.time() 106 | # compute output 107 | proj_output, last_feature = self.model(proj_in) 108 | 109 | if torch.cuda.is_available(): 110 | torch.cuda.synchronize() 111 | res = time.time() - end 112 | coarse.append(res) 113 | end = time.time() 114 | # print(f"CoarseModule seq {path_seq} scan {path_name} in {res} sec") 115 | 116 | """ Reproject 2D features to 3D based on indices and form sparse Tensor""" 117 | points_feature = last_feature[0, :, p_y, p_x] 118 | coords = np.round(points_xyz[:, :3].cpu().numpy() / 0.05) 119 | coords -= coords.min(0, keepdims=1) 120 | coords, indices, inverse = sparse_quantize( 121 | coords, return_index=True, return_inverse=True) 122 | coords = torch.tensor(coords, dtype=torch.int, device='cuda') 123 | # torch.tensor(, dtype=torch.float) 124 | feats = points_feature.permute(1, 0)[indices] 125 | inputs = SparseTensor(coords=coords, feats=feats) 126 | inputs = sparse_collate([inputs]).cuda() 127 | """""""""""""""""""""""" 128 | 129 | # measure elapsed time 130 | if torch.cuda.is_available(): 131 | torch.cuda.synchronize() 132 | res = time.time() - end 133 | reproj.append(res) 134 | end = time.time() 135 | # print(f"DataConvert seq {path_seq} scan {path_name} in {res} sec") 136 | 137 | """ Input to PointHead, refine prediction """ 138 | predict = self.refine_module(inputs) 139 | 140 | if torch.cuda.is_available(): 141 | torch.cuda.synchronize() 142 | res = time.time() - end 143 | refine.append(res) 144 | # print(f"RefineModule seq {path_seq} scan {path_name} in {res} sec") 145 | 146 | predict = predict[inverse] # .permute(1,0) 147 | unproj_argmax = predict.argmax(dim=1) 148 | 149 | # save scan # get the first scan in batch and project scan 150 | pred_np = unproj_argmax.cpu().numpy() 151 | pred_np = pred_np.reshape((-1)).astype(np.int32) 152 | 153 | # map to original label 154 | pred_np = to_orig_fn(pred_np) 155 | 156 | path = os.path.join(self.outputdir, "sequences", 157 | path_seq, "predictions", path_name) 158 | pred_np.tofile(path) 159 | -------------------------------------------------------------------------------- /modules/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 
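The refinement path above voxelizes the point cloud at a 0.05 m resolution with torchsparse before feeding the reprojected 2D features to the point head; `inverse` then maps the per-voxel predictions back onto the original points. A stripped-down sketch of that quantization step with random stand-in data, assuming torchsparse is installed (shapes and the 0.05 voxel size mirror the code above):

```python
import numpy as np
import torch
from torchsparse import SparseTensor
from torchsparse.utils.collate import sparse_collate
from torchsparse.utils.quantize import sparse_quantize

points_xyz = np.random.rand(1000, 3) * 10        # stand-in for unproj_xyz
points_feature = torch.randn(128, 1000)          # stand-in for last_feature[0, :, p_y, p_x]

coords = np.round(points_xyz / 0.05)
coords -= coords.min(0, keepdims=1)              # shift to non-negative voxel indices
coords, indices, inverse = sparse_quantize(coords, return_index=True, return_inverse=True)

coords = torch.tensor(coords, dtype=torch.int)
feats = points_feature.permute(1, 0)[indices]    # one feature vector per occupied voxel
inputs = sparse_collate([SparseTensor(coords=coords, feats=feats)])

# after the refine network: per-voxel logits are scattered back to points via `inverse`
voxel_logits = torch.randn(coords.shape[0], 20)  # stand-in for the refine_module output
point_logits = voxel_logits[inverse]
```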
3 | 4 | import torch 5 | import numpy as np 6 | import cv2 7 | from matplotlib import pyplot as plt 8 | 9 | 10 | class AverageMeter(object): 11 | """Computes and stores the average and current value""" 12 | 13 | def __init__(self): 14 | self.reset() 15 | 16 | def reset(self): 17 | self.val = 0 18 | self.avg = 0 19 | self.sum = 0 20 | self.count = 0 21 | 22 | def update(self, val, n=1): 23 | self.val = val 24 | self.sum += val * n 25 | self.count += n 26 | self.avg = self.sum / self.count 27 | 28 | 29 | # def one_hot_pred_from_label(y_pred, labels): 30 | # y_true = torch.zeros_like(y_pred) 31 | # ones = torch.ones_like(y_pred) 32 | # indexes = [l for l in labels] 33 | # y_true[torch.arange(labels.size(0)), indexes] = ones[torch.arange( 34 | # labels.size(0)), indexes] 35 | # return y_true 36 | 37 | 38 | # def keep_variance_fn(x): 39 | # return x + 1e-3 40 | 41 | 42 | # class SoftmaxHeteroscedasticLoss(torch.nn.Module): 43 | # def __init__(self): 44 | # super(SoftmaxHeteroscedasticLoss, self).__init__() 45 | # self.adf_softmax = adf.Softmax( 46 | # dim=1, keep_variance_fn=keep_variance_fn) 47 | 48 | # def forward(self, outputs, targets, eps=1e-5): 49 | # mean, var = self.adf_softmax(*outputs) 50 | # targets = torch.nn.functional.one_hot( 51 | # targets, num_classes=20).permute(0, 3, 1, 2).float() 52 | 53 | # precision = 1 / (var + eps) 54 | # return torch.mean(0.5 * precision * (targets - mean) ** 2 + 0.5 * torch.log(var + eps)) 55 | 56 | 57 | def save_to_txtlog(logdir, logfile, message): 58 | f = open(logdir + '/' + logfile, "a") 59 | f.write(message + '\n') 60 | f.close() 61 | return 62 | 63 | 64 | def save_checkpoint(to_save, logdir, pipeline="LENet", suffix=""): 65 | # Save the weights 66 | torch.save(to_save, logdir + 67 | "/" + pipeline + suffix) 68 | 69 | 70 | def get_mpl_colormap(cmap_name): 71 | cmap = plt.get_cmap(cmap_name) 72 | # Initialize the matplotlib color map 73 | sm = plt.cm.ScalarMappable(cmap=cmap) 74 | # Obtain linear color range 75 | color_range = sm.to_rgba(np.linspace(0, 1, 256), bytes=True)[:, 2::-1] 76 | return color_range.reshape(256, 1, 3) 77 | 78 | 79 | def make_log_img(depth, mask, pred, gt, color_fn): 80 | # input should be [depth, pred, gt] 81 | # make range image (normalized to 0,1 for saving) 82 | depth = (cv2.normalize(depth, None, alpha=0, beta=1, 83 | norm_type=cv2.NORM_MINMAX, 84 | dtype=cv2.CV_32F) * 255.0).astype(np.uint8) 85 | out_img = cv2.applyColorMap( 86 | depth, get_mpl_colormap('viridis')) * mask[..., None] 87 | # make label prediction 88 | pred_color = color_fn((pred * mask).astype(np.int32)) 89 | out_img = np.concatenate([out_img, pred_color], axis=0) 90 | # make label gt 91 | gt_color = color_fn(gt) 92 | out_img = np.concatenate([out_img, gt_color], axis=0) 93 | return (out_img).astype(np.uint8) 94 | 95 | 96 | def show_scans_in_training(proj_mask, in_vol, argmax, proj_labels, color_fn): 97 | # get the first scan in batch and project points 98 | mask_np = proj_mask[0].cpu().numpy() 99 | depth_np = in_vol[0][0].cpu().numpy() 100 | pred_np = argmax[0].cpu().numpy() 101 | gt_np = proj_labels[0].cpu().numpy() 102 | out = make_log_img(depth_np, mask_np, pred_np, gt_np, color_fn) 103 | 104 | mask_np = proj_mask[1].cpu().numpy() 105 | depth_np = in_vol[1][0].cpu().numpy() 106 | pred_np = argmax[1].cpu().numpy() 107 | gt_np = proj_labels[1].cpu().numpy() 108 | out2 = make_log_img(depth_np, mask_np, pred_np, gt_np, color_fn) 109 | 110 | out = np.concatenate([out, out2], axis=0) 111 | 112 | cv2.imshow("sample_training", out) 113 | cv2.waitKey(1) 114 | 
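`AverageMeter` above is a small running-average helper, typically used to track losses and timings during training; the `iouEval` class that follows plays the analogous role for confusion-matrix metrics. A trivial usage sketch with made-up numbers:

```python
from modules.utils import AverageMeter

batch_time = AverageMeter()
for t in (0.12, 0.11, 0.15):         # e.g. seconds per batch
    batch_time.update(t)              # n=1 by default
print(batch_time.val, batch_time.avg)

loss_meter = AverageMeter()
loss_meter.update(0.73, n=8)          # weighted update: batch-mean loss over 8 samples
```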
115 | 116 | class iouEval: 117 | def __init__(self, n_classes, device, ignore=None): 118 | self.n_classes = n_classes 119 | self.device = device 120 | # if ignore is larger than n_classes, consider no ignoreIndex 121 | self.ignore = torch.tensor(ignore).long() 122 | self.include = torch.tensor( 123 | [n for n in range(self.n_classes) if n not in self.ignore]).long() 124 | print("[IOU EVAL] IGNORE: ", self.ignore) 125 | print("[IOU EVAL] INCLUDE: ", self.include) 126 | self.reset() 127 | 128 | def num_classes(self): 129 | return self.n_classes 130 | 131 | def reset(self): 132 | self.conf_matrix = torch.zeros( 133 | (self.n_classes, self.n_classes), device=self.device).long() 134 | self.ones = None 135 | self.last_scan_size = None # for when variable scan size is used 136 | 137 | def addBatch(self, x, y): # x=preds, y=targets 138 | # if numpy, pass to pytorch 139 | # to tensor 140 | if isinstance(x, np.ndarray): 141 | x = torch.from_numpy(np.array(x)).long().to(self.device) 142 | if isinstance(y, np.ndarray): 143 | y = torch.from_numpy(np.array(y)).long().to(self.device) 144 | 145 | # sizes should be "batch_size x H x W" 146 | x_row = x.reshape(-1) # de-batchify 147 | y_row = y.reshape(-1) # de-batchify 148 | 149 | # idxs are labels and predictions 150 | idxs = torch.stack([x_row, y_row], dim=0) 151 | 152 | # ones is what I want to add to conf when I 153 | if self.ones is None or self.last_scan_size != idxs.shape[-1]: 154 | self.ones = torch.ones((idxs.shape[-1]), device=self.device).long() 155 | self.last_scan_size = idxs.shape[-1] 156 | 157 | # make confusion matrix (cols = gt, rows = pred) 158 | self.conf_matrix = self.conf_matrix.index_put_( 159 | tuple(idxs), self.ones, accumulate=True) 160 | 161 | def getStats(self): 162 | # remove fp and fn from confusion on the ignore classes cols and rows 163 | conf = self.conf_matrix.clone().double() 164 | conf[self.ignore] = 0 165 | conf[:, self.ignore] = 0 166 | 167 | # get the clean stats 168 | tp = conf.diag() 169 | fp = conf.sum(dim=1) - tp 170 | fn = conf.sum(dim=0) - tp 171 | return tp, fp, fn 172 | 173 | def getIoU(self): 174 | tp, fp, fn = self.getStats() 175 | intersection = tp 176 | union = tp + fp + fn + 1e-15 177 | iou = intersection / union 178 | iou_mean = (intersection[self.include] / union[self.include]).mean() 179 | return iou_mean, iou # returns "iou mean", "iou per class" ALL CLASSES 180 | 181 | def getacc(self): 182 | tp, fp, fn = self.getStats() 183 | total_tp = tp.sum() 184 | total = tp[self.include].sum() + fp[self.include].sum() + 1e-15 185 | acc_mean = total_tp / total 186 | return acc_mean # returns "acc mean" 187 | -------------------------------------------------------------------------------- /postproc/KNN.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 3 | 4 | import math 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | def get_gaussian_kernel(kernel_size=3, sigma=2, channels=1): 12 | # Create a x, y coordinate grid of shape (kernel_size, kernel_size, 2) 13 | x_coord = torch.arange(kernel_size) 14 | x_grid = x_coord.repeat(kernel_size).view(kernel_size, kernel_size) 15 | y_grid = x_grid.t() 16 | xy_grid = torch.stack([x_grid, y_grid], dim=-1).float() 17 | 18 | mean = (kernel_size - 1) / 2. 19 | variance = sigma ** 2. 
20 | 21 | # Calculate the 2-dimensional gaussian kernel which is 22 | # the product of two gaussian distributions for two different 23 | # variables (in this case called x and y) 24 | gaussian_kernel = (1. / (2. * math.pi * variance)) * \ 25 | torch.exp(-torch.sum((xy_grid - mean) ** 2., dim=-1) / (2 * variance)) 26 | 27 | # Make sure sum of values in gaussian kernel equals 1. 28 | gaussian_kernel = gaussian_kernel / torch.sum(gaussian_kernel) 29 | 30 | # Reshape to 2d depthwise convolutional weight 31 | gaussian_kernel = gaussian_kernel.view(kernel_size, kernel_size) 32 | 33 | return gaussian_kernel 34 | 35 | 36 | class KNN(nn.Module): 37 | def __init__(self, params, nclasses): 38 | super().__init__() 39 | print("*" * 80) 40 | print("Cleaning point-clouds with kNN post-processing") 41 | self.knn = params["knn"] 42 | self.search = params["search"] 43 | self.sigma = params["sigma"] 44 | self.cutoff = params["cutoff"] 45 | self.nclasses = nclasses 46 | print("kNN parameters:") 47 | print("knn:", self.knn) 48 | print("search:", self.search) 49 | print("sigma:", self.sigma) 50 | print("cutoff:", self.cutoff) 51 | print("nclasses:", self.nclasses) 52 | print("*" * 80) 53 | 54 | def forward(self, proj_range, unproj_range, proj_argmax, px, py): 55 | ''' Warning! Only works for un-batched pointclouds. 56 | If they come batched we need to iterate over the batch dimension or do 57 | something REALLY smart to handle unaligned number of points in memory 58 | ''' 59 | # get device 60 | if proj_range.is_cuda: 61 | device = torch.device("cuda") 62 | else: 63 | device = torch.device("cpu") 64 | 65 | # sizes of projection scan 66 | H, W = proj_range.shape 67 | 68 | # number of points 69 | P = unproj_range.shape 70 | 71 | # check if size of kernel is odd and complain 72 | if (self.search % 2 == 0): 73 | raise ValueError("Nearest neighbor kernel must be odd number") 74 | 75 | # calculate padding 76 | pad = int((self.search - 1) / 2) 77 | 78 | # unfold neighborhood to get nearest neighbors for each pixel (range image) 79 | proj_unfold_k_rang = F.unfold(proj_range[None, None, ...], 80 | kernel_size=(self.search, self.search), 81 | padding=(pad, pad)) 82 | 83 | # index with px, py to get ALL the pcld points 84 | idx_list = py * W + px 85 | unproj_unfold_k_rang = proj_unfold_k_rang[:, :, idx_list] 86 | 87 | # WARNING, THIS IS A HACK 88 | # Make non valid (<0) range points extremely big so that there is no screwing 89 | # up the nn self.search 90 | unproj_unfold_k_rang[unproj_unfold_k_rang < 0] = float("inf") 91 | 92 | # now the matrix is unfolded TOTALLY, replace the middle points with the actual range points 93 | center = int(((self.search * self.search) - 1) / 2) 94 | unproj_unfold_k_rang[:, center, :] = unproj_range 95 | 96 | # now compare range 97 | k2_distances = torch.abs(unproj_unfold_k_rang - unproj_range) 98 | 99 | # make a kernel to weigh the ranges according to distance in (x,y) 100 | # I make this 1 - kernel because I want distances that are close in (x,y) 101 | # to matter more 102 | inv_gauss_k = ( 103 | 1 - get_gaussian_kernel(self.search, self.sigma, 1)).view(1, -1, 1) 104 | inv_gauss_k = inv_gauss_k.to(device).type(proj_range.type()) 105 | 106 | # apply weighing 107 | k2_distances = k2_distances * inv_gauss_k 108 | 109 | # find nearest neighbors 110 | _, knn_idx = k2_distances.topk( 111 | self.knn, dim=1, largest=False, sorted=False) 112 | 113 | # do the same unfolding with the argmax 114 | proj_unfold_1_argmax = F.unfold(proj_argmax[None, None, ...].float(), 115 | kernel_size=(self.search, 
self.search), 116 | padding=(pad, pad)).long() 117 | unproj_unfold_1_argmax = proj_unfold_1_argmax[:, :, idx_list] 118 | 119 | # get the top k predictions from the knn at each pixel 120 | knn_argmax = torch.gather( 121 | input=unproj_unfold_1_argmax, dim=1, index=knn_idx) 122 | 123 | # fake an invalid argmax of classes + 1 for all cutoff items 124 | if self.cutoff > 0: 125 | knn_distances = torch.gather( 126 | input=k2_distances, dim=1, index=knn_idx) 127 | knn_invalid_idx = knn_distances > self.cutoff 128 | knn_argmax[knn_invalid_idx] = self.nclasses 129 | 130 | # now vote 131 | # argmax onehot has an extra class for objects after cutoff 132 | knn_argmax_onehot = torch.zeros( 133 | (1, self.nclasses + 1, P[0]), device=device).type(proj_range.type()) 134 | ones = torch.ones_like(knn_argmax).type(proj_range.type()) 135 | knn_argmax_onehot = knn_argmax_onehot.scatter_add_(1, knn_argmax, ones) 136 | 137 | # now vote (as a sum over the onehot shit) (don't let it choose unlabeled OR invalid) 138 | knn_argmax_out = knn_argmax_onehot[:, 1:-1].argmax(dim=1) + 1 139 | 140 | # reshape again 141 | knn_argmax_out = knn_argmax_out.view(P) 142 | 143 | return knn_argmax_out 144 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | icecream 2 | tqdm 3 | matplotlib 4 | open3d 5 | pyyaml 6 | tensorboard==2.4.0 7 | tensorboardX==2.1 8 | vispy==0.7.0 9 | cython==0.29.26 10 | easydict==1.9 11 | nose==1.3.7 12 | numpy==1.19.4 13 | opencv-contrib-python==4.5.1.48 14 | opencv-python==4.5.1.48 15 | scikit-learn==0.24.2 16 | sklearn 17 | strictyaml==1.4.4 18 | protobuf==3.20.1 19 | nuscenes-devkit==1.1.9 20 | # transforms3d 21 | # numba -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 3 | 4 | import os 5 | import random 6 | import numpy as np 7 | import torch 8 | 9 | from utils.utils import * 10 | from datetime import datetime 11 | from modules.trainer import Trainer 12 | 13 | 14 | def set_seed(seed=1024): 15 | random.seed(seed) 16 | # os.environ['PYTHONHASHSEED'] = str(seed) 17 | np.random.seed(seed) 18 | torch.manual_seed(seed) 19 | torch.cuda.manual_seed(seed) 20 | torch.cuda.manual_seed_all(seed) # if you are using multi-GPU. 
21 | torch.backends.cudnn.benchmark = False 22 | torch.backends.cudnn.deterministic = True 23 | # torch.backends.cudnn.enabled = False 24 | # If we need to reproduce the results, increase the training speed 25 | # set benchmark = False 26 | # If we don’t need to reproduce the results, improve the network performance as much as possible 27 | # set benchmark = True 28 | 29 | 30 | if __name__ == '__main__': 31 | parser = get_args(flags="train") 32 | FLAGS, unparsed = parser.parse_known_args() 33 | FLAGS.log = FLAGS.log + '/' + datetime.now().strftime("%Y-%-m-%d-%H:%M") + FLAGS.name 34 | 35 | # open arch / data config file 36 | ARCH = load_yaml(FLAGS.arch_cfg) 37 | DATA = load_yaml(FLAGS.data_cfg) 38 | 39 | print("----------") 40 | print("INTERFACE:") 41 | print(" dataset:", FLAGS.dataset) 42 | print(" arch_cfg:", FLAGS.arch_cfg) 43 | print(" data_cfg:", FLAGS.data_cfg) 44 | # print(" uncertainty:", FLAGS.uncertainty) 45 | print(" log:", FLAGS.log) 46 | print(" pretrained:", FLAGS.pretrained) 47 | print("----------\n") 48 | 49 | make_logdir(FLAGS=FLAGS, resume_train=False) # create log folder 50 | check_pretrained_dir(FLAGS.pretrained) # does model folder exist? 51 | # backup code and config files to logdir 52 | backup_to_logdir(FLAGS=FLAGS) 53 | 54 | set_seed() 55 | # create trainer and start the training 56 | trainer = Trainer(ARCH, DATA, FLAGS.dataset, FLAGS.log, FLAGS.pretrained) 57 | trainer.train() 58 | -------------------------------------------------------------------------------- /train_nusc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 3 | 4 | import os 5 | import random 6 | import numpy as np 7 | import torch 8 | 9 | from utils.utils import * 10 | from datetime import datetime 11 | from modules.trainer_nusc import TrainerNusc 12 | 13 | 14 | def set_seed(seed=1024): 15 | random.seed(seed) 16 | # os.environ['PYTHONHASHSEED'] = str(seed) 17 | np.random.seed(seed) 18 | torch.manual_seed(seed) 19 | torch.cuda.manual_seed(seed) 20 | torch.cuda.manual_seed_all(seed) # if you are using multi-GPU. 21 | torch.backends.cudnn.benchmark = False 22 | torch.backends.cudnn.deterministic = True 23 | # torch.backends.cudnn.enabled = False 24 | # If we need to reproduce the results, increase the training speed 25 | # set benchmark = False 26 | # If we don’t need to reproduce the results, improve the network performance as much as possible 27 | # set benchmark = True 28 | 29 | 30 | if __name__ == '__main__': 31 | parser = get_args(flags="train") 32 | FLAGS, unparsed = parser.parse_known_args() 33 | FLAGS.log = FLAGS.log + '/' + datetime.now().strftime("%Y-%-m-%d-%H:%M") + FLAGS.name 34 | 35 | # open arch / data config file 36 | ARCH = load_yaml(FLAGS.arch_cfg) 37 | DATA = load_yaml(FLAGS.data_cfg) 38 | 39 | print("----------") 40 | print("INTERFACE:") 41 | print(" dataset:", FLAGS.dataset) 42 | print(" arch_cfg:", FLAGS.arch_cfg) 43 | print(" data_cfg:", FLAGS.data_cfg) 44 | # print(" uncertainty:", FLAGS.uncertainty) 45 | print(" log:", FLAGS.log) 46 | print(" pretrained:", FLAGS.pretrained) 47 | print("----------\n") 48 | 49 | make_logdir(FLAGS=FLAGS, resume_train=False) # create log folder 50 | check_pretrained_dir(FLAGS.pretrained) # does model folder exist? 
51 | # backup code and config files to logdir 52 | backup_to_logdir(FLAGS=FLAGS) 53 | 54 | set_seed() 55 | # create trainer and start the training 56 | trainer = TrainerNusc(ARCH, DATA, FLAGS.dataset, 57 | FLAGS.log, FLAGS.pretrained) 58 | trainer.train() 59 | -------------------------------------------------------------------------------- /train_poss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 3 | 4 | import os 5 | import random 6 | import numpy as np 7 | import torch 8 | 9 | from utils.utils import * 10 | from datetime import datetime 11 | from modules.tariner_poss import TrainerPoss 12 | 13 | 14 | def set_seed(seed=1024): 15 | random.seed(seed) 16 | # os.environ['PYTHONHASHSEED'] = str(seed) 17 | np.random.seed(seed) 18 | torch.manual_seed(seed) 19 | torch.cuda.manual_seed(seed) 20 | torch.cuda.manual_seed_all(seed) # if you are using multi-GPU. 21 | torch.backends.cudnn.benchmark = False 22 | torch.backends.cudnn.deterministic = True 23 | # torch.backends.cudnn.enabled = False 24 | # If we need to reproduce the results, increase the training speed 25 | # set benchmark = False 26 | # If we don’t need to reproduce the results, improve the network performance as much as possible 27 | # set benchmark = True 28 | 29 | 30 | if __name__ == '__main__': 31 | parser = get_args(flags="train") 32 | FLAGS, unparsed = parser.parse_known_args() 33 | FLAGS.log = FLAGS.log + '/' + datetime.now().strftime("%Y-%-m-%d-%H:%M") + FLAGS.name 34 | 35 | # open arch / data config file 36 | ARCH = load_yaml(FLAGS.arch_cfg) 37 | DATA = load_yaml(FLAGS.data_cfg) 38 | 39 | print("----------") 40 | print("INTERFACE:") 41 | print(" dataset:", FLAGS.dataset) 42 | print(" arch_cfg:", FLAGS.arch_cfg) 43 | print(" data_cfg:", FLAGS.data_cfg) 44 | # print(" uncertainty:", FLAGS.uncertainty) 45 | print(" log:", FLAGS.log) 46 | print(" pretrained:", FLAGS.pretrained) 47 | print("----------\n") 48 | 49 | make_logdir(FLAGS=FLAGS, resume_train=False) # create log folder 50 | check_pretrained_dir(FLAGS.pretrained) # does model folder exist? 51 | # backup code and config files to logdir 52 | backup_to_logdir(FLAGS=FLAGS) 53 | 54 | set_seed() 55 | # create trainer and start the training 56 | trainer = TrainerPoss(ARCH, DATA, FLAGS.dataset, 57 | FLAGS.log, FLAGS.pretrained) 58 | trainer.train() 59 | -------------------------------------------------------------------------------- /utils/auto_gen_residual_images.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Developed by Jiadai Sun 3 | # and the main_funciton 'prosess_one_seq' refers to Xieyuanli Chen’s gen_residual_images.py 4 | # This file is covered by the LICENSE file in the root of this project. 
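Each training entry point above loads an architecture YAML and a label YAML and hands the resulting dicts to the trainer; the modules shown earlier read keys such as `ARCH["train"]["pipeline"]`, `ARCH["train"]["aux_loss"]["use"]`, and `ARCH["dataset"]["sensor"]`. A quick way to inspect a config before launching a run; the file path below is a placeholder for whichever arch config the run uses (i.e. `FLAGS.arch_cfg` above):

```python
import yaml

# Placeholder path: point this at the arch config used for the run.
with open("config/arch/LENet.yaml") as f:
    ARCH = yaml.safe_load(f)

print(ARCH["train"]["pipeline"])           # network selection, read e.g. in user_nusc.py above
print(ARCH["train"]["aux_loss"]["use"])    # enables the auxiliary heads in the networks above
print(ARCH["dataset"]["sensor"])           # projection / sensor parameters passed to the parsers
```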
5 | # Brief: This script generates residual images 6 | 7 | from kitti_utils import load_poses, load_calib, load_files, load_vertex, range_projection 8 | from icecream import ic 9 | from tqdm import tqdm 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import yaml 13 | import os 14 | os.environ["OMP_NUM_THREADS"] = "4" 15 | 16 | 17 | def check_and_makedirs(dir_path): 18 | if not os.path.exists(dir_path): 19 | os.makedirs(dir_path) 20 | 21 | 22 | def load_yaml(path): 23 | if yaml.__version__ >= '5.1': 24 | config = yaml.load(open(path), Loader=yaml.FullLoader) 25 | else: 26 | config = yaml.load(open(path)) 27 | return config 28 | 29 | 30 | def process_one_seq(config): 31 | # specify parameters 32 | num_frames = config['num_frames'] 33 | debug = config['debug'] 34 | normalize = config['normalize'] 35 | num_last_n = config['num_last_n'] 36 | visualize = config['visualize'] 37 | visualization_folder = config['visualization_folder'] 38 | 39 | # specify the output folders 40 | residual_image_folder = config['residual_image_folder'] 41 | check_and_makedirs(residual_image_folder) 42 | 43 | if visualize: 44 | check_and_makedirs(visualization_folder) 45 | 46 | # load poses 47 | pose_file = config['pose_file'] 48 | poses = np.array(load_poses(pose_file)) 49 | inv_frame0 = np.linalg.inv(poses[0]) 50 | 51 | # load calibrations 52 | calib_file = config['calib_file'] 53 | T_cam_velo = load_calib(calib_file) 54 | T_cam_velo = np.asarray(T_cam_velo).reshape((4, 4)) 55 | T_velo_cam = np.linalg.inv(T_cam_velo) 56 | 57 | # convert kitti poses from camera coord to LiDAR coord 58 | new_poses = [] 59 | for pose in poses: 60 | new_poses.append(T_velo_cam.dot(inv_frame0).dot(pose).dot(T_cam_velo)) 61 | poses = np.array(new_poses) 62 | 63 | # load LiDAR scans 64 | scan_folder = config['scan_folder'] 65 | scan_paths = load_files(scan_folder) 66 | 67 | # test for the first N scans 68 | if num_frames >= len(poses) or num_frames <= 0: 69 | print('generate training data for all frames with number of: ', len(poses)) 70 | else: 71 | poses = poses[:num_frames] 72 | scan_paths = scan_paths[:num_frames] 73 | 74 | range_image_params = config['range_image'] 75 | 76 | # generate residual images for the whole sequence 77 | for frame_idx in tqdm(range(len(scan_paths))): 78 | file_name = os.path.join( 79 | residual_image_folder, str(frame_idx).zfill(6)) 80 | diff_image = np.full((range_image_params['height'], range_image_params['width']), 0, 81 | dtype=np.float32) # [H,W] range (0 is no data) 82 | 83 | # for the first N frame we generate a dummy file 84 | if frame_idx < num_last_n: 85 | np.save(file_name, diff_image) 86 | else: 87 | # load current scan and generate current range image 88 | current_pose = poses[frame_idx] 89 | current_scan = load_vertex(scan_paths[frame_idx]) 90 | current_range = range_projection(current_scan.astype(np.float32), 91 | range_image_params['height'], range_image_params['width'], 92 | range_image_params['fov_up'], range_image_params['fov_down'], 93 | range_image_params['max_range'], range_image_params['min_range'])[:, :, 3] 94 | 95 | # load last scan, transform into the current coord and generate a transformed last range image 96 | last_pose = poses[frame_idx - num_last_n] 97 | last_scan = load_vertex(scan_paths[frame_idx - num_last_n]) 98 | last_scan_transformed = np.linalg.inv( 99 | current_pose).dot(last_pose).dot(last_scan.T).T 100 | last_range_transformed = range_projection(last_scan_transformed.astype(np.float32), 101 | range_image_params['height'], range_image_params['width'], 102 
| range_image_params['fov_up'], range_image_params['fov_down'], 103 | range_image_params['max_range'], range_image_params['min_range'])[:, :, 3] 104 | 105 | # generate residual image 106 | valid_mask = (current_range > range_image_params['min_range']) & \ 107 | (current_range < range_image_params['max_range']) & \ 108 | (last_range_transformed > range_image_params['min_range']) & \ 109 | (last_range_transformed < 110 | range_image_params['max_range']) 111 | difference = np.abs( 112 | current_range[valid_mask] - last_range_transformed[valid_mask]) 113 | 114 | if normalize: 115 | difference = np.abs( 116 | current_range[valid_mask] - last_range_transformed[valid_mask]) / current_range[valid_mask] 117 | 118 | diff_image[valid_mask] = difference 119 | 120 | if debug: 121 | fig, axs = plt.subplots(3) 122 | axs[0].imshow(last_range_transformed) 123 | axs[1].imshow(current_range) 124 | axs[2].imshow(diff_image, vmin=0, vmax=1) 125 | plt.show() 126 | 127 | if visualize: 128 | fig = plt.figure(frameon=False, figsize=(16, 10)) 129 | fig.set_size_inches(20.48, 0.64) 130 | ax = plt.Axes(fig, [0., 0., 1., 1.]) 131 | ax.set_axis_off() 132 | fig.add_axes(ax) 133 | ax.imshow(diff_image, vmin=0, vmax=1) 134 | image_name = os.path.join( 135 | visualization_folder, str(frame_idx).zfill(6)) 136 | plt.savefig(image_name) 137 | plt.close() 138 | 139 | # save residual image 140 | np.save(file_name, diff_image) 141 | 142 | 143 | if __name__ == '__main__': 144 | 145 | # load config file 146 | # config_filename = 'config/data_preparing_hesai32.yaml' 147 | config_filename = 'config/data_preparing.yaml' 148 | config = load_yaml(config_filename) 149 | 150 | # used for kitti-raw and kitti-road 151 | for seq in range(0, 10): # sequences id 152 | 153 | for i in range(1, 9): # residual_image_i 154 | 155 | # Update the value in config to facilitate the iterative loop 156 | config['num_last_n'] = i 157 | config['scan_folder'] = f"data/sequences/{'%02d'%seq}/velodyne" 158 | config['pose_file'] = f"data/sequences/{'%02d'%seq}/poses.txt" 159 | config['calib_file'] = f"data/sequences/{'%02d'%seq}/calib.txt" 160 | # config['residual_image_folder'] = f"data/sequences/{'%02d'%seq}/residual_images_{i}" 161 | # config['visualization_folder'] = f"data/sequences/{'%02d'%seq}/visualization_{i}" 162 | config['residual_image_folder'] = f"tmpdata/sequences/{'%02d'%seq}/residual_images_{i}" 163 | config['visualization_folder'] = f"tmpdata/sequences/{'%02d'%seq}/visualization_{i}" 164 | ic(config) 165 | process_one_seq(config) 166 | -------------------------------------------------------------------------------- /utils/np_ioueval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 
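# iouEval accumulates an (n_classes x n_classes) confusion matrix with rows = predictions and
# cols = ground truth. getStats() zeroes the columns of the ignored classes (so they contribute
# no false positives), then per class c: IoU_c = TP_c / (TP_c + FP_c + FN_c). getIoU() averages
# the IoU over the included classes only, and getacc() returns total TP divided by the count of
# included-class predictions. torch_ioueval.py below offers the same interface on torch tensors.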
3 | 4 | import sys 5 | import numpy as np 6 | 7 | 8 | class iouEval: 9 | def __init__(self, n_classes, ignore=None): 10 | # classes 11 | self.n_classes = n_classes 12 | 13 | # What to include and ignore from the means 14 | self.ignore = np.array(ignore, dtype=np.int64) 15 | self.include = np.array( 16 | [n for n in range(self.n_classes) if n not in self.ignore], dtype=np.int64) 17 | print("[IOU EVAL] IGNORE: ", self.ignore) 18 | print("[IOU EVAL] INCLUDE: ", self.include) 19 | 20 | # reset the class counters 21 | self.reset() 22 | 23 | def num_classes(self): 24 | return self.n_classes 25 | 26 | def reset(self): 27 | self.conf_matrix = np.zeros( 28 | (self.n_classes, self.n_classes), dtype=np.int64) 29 | 30 | def addBatch(self, x, y): # x=preds, y=targets 31 | # sizes should be matching 32 | x_row = x.reshape(-1) # de-batchify 33 | y_row = y.reshape(-1) # de-batchify 34 | 35 | # check 36 | assert(x_row.shape == x_row.shape) 37 | 38 | # create indexes 39 | idxs = tuple(np.stack((x_row, y_row), axis=0)) 40 | 41 | # make confusion matrix (cols = gt, rows = pred) 42 | np.add.at(self.conf_matrix, idxs, 1) 43 | 44 | def getStats(self): 45 | # remove fp from confusion on the ignore classes cols 46 | conf = self.conf_matrix.copy() 47 | conf[:, self.ignore] = 0 48 | 49 | # get the clean stats 50 | tp = np.diag(conf) 51 | fp = conf.sum(axis=1) - tp 52 | fn = conf.sum(axis=0) - tp 53 | return tp, fp, fn 54 | 55 | def getIoU(self): 56 | tp, fp, fn = self.getStats() 57 | intersection = tp 58 | union = tp + fp + fn + 1e-15 59 | iou = intersection / union 60 | iou_mean = (intersection[self.include] / union[self.include]).mean() 61 | return iou_mean, iou # returns "iou mean", "iou per class" ALL CLASSES 62 | 63 | def getacc(self): 64 | tp, fp, fn = self.getStats() 65 | total_tp = tp.sum() 66 | total = tp[self.include].sum() + fp[self.include].sum() + 1e-15 67 | acc_mean = total_tp / total 68 | return acc_mean # returns "acc mean" 69 | 70 | def get_confusion(self): 71 | return self.conf_matrix.copy() 72 | 73 | 74 | if __name__ == "__main__": 75 | # mock problem 76 | nclasses = 2 77 | ignore = [] 78 | 79 | # test with 2 squares and a known IOU 80 | lbl = np.zeros((7, 7), dtype=np.int64) 81 | argmax = np.zeros((7, 7), dtype=np.int64) 82 | 83 | eval = iouEval(nclasses, ignore) 84 | 85 | eval.addBatch(argmax, lbl) 86 | 87 | # put squares 88 | lbl[2:4, 2:4] = 1 89 | argmax[3:5, 3:5] = 1 90 | 91 | # make evaluator 92 | 93 | # run 94 | eval.addBatch(argmax, lbl) 95 | 96 | m_iou, iou = eval.getIoU() 97 | print("IoU: ", m_iou) 98 | print("IoU class: ", iou) 99 | m_acc = eval.getacc() 100 | print("Acc: ", m_acc) 101 | -------------------------------------------------------------------------------- /utils/nuscenes2kitti.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import click 3 | import numpy as np 4 | import numpy.linalg as la 5 | import nuscenes.utils.geometry_utils as geoutils 6 | from nuscenes import NuScenes 7 | import os 8 | from nuscenes.utils import splits 9 | from PIL import Image 10 | from pyquaternion import Quaternion 11 | 12 | def convert_scenes(dataroot, output_dir, normalize_remission, save_images, mini, trainval=True): 13 | if mini: 14 | nusc = NuScenes(version='v1.0-mini',dataroot=dataroot,verbose=True) 15 | #Get sequence names 16 | scenes = splits.mini_train + splits.mini_val 17 | elif trainval: 18 | nusc = NuScenes(version='v1.0-trainval',dataroot=dataroot,verbose=True) 19 | #Get sequence names 20 | scenes = splits.train + 
splits.val 21 | else: 22 | nusc = NuScenes(version='v1.0-test',dataroot=dataroot,verbose=True) 23 | scenes = splits.test 24 | 25 | #Create sequences dirs 26 | for scene in scenes: 27 | dirname = scene[6:] 28 | output_seq_dir = os.path.join(output_dir,dirname) 29 | if not os.path.exists(output_seq_dir): 30 | os.makedirs(output_seq_dir, exist_ok=True) 31 | 32 | #Iterate over all scenes (sequences) 33 | for scene in nusc.scene: 34 | #Create directories: sequence, velodyne, labels 35 | dirname = scene['name'][6:] 36 | output_seq_dir = os.path.join(output_dir,dirname) 37 | pose_f = os.path.join(output_seq_dir,'poses.txt') 38 | calib_f = os.path.join(output_seq_dir,'calib.txt') 39 | vel_dir = os.path.join(output_seq_dir,'velodyne') 40 | lab_dir = os.path.join(output_seq_dir,'labels') 41 | if not os.path.exists(output_seq_dir): 42 | os.makedirs(output_seq_dir, exist_ok=True) 43 | if not os.path.exists(vel_dir): 44 | os.makedirs(vel_dir, exist_ok=True) 45 | if not os.path.exists(lab_dir): 46 | os.makedirs(lab_dir, exist_ok=True) 47 | 48 | #Create dummy calib file 49 | calib_file = open(calib_f, "w") 50 | calib_file.write("P0: 1 0 0 0 0 1 0 0 0 0 1 0\n") 51 | calib_file.write("P1: 1 0 0 0 0 1 0 0 0 0 1 0\n") 52 | calib_file.write("P2: 1 0 0 0 0 1 0 0 0 0 1 0\n") 53 | calib_file.write("P3: 1 0 0 0 0 1 0 0 0 0 1 0\n") 54 | calib_file.write("Tr: 1 0 0 0 0 1 0 0 0 0 1 0\n") 55 | calib_file.close() 56 | 57 | next_sample = scene['first_sample_token'] 58 | 59 | lidar_filenames = [] 60 | poses = [] 61 | files_mapping = [] 62 | lidar_tokens = [] 63 | 64 | #Iterate over all samples (scans) in the sequence 65 | while next_sample != '': 66 | #Current sample data 67 | sample = nusc.get('sample',next_sample) 68 | #Get token for the next sample 69 | next_sample = sample['next'] 70 | 71 | #Get lidar, semantic and panoptic filenames 72 | lidar_token = sample['data']['LIDAR_TOP'] 73 | lidar_data = nusc.get('sample_data', lidar_token) 74 | scan = np.fromfile(os.path.join(dataroot, lidar_data["filename"]), dtype=np.float32) 75 | #Save scan 76 | points = scan.reshape((-1, 5))[:, :4] 77 | if normalize_remission: 78 | # ensure that remission is in [0,1] 79 | max_remission = np.max(points[:, 3]) 80 | min_remission = np.min(points[:, 3]) 81 | points[:, 3] = (points[:, 3] - min_remission) / (max_remission - min_remission) 82 | #velodyne bin file 83 | output_filename = os.path.join(vel_dir, "{:06d}.bin".format(len(lidar_filenames))) 84 | points.tofile(output_filename) 85 | #Compute pose 86 | calib_data = nusc.get("calibrated_sensor", lidar_data["calibrated_sensor_token"]) 87 | egopose_data = nusc.get('ego_pose', lidar_data["ego_pose_token"]) 88 | car_to_velo = geoutils.transform_matrix(calib_data["translation"], Quaternion(calib_data['rotation'])) 89 | pose_car = geoutils.transform_matrix(egopose_data["translation"], Quaternion(egopose_data['rotation'])) 90 | pose = np.dot(pose_car, car_to_velo) 91 | poses.append(pose) 92 | 93 | #Compute labels for train and val scenes 94 | if trainval: 95 | sem_lab_f = nusc.get('lidarseg',lidar_token)['filename'] 96 | sem_lab = np.fromfile(os.path.join(dataroot,sem_lab_f),dtype=np.uint8) 97 | pan_lab_f = nusc.get('panoptic',lidar_token)['filename'] 98 | pan_lab = np.load(os.path.join(dataroot,pan_lab_f))['data'] 99 | #sem labels from panoptic labels 100 | sem_lab2 = (pan_lab // 1000).astype(np.uint8) 101 | #ins labels from panoptic labels 102 | ins_lab = pan_lab % 1000 103 | #Kitti style panoptic labels 104 | panoptic_labels = sem_lab.reshape(-1, 1) + ((ins_lab.astype(np.uint32) << 16) & 
0xFFFF0000).reshape(-1, 1) 105 | 106 | #Save labels 107 | lab_output_filename = os.path.join(lab_dir, "{:06d}.label".format(len(lidar_filenames))) 108 | panoptic_labels.tofile(lab_output_filename) 109 | 110 | #Keep list of filenames and tokens 111 | files_mapping.append(lidar_data["filename"]) 112 | lidar_filenames.append(os.path.join(dataroot, lidar_data["filename"])) 113 | lidar_tokens.append(lidar_token) 114 | 115 | #Create pose file 116 | ref = la.inv(poses[0]) 117 | pose_file = open(pose_f, "w") 118 | for pose in poses: 119 | pose_str = [str(v) for v in (np.dot(ref, pose))[:3,:4].flatten()] 120 | pose_file.write(" ".join(pose_str)) 121 | pose_file.write("\n") 122 | pose_file.close() 123 | 124 | #Save filenames and tokens for each point cloud 125 | files_mapping_f = os.path.join(output_seq_dir,'files_mapping.txt') 126 | files_mapping_file = open(files_mapping_f, "w") 127 | for f in files_mapping: 128 | files_mapping_file.write(os.path.join(dataroot,f)) 129 | files_mapping_file.write("\n") 130 | files_mapping_file.close() 131 | 132 | lidar_tokens_f = os.path.join(output_seq_dir,'lidar_tokens.txt') 133 | lidar_tokens_file = open(lidar_tokens_f, "w") 134 | for token in lidar_tokens: 135 | lidar_tokens_file.write(token) 136 | lidar_tokens_file.write("\n") 137 | lidar_tokens_file.close() 138 | 139 | if save_images: 140 | image_dir = os.path.join(output_seq_dir, "image_2/") 141 | if not os.path.exists(image_dir): 142 | os.makedirs(image_dir, exist_ok=True) 143 | 144 | next_image = nusc.get('sample', scene["first_sample_token"])["data"]["CAM_FRONT"] 145 | original = [] 146 | image_filenames = [] 147 | # todo: get relative pose to velodyne. 148 | # poses = [] 149 | while next_image != "": 150 | image_data = nusc.get('sample_data', next_image) 151 | output_filename = os.path.join(image_dir, "{:06d}.png".format(len(image_filenames))) 152 | image = Image.open(os.path.join(dataroot, image_data["filename"])) # open jpg. 153 | image.save(output_filename) # and save as png. 
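# record the pair (converted png name, original nuScenes jpg path); the pairs are written to
# original_images_2.txt after this loop (note: the recorded name uses 5-digit zero padding,
# while the png saved above uses 6 digits)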
154 | original.append(("{:05d}.png".format(len(image_filenames)), image_data["filename"])) 155 | image_filenames.append(os.path.join(dataroot, image_data["filename"])) 156 | next_image = image_data["next"] 157 | 158 | original_file = open(os.path.join(output_seq_dir, "original_images_2.txt"), "w") 159 | for pair in original: original_file.write(pair[0] + ":" + pair[1] + "\n") 160 | original_file.close() 161 | 162 | @click.command() 163 | @click.option('--nuscenes_dir', 164 | type=str, 165 | default=None, 166 | required=True, 167 | help='dataroot directory of nuscenes dataset') 168 | @click.option('--output_dir', 169 | type=str, 170 | default=None, 171 | required=True, 172 | help='directory where to save the sequences') 173 | @click.option('--normalize_remission', is_flag=True, help='normalize remission values in range [0,1]') 174 | @click.option('--mini', is_flag=True, help='convert only mini set') 175 | @click.option('--save_images', is_flag=True, help='save front camera images') 176 | def main(nuscenes_dir, output_dir, normalize_remission, mini, save_images): 177 | if mini: 178 | convert_scenes(nuscenes_dir, output_dir, normalize_remission, save_images, mini) 179 | else: 180 | convert_scenes(nuscenes_dir, output_dir, normalize_remission, save_images, mini) 181 | convert_scenes(nuscenes_dir, output_dir, normalize_remission, save_images, mini, trainval=False) 182 | 183 | if __name__ == "__main__": 184 | main() -------------------------------------------------------------------------------- /utils/torch_ioueval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 3 | 4 | import sys 5 | import torch 6 | import numpy as np 7 | 8 | 9 | class iouEval: 10 | def __init__(self, n_classes, ignore=None): 11 | # classes 12 | self.n_classes = n_classes 13 | 14 | # What to include and ignore from the means 15 | self.ignore = torch.tensor(ignore).long() 16 | self.include = torch.tensor( 17 | [n for n in range(self.n_classes) if n not in self.ignore]).long() 18 | print("[IOU EVAL] IGNORE: ", self.ignore) 19 | print("[IOU EVAL] INCLUDE: ", self.include) 20 | 21 | # get device 22 | self.device = torch.device('cpu') 23 | if torch.cuda.is_available(): 24 | self.device = torch.device('cuda') 25 | 26 | # reset the class counters 27 | self.reset() 28 | 29 | def num_classes(self): 30 | return self.n_classes 31 | 32 | def reset(self): 33 | self.conf_matrix = torch.zeros( 34 | (self.n_classes, self.n_classes), device=self.device).long() 35 | 36 | def addBatch(self, x, y): # x=preds, y=targets 37 | # to tensor 38 | x_row = torch.from_numpy(x).to(self.device).long() 39 | y_row = torch.from_numpy(y).to(self.device).long() 40 | 41 | # sizes should be matching 42 | x_row = x_row.reshape(-1) # de-batchify 43 | y_row = y_row.reshape(-1) # de-batchify 44 | 45 | # check 46 | assert(x_row.shape == x_row.shape) 47 | 48 | # idxs are labels and predictions 49 | idxs = torch.stack([x_row, y_row], dim=0) 50 | 51 | # ones is what I want to add to conf when I 52 | ones = torch.ones((idxs.shape[-1]), device=self.device).long() 53 | 54 | # make confusion matrix (cols = gt, rows = pred) 55 | self.conf_matrix = self.conf_matrix.index_put_( 56 | tuple(idxs), ones, accumulate=True) 57 | 58 | def getStats(self): 59 | # remove fp from confusion on the ignore classes cols 60 | conf = self.conf_matrix.clone().double() 61 | conf[:, self.ignore] = 0 62 | 63 | # get the clean stats 64 | tp = conf.diag() 65 | fp 
= conf.sum(dim=1) - tp 66 | fn = conf.sum(dim=0) - tp 67 | return tp, fp, fn 68 | 69 | def getIoU(self): 70 | tp, fp, fn = self.getStats() 71 | intersection = tp 72 | union = tp + fp + fn + 1e-15 73 | iou = intersection / union 74 | iou_mean = (intersection[self.include] / union[self.include]).mean() 75 | return iou_mean, iou # returns "iou mean", "iou per class" ALL CLASSES 76 | 77 | def getacc(self): 78 | tp, fp, fn = self.getStats() 79 | total_tp = tp.sum() 80 | total = tp[self.include].sum() + fp[self.include].sum() + 1e-15 81 | acc_mean = total_tp / total 82 | return acc_mean # returns "acc mean" 83 | 84 | 85 | if __name__ == "__main__": 86 | # mock problem 87 | nclasses = 2 88 | ignore = [] 89 | 90 | # test with 2 squares and a known IOU 91 | lbl = np.zeros((7, 7), dtype=np.int64) 92 | argmax = np.zeros((7, 7), dtype=np.int64) 93 | 94 | # put squares 95 | lbl[2:4, 2:4] = 1 96 | argmax[3:5, 3:5] = 1 97 | 98 | # make evaluator 99 | eval = iouEval(nclasses, ignore) 100 | 101 | # run 102 | eval.addBatch(argmax, lbl) 103 | m_iou, iou = eval.getIoU() 104 | print("IoU: ", m_iou) 105 | print("IoU class: ", iou) 106 | m_acc = eval.getacc() 107 | print("Acc: ", m_acc) 108 | -------------------------------------------------------------------------------- /utils/validate_submission.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | import argparse 5 | import json 6 | import os 7 | import shutil 8 | 9 | import numpy as np 10 | from tqdm import tqdm 11 | 12 | from nuscenes import NuScenes 13 | from nuscenes.eval.lidarseg.utils import LidarsegClassMapper, get_samples_in_eval_set 14 | from nuscenes.utils.data_classes import LidarPointCloud 15 | 16 | 17 | def validate_submission(nusc: NuScenes, 18 | results_folder: str, 19 | eval_set: str, 20 | verbose: bool = False, 21 | zip_out: str = None) -> None: 22 | """ 23 | Checks if a results folder is valid. The following checks are performed: 24 | - Check that the submission folder is according to that described in 25 | https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/eval/lidarseg/README.md 26 | - Check that the submission.json is of the following structure: 27 | {"meta": {"use_camera": false, 28 | "use_lidar": true, 29 | "use_radar": false, 30 | "use_map": false, 31 | "use_external": false}} 32 | - Check that each each lidar sample data in the evaluation set is present and valid. 33 | 34 | :param nusc: A NuScenes object. 35 | :param results_folder: Path to the folder. 36 | :param eval_set: The dataset split to evaluate on, e.g. train, val or test. 37 | :param verbose: Whether to print messages during the evaluation. 38 | :param zip_out: Path to zip results_folder to, if provided. 39 | """ 40 | mapper = LidarsegClassMapper(nusc) 41 | num_classes = len(mapper.coarse_name_2_coarse_idx_mapping) 42 | 43 | if verbose: 44 | print('Checking if folder structure of {} is correct...'.format(results_folder)) 45 | 46 | # Check that {results_folder}/{eval_set} exists. 47 | results_meta_folder = os.path.join(results_folder, eval_set) 48 | assert os.path.exists(results_meta_folder), \ 49 | 'Error: The folder containing the submission.json ({}) does not exist.'.format(results_meta_folder) 50 | 51 | # Check that {results_folder}/{eval_set}/submission.json exists. 
52 | submisson_json_path = os.path.join(results_meta_folder, 'submission.json') 53 | assert os.path.exists(submisson_json_path), \ 54 | 'Error: submission.json ({}) does not exist.'.format(submisson_json_path) 55 | 56 | # Check that {results_folder}/lidarseg/{eval_set} exists. 57 | results_bin_folder = os.path.join(results_folder, 'lidarseg', eval_set) 58 | assert os.path.exists(results_bin_folder), \ 59 | 'Error: The folder containing the .bin files ({}) does not exist.'.format(results_bin_folder) 60 | 61 | if verbose: 62 | print('\tPassed.') 63 | 64 | if verbose: 65 | print('Checking contents of {}...'.format(submisson_json_path)) 66 | 67 | with open(submisson_json_path) as f: 68 | submission_meta = json.load(f) 69 | valid_meta = {"use_camera", "use_lidar", "use_radar", "use_map", "use_external"} 70 | assert valid_meta == set(submission_meta['meta'].keys()), \ 71 | '{} must contain {}.'.format(submisson_json_path, valid_meta) 72 | for meta_key in valid_meta: 73 | meta_key_type = type(submission_meta['meta'][meta_key]) 74 | assert meta_key_type == bool, 'Error: Value for {} should be bool, not {}.'.format(meta_key, meta_key_type) 75 | 76 | if verbose: 77 | print('\tPassed.') 78 | 79 | if verbose: 80 | print('Checking if all .bin files for {} exist and are valid...'.format(eval_set)) 81 | sample_tokens = get_samples_in_eval_set(nusc, eval_set) 82 | for sample_token in tqdm(sample_tokens, disable=not verbose): 83 | sample = nusc.get('sample', sample_token) 84 | 85 | # Get the sample data token of the point cloud. 86 | sd_token = sample['data']['LIDAR_TOP'] 87 | 88 | # Load the predictions for the point cloud. 89 | lidarseg_pred_filename = os.path.join(results_bin_folder, sd_token + '_lidarseg.bin') 90 | assert os.path.exists(lidarseg_pred_filename), \ 91 | 'Error: The prediction .bin file {} does not exist.'.format(lidarseg_pred_filename) 92 | lidarseg_pred = np.fromfile(lidarseg_pred_filename, dtype=np.uint8) 93 | 94 | # Check number of predictions for the point cloud. 95 | if len(nusc.lidarseg) > 0: # If ground truth exists, compare the no. of predictions with that of ground truth. 96 | lidarseg_label_filename = os.path.join(nusc.dataroot, nusc.get('lidarseg', sd_token)['filename']) 97 | assert os.path.exists(lidarseg_label_filename), \ 98 | 'Error: The ground truth .bin file {} does not exist.'.format(lidarseg_label_filename) 99 | lidarseg_label = np.fromfile(lidarseg_label_filename, dtype=np.uint8) 100 | num_points = len(lidarseg_label) 101 | else: # If no ground truth is available, compare the no. of predictions with that of points in a point cloud. 102 | pointsensor = nusc.get('sample_data', sd_token) 103 | pcl_path = os.path.join(nusc.dataroot, pointsensor['filename']) 104 | pc = LidarPointCloud.from_file(pcl_path) 105 | points = pc.points 106 | num_points = points.shape[1] 107 | 108 | assert num_points == len(lidarseg_pred), \ 109 | 'Error: There are {} predictions for lidar sample data token {} ' \ 110 | 'but there are only {} points in the point cloud.'\ 111 | .format(len(lidarseg_pred), sd_token, num_points) 112 | 113 | assert all((lidarseg_pred > 0) & (lidarseg_pred < num_classes)), \ 114 | "Error: Array for predictions in {} must be between 1 and {} (inclusive)."\ 115 | .format(lidarseg_pred_filename, num_classes - 1) 116 | 117 | if verbose: 118 | print('\tPassed.') 119 | 120 | if verbose: 121 | print('Results folder {} successfully validated!'.format(results_folder)) 122 | 123 | # Zip up results folder if desired. 
124 | if zip_out: 125 | assert os.path.exists(zip_out), \ 126 | 'Error: The folder {} to zip the results to does not exist.'.format(zip_out) 127 | 128 | results_zip = os.path.join(zip_out, os.path.basename(os.path.normpath(results_folder))) 129 | results_zip_name = shutil.make_archive(results_zip, 'zip', results_folder) 130 | if verbose: 131 | print('Results folder {} zipped to {}'.format(results_folder, results_zip_name)) 132 | 133 | 134 | if __name__ == '__main__': 135 | # Settings. 136 | parser = argparse.ArgumentParser(description='Check if a results folder is valid.') 137 | parser.add_argument('--result_path', type=str, 138 | help='The path to the results folder.') 139 | parser.add_argument('--eval_set', type=str, default='val', 140 | help='Which dataset split to evaluate on, train, val or test.') 141 | parser.add_argument('--dataroot', type=str, default='/data/sets/nuscenes', 142 | help='Default nuScenes data directory.') 143 | parser.add_argument('--version', type=str, default='v1.0-trainval', 144 | help='Which version of the nuScenes dataset to evaluate on, e.g. v1.0-trainval.') 145 | parser.add_argument('--verbose', type=bool, default=False, 146 | help='Whether to print to stdout.') 147 | parser.add_argument('--zip_out', type=str, default=None, 148 | help='Path to zip the results folder to.') 149 | args = parser.parse_args() 150 | 151 | result_path_ = args.result_path 152 | eval_set_ = args.eval_set 153 | dataroot_ = args.dataroot 154 | version_ = args.version 155 | verbose_ = args.verbose 156 | zip_out_ = args.zip_out 157 | 158 | nusc_ = NuScenes(version=version_, dataroot=dataroot_, verbose=verbose_) 159 | validate_submission(nusc=nusc_, 160 | results_folder=result_path_, 161 | eval_set=eval_set_, 162 | verbose=verbose_, 163 | zip_out=zip_out_) -------------------------------------------------------------------------------- /visualize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This file is covered by the LICENSE file in the root of this project. 3 | 4 | import argparse 5 | import os 6 | import yaml 7 | 8 | 9 | if __name__ == '__main__': 10 | parser = argparse.ArgumentParser("./visualize.py") 11 | parser.add_argument( 12 | '--what', '-w', 13 | type=str, 14 | required=True, 15 | help='Dataset to visualize. No Default', 16 | ) 17 | parser.add_argument( 18 | '--dataset', '-d', 19 | type=str, 20 | required=True, 21 | help='Dataset to visualize. No Default', 22 | ) 23 | parser.add_argument( 24 | '--sequence', '-s', 25 | type=str, 26 | default="00", 27 | required=False, 28 | help='Sequence to visualize. Defaults to %(default)s', 29 | ) 30 | parser.add_argument( 31 | '--predictions', '-p', 32 | type=str, 33 | default=None, 34 | required=False, 35 | help='Alternate location for labels, to use predictions folder. ' 36 | 'Must point to directory containing the predictions in the proper format ' 37 | ' (see readme)' 38 | 'Defaults to %(default)s', 39 | ) 40 | parser.add_argument( 41 | '--ignore_semantics', '-i', 42 | dest='ignore_semantics', 43 | default=False, 44 | action='store_true', 45 | help='Ignore semantics. Visualizes uncolored pointclouds.' 46 | 'Defaults to %(default)s', 47 | ) 48 | parser.add_argument( 49 | '--offset', 50 | type=int, 51 | default=0, 52 | required=False, 53 | help='Sequence to start. 
Defaults to %(default)s', 54 | ) 55 | parser.add_argument( 56 | '--ignore_safety', 57 | dest='ignore_safety', 58 | default=False, 59 | action='store_true', 60 | help='Normally you want the number of labels and ptcls to be the same,' 61 | ', but if you are not done inferring this is not the case, so this disables' 62 | ' that safety.' 63 | 'Defaults to %(default)s', 64 | ) 65 | FLAGS, unparsed = parser.parse_known_args() 66 | 67 | # print summary of what we will do 68 | print("*" * 80) 69 | print("INTERFACE:") 70 | print('Dataset Type', FLAGS.what) 71 | print("Dataset", FLAGS.dataset) 72 | print("Sequence", FLAGS.sequence) 73 | print("Predictions", FLAGS.predictions) 74 | print("ignore_semantics", FLAGS.ignore_semantics) 75 | print("ignore_safety", FLAGS.ignore_safety) 76 | print("offset", FLAGS.offset) 77 | print("*" * 80) 78 | 79 | if FLAGS.what == "kitti": 80 | from common.laserscan import LaserScan, SemLaserScan 81 | from common.laserscanvis import LaserScanVis 82 | elif FLAGS.what == "poss": 83 | from common.posslaserscan import LaserScan, SemLaserScan 84 | from common.posslaserscanvis import LaserScanVis 85 | else: 86 | raise TypeError( 87 | "This type dataset doesn't exist (use kitti or poss)! Exiting...") 88 | 89 | # open config file 90 | try: 91 | if FLAGS.what == "kitti": 92 | print("Opening config file of KITTI") 93 | CFG = yaml.safe_load( 94 | open('config/labels/semantic-kitti.yaml', 'r')) 95 | elif FLAGS.what == "poss": 96 | print("Opening config file of POSS") 97 | CFG = yaml.safe_load(open('config/labels/semantic-poss.yaml', 'r')) 98 | else: 99 | raise TypeError( 100 | "This type dataset doesn't exist (use kitti or poss)! Exiting...") 101 | 102 | except Exception as e: 103 | raise TypeError("Error opening yaml file.") 104 | 105 | # fix sequence name 106 | FLAGS.sequence = '{0:02d}'.format(int(FLAGS.sequence)) 107 | 108 | # does sequence folder exist? 109 | scan_paths = os.path.join(FLAGS.dataset, "sequences", 110 | FLAGS.sequence, "velodyne") 111 | 112 | if os.path.isdir(scan_paths): 113 | print("Sequence folder exists! Using sequence from %s" % scan_paths) 114 | else: 115 | raise TypeError( 116 | "Sequence folder doesn't exist from %s! Exiting..." % scan_paths) 117 | 118 | # populate the pointclouds 119 | scan_names = [os.path.join(dp, f) for dp, dn, fn in os.walk( 120 | os.path.expanduser(scan_paths)) for f in fn] 121 | scan_names.sort() 122 | 123 | if FLAGS.what == "poss": 124 | tag_paths = os.path.join(FLAGS.dataset, "sequences", 125 | FLAGS.sequence, "tag") 126 | tag_names = [os.path.join(dp, f) for dp, dn, fn in os.walk( 127 | os.path.expanduser(tag_paths)) for f in fn] 128 | tag_names.sort() 129 | 130 | # does sequence folder exist? 131 | if not FLAGS.ignore_semantics: 132 | if FLAGS.predictions is not None: 133 | label_paths = os.path.join(FLAGS.predictions, "sequences", 134 | FLAGS.sequence, "predictions") 135 | else: 136 | label_paths = os.path.join(FLAGS.dataset, "sequences", 137 | FLAGS.sequence, "labels") 138 | if os.path.isdir(label_paths): 139 | print("Labels folder exists! Using labels from %s" % label_paths) 140 | else: 141 | raise TypeError( 142 | "Labels folder doesn't exist from %s ! Exiting..." 
% label_paths) 143 | 144 | # populate the pointclouds 145 | label_names = [os.path.join(dp, f) for dp, dn, fn in os.walk( 146 | os.path.expanduser(label_paths)) for f in fn] 147 | label_names.sort() 148 | 149 | # check that there are same amount of labels and scans 150 | if not FLAGS.ignore_safety: 151 | assert (len(label_names) == len(scan_names)) 152 | 153 | # create a scan 154 | if FLAGS.ignore_semantics: 155 | # project all opened scans to spheric proj 156 | scan = LaserScan(project=True) 157 | else: 158 | color_dict = CFG["color_map"] 159 | scan = SemLaserScan(color_dict, project=True) 160 | 161 | # create a visualizer 162 | semantics = not FLAGS.ignore_semantics 163 | if not semantics: 164 | label_names = None 165 | if FLAGS.what == "kitti": 166 | vis = LaserScanVis(scan=scan, 167 | scan_names=scan_names, 168 | label_names=label_names, 169 | offset=FLAGS.offset, 170 | semantics=semantics, 171 | instances=False) 172 | elif FLAGS.what == "poss": 173 | vis = LaserScanVis(scan=scan, 174 | scan_names=scan_names, 175 | tag_names=tag_names, 176 | label_names=label_names, 177 | offset=FLAGS.offset, 178 | semantics=semantics) 179 | # print instructions 180 | print("To navigate:") 181 | print("\tb: back (previous scan)") 182 | print("\tn: next (next scan)") 183 | print("\tq: quit (exit program)") 184 | 185 | # run the visualizer 186 | vis.run() 187 | --------------------------------------------------------------------------------
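Example invocations for visualize.py (paths are placeholders; run from the repository root so the
relative config/labels/*.yaml files resolve). The dataset folder must contain
sequences/<seq>/velodyne and, unless --ignore_semantics is set, sequences/<seq>/labels or, when
--predictions is given, a predictions/sequences/<seq>/predictions folder; SemanticPOSS
additionally needs sequences/<seq>/tag:

    python visualize.py -w kitti -d /path/to/SemanticKITTI -s 08
    python visualize.py -w kitti -d /path/to/SemanticKITTI -s 08 -p /path/to/predictions
    python visualize.py -w poss -d /path/to/SemanticPOSS -s 05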