├── .gitignore
├── LICENSE
├── README.md
├── configs
    ├── deeplab_cityscapes_seg.yml
    ├── deeplab_kitti_depth.yml
    ├── deeplab_kitti_seg.yml
    ├── dispnet_cityscapes_seg.yml
    ├── dispnet_kitti_depth.yml
    ├── dispnet_kitti_seg.yml
    ├── fcn_cityscapes_seg.yml
    ├── fcn_kitti_depth.yml
    ├── fcn_kitti_seg.yml
    ├── fcrn_cityscapes_seg.yml
    ├── fcrn_kitti_depth.yml
    ├── fcrn_kitti_seg.yml
    ├── frrnA_cityscapes_seg.yml
    ├── frrnA_kitti_depth.yml
    ├── frrnA_kitti_seg.yml
    ├── segnet_cityscapes_seg.yml
    ├── segnet_kitti_depth.yml
    └── segnet_kitti_seg.yml
├── demo_depth.py
├── demo_saliency.py
├── demo_seg.py
├── fcrn_metric_0.png
├── kitti_depth_eval
    ├── __pycache__
    │   └── depth_evaluation_utils.cpython-36.pyc
    ├── depth_evaluation_utils.py
    └── test_files_eigen.txt
├── kitti_train_depth_prepare
    ├── __pycache__
    │   └── kitti_raw_loader.cpython-36.pyc
    ├── kitti_raw_loader.py
    ├── prepare_train_data.py
    ├── static_frames.txt
    └── test_scenes.txt
├── output_predict_img
    ├── deeplab_output_depth.png
    ├── deeplab_output_seg.png
    ├── dispnet_output_depth.png
    ├── dispnet_output_seg.png
    ├── fcn_output_depth.png
    ├── fcn_output_seg.png
    ├── fcrn_output_depth.png
    ├── fcrn_output_seg.png
    ├── frrn_output_depth.png
    ├── frrn_output_seg.png
    ├── segnet_output_depth.png
    └── segnet_output_seg.png
├── ptsemseg
    ├── augmentations
    │   ├── __init__.py
    │   └── augmentations.py
    ├── caffe_pb2.py
    ├── loader
    │   ├── __init__.py
    │   ├── cityscapes_loader_depth.py
    │   ├── cityscapes_loader_seg.py
    │   ├── kitti_loader_depth.py
    │   └── kitti_loader_seg.py
    ├── loss
    │   ├── __init__.py
    │   └── loss.py
    ├── metrics.py
    ├── models
    │   ├── __init__.py
    │   ├── deeplab_depth.py
    │   ├── deeplab_seg.py
    │   ├── dispnet_depth.py
    │   ├── dispnet_seg.py
    │   ├── fcn_depth.py
    │   ├── fcn_seg.py
    │   ├── fcrn_depth.py
    │   ├── fcrn_seg.py
    │   ├── frrn_depth.py
    │   ├── frrn_seg.py
    │   ├── segnet_depth.py
    │   ├── segnet_seg.py
    │   └── utils.py
    ├── optimizers
    │   └── __init__.py
    ├── schedulers
    │   ├── __init__.py
    │   └── schedulers.py
    └── utils.py
├── requirements.txt
├── saliency.py
├── saliency_analysis.py
├── saliency_class_val.py
├── saliency_eval.py
├── saliency_iou.py
├── saliency_results
    ├── BP_saliency_map_fcrn_seg.png
    └── image_pixel_locate_0.png
├── test_depth_cityscapes.py
├── test_depth_kitti.py
├── test_seg_cityscapes.py
├── train.py
└── validate_seg.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 sanweiliti 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /configs/deeplab_cityscapes_seg.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: deeplab 3 | task: seg 4 | data: 5 | dataset: cityscapes 6 | train_split: train 7 | val_split: val 8 | img_rows: 512 9 | img_cols: 1024 10 | img_norm: True 11 | # version: cityscapes 12 | path: ../pytorch-semseg/datasets/cityscapes 13 | 14 | training: 15 | train_iters: 200000 16 | batch_size: 2 17 | val_interval: 1500 18 | n_workers: 2 19 | print_interval: 100 20 | optimizer: 21 | name: 'adam' 22 | lr: 1.0e-4 23 | loss: 24 | name: 'cross_entropy' 25 | size_average: True 26 | # augmentations: 27 | # rcrop: [256, 512] 28 | lr_schedule: 29 | resume: 30 | -------------------------------------------------------------------------------- /configs/deeplab_kitti_depth.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: deeplab 3 | task: depth 4 | data: 5 | dataset: kitti 6 | train_split: train 7 | val_split: val 8 | img_rows: 128 9 | img_cols: 416 10 | img_norm: True 11 | path: prepared_kitti_train_data 12 | 13 | training: 14 | train_iters: 100000000 15 | batch_size: 4 16 | val_interval: 2000 17 | n_workers: 2 18 | print_interval: 500 19 | optimizer: 20 | name: 'adam' 21 | lr: 1.0e-4 22 | loss: 23 | name: 'scale_invariant_loss' 24 | smooth: True 25 | lr_schedule: 26 | resume: 27 | -------------------------------------------------------------------------------- /configs/deeplab_kitti_seg.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: deeplab 3 | task: seg 4 | data: 5 | dataset: kitti 6 | train_split: train 7 | val_split: val 8 | img_rows: 256 9 | img_cols: 832 10 | img_norm: True 11 | # version: cityscapes 12 | path: ../pytorch-semseg/datasets/kitti/semantics 13 | 14 | training: 15 | train_iters: 10000000 16 | batch_size: 4 17 | val_interval: 40 18 | n_workers: 2 19 | print_interval: 10 20 | optimizer: 21 | name: 'adam' 22 | lr: 1.0e-5 23 | loss: 24 | name: 'cross_entropy' 25 | size_average: True 26 | # augmentations: 27 | # rcrop: [256, 512] 28 | lr_schedule: 29 | resume: ../pytorch-semseg/runs/deeplab_cityscapes/11044/deeplab_cityscapes_best_model.pkl 30 | -------------------------------------------------------------------------------- /configs/dispnet_cityscapes_seg.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: dispnet 3 | task: seg 4 | data: 5 | dataset: cityscapes 6 | train_split: train 7 | val_split: val 8 | img_rows: 512 9 | img_cols: 1024 10 | img_norm: True 11 | # version: cityscapes 12 | path: ../pytorch-semseg/datasets/cityscapes 13 | 14 | training: 15 | train_iters: 200000 16 | batch_size: 4 17 | val_interval: 750 18 | n_workers: 2 19 | print_interval: 150 20 | optimizer: 21 | name: 'adam' 22 | lr: 1.0e-4 23 | loss: 24 | name: 'cross_entropy' 25 | size_average: True 26 | lr_schedule: 27 | resume: 28 | -------------------------------------------------------------------------------- /configs/dispnet_kitti_depth.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: dispnet 3 | task: depth 4 | 5 | data: 6 | dataset: kitti 7 | train_split: train 8 | val_split: val 9 | test_split: test 10 | img_rows: 128 11 | img_cols: 416 12 | img_norm: True 13 | path: prepared_kitti_train_data 14 | 15 | training: 16 | train_iters: 1000000 17 | batch_size: 4 18 | 
val_interval: 2000 19 | n_workers: 2 20 | print_interval: 500 21 | optimizer: 22 | name: 'adam' 23 | lr: 1.0e-4 24 | loss: 25 | name: 'scale_invariant_loss' 26 | smooth: True 27 | lr_schedule: 28 | resume: 29 | -------------------------------------------------------------------------------- /configs/dispnet_kitti_seg.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: dispnet 3 | task: seg 4 | data: 5 | dataset: kitti 6 | train_split: train 7 | val_split: val 8 | img_rows: 256 9 | img_cols: 832 10 | img_norm: True 11 | # version: cityscapes 12 | path: ../pytorch-semseg/datasets/kitti/semantics 13 | 14 | training: 15 | train_iters: 10000000 16 | batch_size: 4 17 | val_interval: 40 18 | n_workers: 2 19 | print_interval: 10 20 | optimizer: 21 | name: 'adam' 22 | lr: 1.0e-5 23 | loss: 24 | name: 'cross_entropy' 25 | size_average: True 26 | lr_schedule: 27 | resume: 28 | -------------------------------------------------------------------------------- /configs/fcn_cityscapes_seg.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: fcn 3 | task: seg 4 | data: 5 | dataset: cityscapes 6 | train_split: train 7 | val_split: val 8 | # test_split: test 9 | img_rows: 512 10 | img_cols: 1024 11 | img_norm: True 12 | # version: cityscapes 13 | path: ../pytorch-semseg/datasets/cityscapes 14 | training: 15 | train_iters: 3000000 16 | batch_size: 2 # for test, batch_size === 1 17 | val_interval: 750 18 | n_workers: 2 19 | print_interval: 150 20 | optimizer: 21 | name: 'adam' 22 | lr: 1.0e-4 23 | #weight_decay: 0.0005 24 | #momentum: 0.9 25 | loss: 26 | name: 'cross_entropy' 27 | size_average: True 28 | lr_schedule: 29 | resume: 30 | #testing: 31 | # trained_model: ./runs/fcn8s_cityscapes/67739_downsample1_batchSize1_withModel/fcn8s_cityscapes_best_model.pkl 32 | -------------------------------------------------------------------------------- /configs/fcn_kitti_depth.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: fcn 3 | task: depth 4 | data: 5 | dataset: kitti 6 | train_split: train 7 | val_split: val 8 | img_rows: 128 9 | img_cols: 416 10 | img_norm: True 11 | path: prepared_kitti_train_data 12 | 13 | training: 14 | train_iters: 100000000 15 | batch_size: 4 16 | val_interval: 2000 17 | n_workers: 2 18 | print_interval: 1 19 | optimizer: 20 | name: 'adam' 21 | lr: 1.0e-4 22 | loss: 23 | name: 'scale_invariant_loss' 24 | smooth: True 25 | lr_schedule: 26 | resume: 27 | -------------------------------------------------------------------------------- /configs/fcn_kitti_seg.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: fcn 3 | task: seg 4 | data: 5 | dataset: kitti 6 | train_split: train 7 | val_split: val 8 | img_rows: 256 9 | img_cols: 832 10 | img_norm: True 11 | # version: cityscapes 12 | path: ../pytorch-semseg/datasets/kitti/semantics 13 | 14 | training: 15 | train_iters: 10000000 16 | batch_size: 4 17 | val_interval: 40 18 | n_workers: 2 19 | print_interval: 10 20 | optimizer: 21 | name: 'adam' 22 | lr: 1.0e-5 23 | loss: 24 | name: 'cross_entropy' 25 | size_average: True 26 | lr_schedule: 27 | resume: runs/fcn8s_cityscapes/37333/fcn8s_cityscapes_best_model.pkl 28 | -------------------------------------------------------------------------------- /configs/fcrn_cityscapes_seg.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: 
fcrn 3 | task: seg 4 | data: 5 | dataset: cityscapes 6 | train_split: train 7 | val_split: val 8 | img_rows: 512 9 | img_cols: 1024 10 | img_norm: True 11 | # version: cityscapes 12 | path: datasets/cityscapes 13 | 14 | training: 15 | train_iters: 200000 16 | batch_size: 4 17 | val_interval: 750 18 | n_workers: 2 19 | print_interval: 150 20 | optimizer: 21 | name: 'adam' 22 | lr: 1.0e-4 23 | loss: 24 | name: 'cross_entropy' 25 | size_average: True 26 | lr_schedule: 27 | resume: 28 | -------------------------------------------------------------------------------- /configs/fcrn_kitti_depth.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: fcrn 3 | task: depth 4 | data: 5 | dataset: kitti 6 | train_split: train 7 | val_split: val 8 | img_rows: 128 9 | img_cols: 416 10 | img_norm: True 11 | path: prepared_kitti_train_data 12 | 13 | training: 14 | train_iters: 100000000 15 | batch_size: 4 16 | val_interval: 2000 17 | n_workers: 2 18 | print_interval: 1 19 | optimizer: 20 | name: 'adam' 21 | lr: 1.0e-4 22 | loss: 23 | name: 'scale_invariant_loss' 24 | smooth: True 25 | lr_schedule: 26 | resume: 27 | -------------------------------------------------------------------------------- /configs/fcrn_kitti_seg.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: fcrn 3 | task: seg 4 | data: 5 | dataset: kitti 6 | train_split: train 7 | val_split: val 8 | img_rows: 256 9 | img_cols: 832 10 | img_norm: True 11 | # version: cityscapes 12 | path: datasets/kitti/semantics 13 | 14 | training: 15 | train_iters: 10000000 16 | batch_size: 4 17 | val_interval: 40 18 | n_workers: 2 19 | print_interval: 10 20 | optimizer: 21 | name: 'adam' 22 | lr: 1.0e-5 23 | loss: 24 | name: 'cross_entropy' 25 | size_average: True 26 | lr_schedule: 27 | resume: runs/frrnA_cityscapes_seg/73777/frrn_cityscapes_best_model.pkl -------------------------------------------------------------------------------- /configs/frrnA_cityscapes_seg.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: frrn 3 | model_type: A 4 | task: seg 5 | data: 6 | dataset: cityscapes 7 | train_split: train 8 | val_split: val 9 | img_rows: 256 10 | img_cols: 512 11 | img_norm: True 12 | # version: cityscapes 13 | path: datasets/cityscapes 14 | training: 15 | train_iters: 10000 16 | batch_size: 3 17 | val_interval: 1000 18 | n_workers: 2 19 | print_interval: 200 20 | optimizer: 21 | name: 'adam' 22 | lr: 1.0e-3 23 | loss: 24 | name: 'bootstrapped_cross_entropy' 25 | size_average: True 26 | K: 16384 # 512*256/8 27 | lr_schedule: 28 | resume: runs/frrnA_cityscapes_seg/94260/frrn_cityscapes_best_model.pkl 29 | -------------------------------------------------------------------------------- /configs/frrnA_kitti_depth.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: frrn 3 | model_type: A 4 | task: depth 5 | data: 6 | dataset: kitti 7 | train_split: train 8 | val_split: val 9 | img_rows: 128 10 | img_cols: 416 11 | img_norm: True 12 | path: prepared_kitti_train_data 13 | 14 | training: 15 | train_iters: 100000000 16 | batch_size: 4 17 | val_interval: 2000 18 | n_workers: 2 19 | print_interval: 500 20 | optimizer: 21 | name: 'adam' 22 | lr: 1.0e-4 23 | loss: 24 | name: 'scale_invariant_loss' 25 | smooth: True 26 | lr_schedule: 27 | resume: 28 | -------------------------------------------------------------------------------- 
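Note on the config files: every YAML above (and the remaining configs below) follows the same schema — a model block (arch, task), a data block (dataset, split names, resize resolution img_rows/img_cols, dataset path) and a training block (train_iters, batch_size, optimizer, loss, optional resume checkpoint). A minimal sketch of how such a file could be consumed, assuming PyYAML is available; the get_model call is only a placeholder for the constructors under ptsemseg/models, not the repository's actual train.py:

import yaml

# Illustration only: read one of the configs listed above.
with open("configs/frrnA_kitti_depth.yml") as fp:
    cfg = yaml.safe_load(fp)

arch = cfg["model"]["arch"]                                    # e.g. "frrn"
task = cfg["model"]["task"]                                    # "seg" or "depth"
img_size = (cfg["data"]["img_rows"], cfg["data"]["img_cols"])  # (128, 416) for KITTI depth
lr = cfg["training"]["optimizer"]["lr"]                        # 1.0e-4
loss_name = cfg["training"]["loss"]["name"]                    # "scale_invariant_loss"
# model = get_model(arch, task)  # hypothetical factory; see ptsemseg/models/
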
/configs/frrnA_kitti_seg.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: frrn 3 | model_type: A 4 | task: seg 5 | data: 6 | dataset: kitti 7 | train_split: train 8 | val_split: val 9 | img_rows: 256 10 | img_cols: 832 11 | img_norm: True 12 | # version: cityscapes 13 | path: datasets/kitti/semantics 14 | training: 15 | train_iters: 10000000 16 | batch_size: 4 17 | val_interval: 40 18 | n_workers: 2 19 | print_interval: 10 20 | optimizer: 21 | name: 'adam' 22 | lr: 1.0e-5 23 | loss: 24 | name: 'bootstrapped_cross_entropy' 25 | size_average: True 26 | K: 26624 # 256*832/8 27 | lr_schedule: 28 | resume: # runs/frrnA_cityscapes/94430/frrnA_cityscapes_best_model.pkl 29 | -------------------------------------------------------------------------------- /configs/segnet_cityscapes_seg.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: segnet 3 | task: seg 4 | data: 5 | dataset: cityscapes 6 | train_split: train 7 | val_split: val 8 | img_rows: 512 9 | img_cols: 1024 10 | img_norm: True 11 | # version: cityscapes 12 | path: datasets/cityscapes 13 | 14 | training: 15 | train_iters: 200000 16 | batch_size: 2 17 | val_interval: 1500 18 | n_workers: 2 19 | print_interval: 300 20 | optimizer: 21 | name: 'adam' 22 | lr: 1.0e-4 23 | loss: 24 | name: 'cross_entropy' 25 | size_average: True 26 | lr_schedule: 27 | resume: 28 | -------------------------------------------------------------------------------- /configs/segnet_kitti_depth.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: segnet 3 | task: depth 4 | data: 5 | dataset: kitti 6 | train_split: train 7 | val_split: val 8 | img_rows: 128 9 | img_cols: 416 10 | img_norm: True 11 | path: prepared_kitti_train_data 12 | 13 | training: 14 | train_iters: 100000000 15 | batch_size: 4 16 | val_interval: 2000 17 | n_workers: 2 18 | print_interval: 500 19 | optimizer: 20 | name: 'adam' 21 | lr: 1.0e-4 22 | loss: 23 | name: 'scale_invariant_loss' 24 | smooth: True 25 | lr_schedule: 26 | resume: 27 | -------------------------------------------------------------------------------- /configs/segnet_kitti_seg.yml: -------------------------------------------------------------------------------- 1 | model: 2 | arch: segnet 3 | task: seg 4 | data: 5 | dataset: kitti 6 | train_split: train 7 | val_split: val 8 | img_rows: 256 9 | img_cols: 832 10 | img_norm: True 11 | # version: cityscapes 12 | path: datasets/kitti/semantics 13 | 14 | training: 15 | train_iters: 10000000 16 | batch_size: 4 17 | val_interval: 40 18 | n_workers: 2 19 | print_interval: 10 20 | optimizer: 21 | name: 'adam' 22 | lr: 1.0e-5 23 | loss: 24 | name: 'cross_entropy' 25 | size_average: True 26 | lr_schedule: 27 | resume: ../pytorch-semseg/runs/segnet_cityscapes/66269/segnet_cityscapes_best_model.pkl 28 | -------------------------------------------------------------------------------- /demo_depth.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from scipy.misc import imresize 4 | import argparse 5 | import scipy.misc as m 6 | import matplotlib.pyplot as plot 7 | 8 | from ptsemseg.models.fcn_depth import * 9 | from ptsemseg.models.segnet_depth import * 10 | from ptsemseg.models.frrn_depth import * 11 | from ptsemseg.models.deeplab_depth import * 12 | from ptsemseg.models.fcrn_depth import * 13 | from ptsemseg.models.dispnet_depth import * 14 | 15 | 16 | # depth demo 17 | # 
image resize height and width need to match the training settings of the pretrained model 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("--dataset", default='cityscapes', type=str, choices=["cityscapes", "kitti"]) 21 | 22 | # datasets/kitti/semantics/training/image_2/000193_10.png 23 | parser.add_argument("--img_path", default='datasets/cityscapes/leftImg8bit/train/aachen/aachen_000005_000019_leftImg8bit.png', 24 | type=str, 25 | help='path to the input image') 26 | parser.add_argument("--model_name", type=str, default='fcrn', choices=["fcn", "frrnA", "segnet", "deeplab", "dispnet", "fcrn"]) 27 | parser.add_argument("--model_path", 28 | default='runs/fcrn_cityscapes_depth/212_512_1024_bs2_smooth1000_berhuLoss/fcrn_cityscapes_best_model.pkl', 29 | type=str, 30 | help='path to the pretrained model') 31 | parser.add_argument("--height", type=int, default=512, help="image resize height") # 256 for kitti 32 | parser.add_argument("--width", type=int, default=1024, help="image resize width") # 832 for kitti 33 | parser.add_argument("--pred_disp", action='store_true', 34 | help="model predicts disparity instead of depth if selected") 35 | 36 | args = parser.parse_args() 37 | 38 | def get_model(model_name): 39 | try: 40 | return { 41 | "fcn": fcn_depth(), 42 | "frrnA": frrn_depth(model_type = "A"), 43 | "segnet": segnet_depth(), 44 | "deeplab": deeplab_depth(), 45 | "dispnet": dispnet_depth(), 46 | "fcrn": fcrn_depth(), 47 | }[model_name] 48 | except: 49 | raise("Model {} not available".format(model_name)) 50 | 51 | @torch.no_grad() 52 | def main(): 53 | img = m.imread(args.img_path).astype(np.float32) 54 | 55 | # input image preprocessing, need to match the training settings of the pretrained model 56 | img = imresize(img, (args.height, args.width)).astype(np.float32) # [128, 416, 3] 57 | img = ((img / 255 - 0.5) / 0.5) 58 | img = np.transpose(img, (2, 0, 1)) 59 | img = torch.from_numpy(img).unsqueeze(0) # tensor [1, 3, 128, 416] 60 | 61 | # load pretrained model 62 | model = get_model(args.model_name) 63 | weights = torch.load(args.model_path, map_location=lambda storage, loc: storage) 64 | model.load_state_dict(weights['model_state']) 65 | model.eval() 66 | 67 | output = model(img).cpu().numpy()[0,0] 68 | 69 | if args.dataset == "kitti": 70 | y1, y2 = int(0.40810811 * output.shape[0]), int(0.99189189 * output.shape[0]) 71 | x1, x2 = int(0 * output.shape[1]), int(1 * output.shape[1]) 72 | output_cut = output[y1:y2, x1:x2] 73 | output_cut = 1/output_cut # TODO: not for dispnet 74 | 75 | output_upper = np.full((y1, args.width), np.min(output_cut), dtype=float) 76 | output_cut = (output_cut - np.min(output_cut)) / np.max(output_cut) 77 | output_final = np.concatenate((output_upper, output_cut), axis=0) 78 | m.imsave("output_predict_img/dispnet_output_depth.png", output_final) # for dispnet 79 | 80 | if args.dataset == "cityscapes": 81 | y1, y2 = int(0.05 * output.shape[0]), int(0.80 * output.shape[0]) 82 | x1, x2 = int(0.05 * output.shape[1]), int(0.99 * output.shape[1]) 83 | output_cut = output[y1:y2, x1:x2] 84 | output_cut = 1/output_cut # TODO: not for dispnet 85 | m.imsave("city_depth.png", output_cut) 86 | plot.imsave("city_depth.png", output_cut, cmap="viridis") 87 | 88 | 89 | 90 | if __name__ == '__main__': 91 | main() -------------------------------------------------------------------------------- /demo_saliency.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # 4 | # Author: Kazuto 
Nakashima 5 | # URL: http://kazuto1011.github.io 6 | # Created: 2017-05-18 7 | 8 | from __future__ import print_function 9 | 10 | import argparse 11 | import numpy as np 12 | import torch 13 | import scipy.misc as m 14 | import cv2 15 | import matplotlib.pyplot as plot 16 | 17 | from ptsemseg.models.fcn_seg import * 18 | from ptsemseg.models.segnet_seg import * 19 | from ptsemseg.models.frrn_seg import * 20 | from ptsemseg.models.deeplab_seg import * 21 | from ptsemseg.models.fcrn_seg import * 22 | from ptsemseg.models.dispnet_seg import * 23 | 24 | from ptsemseg.models.fcn_depth import * 25 | from ptsemseg.models.segnet_depth import * 26 | from ptsemseg.models.frrn_depth import * 27 | from ptsemseg.models.deeplab_depth import * 28 | from ptsemseg.models.fcrn_depth import * 29 | from ptsemseg.models.dispnet_depth import * 30 | 31 | from saliency import BackPropagation 32 | 33 | 34 | parser = argparse.ArgumentParser() 35 | parser.add_argument("--image_path", default='datasets/kitti/semantics/training/image_2/000193_10.png', type=str, 36 | help='path to test image') 37 | 38 | parser.add_argument("--model_name", type=str, default='dispnet', choices=["fcn", "frrnA", "segnet", "deeplab", "dispnet", "fcrn"]) 39 | parser.add_argument("--task", type=str, default="seg", choices=["seg", "depth"]) 40 | parser.add_argument("--model_path", type=str, 41 | default='runs/dispnet_kitti_seg/86732_256_832_cityscaperPretrained_lr5/dispnet_kitti_best_model.pkl', 42 | help='path to pretrained model') 43 | 44 | # the image resolution here should match the pretrained model training resolution 45 | parser.add_argument("--height", type=int, default=256, help="image resize height") 46 | parser.add_argument("--width", type=int, default=832, help="image resize width") 47 | 48 | parser.add_argument("--pos_i", type=int, default=200, help="x coordinate for the pixel to test") 49 | parser.add_argument("--pos_j", type=int, default=160, help="j coordinate for the pixel to test") 50 | 51 | parser.add_argument("--topk", type=int, default=1, 52 | help="top k classes to produce the saliency map for seg (shoud be set to 1 for depth)") 53 | 54 | args = parser.parse_args() 55 | 56 | class_names = [ 57 | "road", 58 | "sidewalk", 59 | "building", 60 | "wall", 61 | "fence", 62 | "pole", 63 | "traffic_light", 64 | "traffic_sign", 65 | "vegetation", 66 | "terrain", 67 | "sky", 68 | "person", 69 | "rider", 70 | "car", 71 | "truck", 72 | "bus", 73 | "train", 74 | "motorcycle", 75 | "bicycle", 76 | ] 77 | 78 | def get_model(model_name, task): 79 | if task == "seg": 80 | try: 81 | return { 82 | "fcn": fcn_seg(n_classes=19), 83 | "frrnA": frrn_seg(model_type = "A", n_classes=19), 84 | "segnet": segnet_seg(n_classes=19), 85 | "deeplab": deeplab_seg(n_classes=19), 86 | "dispnet": dispnet_seg(n_classes=19), 87 | "fcrn": fcrn_seg(n_classes=19), 88 | }[model_name] 89 | except: 90 | raise("Model {} not available".format(model_name)) 91 | elif task == "depth": # TODO: add depth models 92 | try: 93 | return { 94 | "fcn": fcn_depth(), 95 | "frrnA": frrn_depth(model_type = "A"), 96 | "segnet": segnet_depth(), 97 | "deeplab": deeplab_depth(), 98 | "dispnet": dispnet_depth(), 99 | "fcrn": fcrn_depth(), 100 | }[model_name] 101 | except: 102 | raise("Model {} not available".format(model_name)) 103 | 104 | 105 | def image_process(img, task): 106 | # image preprocessing need to match the training settings of the corresponding pretrained model 107 | img = np.array(img, dtype=np.uint8) 108 | img = m.imresize(img, (args.height, args.width)) 109 | 110 | 
#img[args.pos_i, args.pos_j] = 0 111 | 112 | raw_img = img.astype(np.float) 113 | if task == "seg": 114 | img = img[:, :, ::-1] # RGB -> BGR shape: [h, w, 3] 115 | img = img.astype(float) / 255.0 # norm to [0,1] for seg 116 | if task == "depth": 117 | img = ((img.astype(float) / 255 - 0.5) / 0.5) # normalize to [-1, 1] 118 | 119 | # NHWC -> NCHW 120 | img = img.transpose(2, 0, 1) # [3, h, w] 121 | img = torch.from_numpy(img).float() # tensor, shape: [3, h, w] 122 | img = img.unsqueeze(0) 123 | return img, raw_img 124 | 125 | 126 | def pixel_locate(img, pos_i, pos_j): 127 | for p in range(pos_i - 3, pos_i + 4): 128 | for q in range(pos_j-3, pos_j+4): 129 | img[p, q, 0] = 255 130 | img[p, q, 1] = 255 131 | img[p, q, 2] = 255 132 | return img 133 | 134 | 135 | def main(): 136 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 137 | 138 | # Model 139 | model = get_model(args.model_name, args.task) 140 | weights = torch.load(args.model_path, map_location=lambda storage, loc: storage) 141 | model.load_state_dict(weights['model_state']) 142 | model.to(device) 143 | model.eval() 144 | 145 | # Image preprocessing 146 | img = m.imread(args.image_path) 147 | img, raw_img = image_process(img, args.task) 148 | 149 | # ========================================================================= 150 | print('Vanilla Backpropagation and saliency map') 151 | # ========================================================================= 152 | bp = BackPropagation(model=model, task=args.task) 153 | # preds, idx = bp.forward_demo(img.to(device), args.pos_i, args.pos_j) 154 | pred_idx = bp.forward(img.to(device)) 155 | 156 | for i in range(0, args.topk): 157 | bp.backward(pos_i=args.pos_i, pos_j=args.pos_j, idx=pred_idx[args.pos_i, args.pos_j]) 158 | output_vanilla, output_saliency = bp.generate() # [3, h, w] 159 | # m.imsave('saliency_results/vanilla_BP_map_{}_{}.png'.format(args.model_name, args.task), output_vanilla) 160 | for p in range(args.pos_i - 5, args.pos_i + 6): 161 | for q in range(args.pos_j - 5, args.pos_j + 6): 162 | output_saliency[p, q] = np.max(output_saliency) 163 | output_saliency[p, q] = np.max(output_saliency) 164 | plot.imsave('saliency_results/BP_saliency_map_{}_{}.png'.format(args.model_name, args.task), output_saliency, cmap="viridis") 165 | m.imsave('saliency_results/image_pixel_locate.png', pixel_locate(raw_img, pos_i=args.pos_i, pos_j=args.pos_j)) 166 | 167 | 168 | 169 | # output_saliency = (output_saliency - np.min(output_saliency)) / np.max(output_saliency) 170 | # output_saliency = 1 - output_saliency 171 | # heatmap = cv2.applyColorMap(np.uint8(255 * output_saliency), cv2.COLORMAP_JET) 172 | # m.imsave('saliency_results/heatmap_{}_{}.png'.format(args.model_name, args.task), heatmap) 173 | 174 | # if args.task == "seg": 175 | # print('[{:.5f}] {}'.format(preds[i], class_names[idx[i]])) 176 | 177 | 178 | 179 | if __name__ == '__main__': 180 | main() 181 | -------------------------------------------------------------------------------- /demo_seg.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | import scipy.misc as m 4 | 5 | from ptsemseg.models.fcn_seg import * 6 | from ptsemseg.models.segnet_seg import * 7 | from ptsemseg.models.frrn_seg import * 8 | from ptsemseg.models.deeplab_seg import * 9 | from ptsemseg.models.fcrn_seg import * 10 | from ptsemseg.models.dispnet_seg import * 11 | 12 | # segmentation demo 13 | # image resize height and width need to match the training settings of the 
pretrained model 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("--dataset", default='cityscapes', type=str, choices=["cityscapes", "kitti"]) 17 | # datasets/kitti/semantics/training/image_2/000193_10.png 18 | parser.add_argument("--img_path", type=str, 19 | default='datasets/cityscapes/leftImg8bit/train/aachen/aachen_000005_000019_leftImg8bit.png', 20 | help='path to the input image') 21 | parser.add_argument("--model_name", type=str, default='deeplab', choices=["fcn", "frrnA", "segnet", "deeplab", "dispnet", "fcrn"]) 22 | parser.add_argument("--model_path", type=str, 23 | default='runs/deeplab_cityscapes_seg/11044_513_1025_train_requireF_adam_lr4_batchsize2/deeplab_cityscapes_best_model.pkl', 24 | help='path to the pretrained model') 25 | parser.add_argument("--height", type=int, default=512, help="image resize height") # 256 for kitti 26 | parser.add_argument("--width", type=int, default=1024, help="image resize width") # 832 for kitti 27 | 28 | args = parser.parse_args() 29 | 30 | def get_model(model_name): 31 | try: 32 | return { 33 | "fcn": fcn_seg(n_classes=19), 34 | "frrnA": frrn_seg(n_classes=19, model_type="A"), 35 | "segnet": segnet_seg(n_classes=19), 36 | "deeplab": deeplab_seg(n_classes=19), 37 | "dispnet": dispnet_seg(n_classes=19), 38 | "fcrn": fcrn_seg(n_classes=19), 39 | }[model_name] 40 | except: 41 | raise("Model {} not available".format(model_name)) 42 | 43 | # 19classes, RGB of maskes 44 | colors = [ # [ 0, 0, 0], 45 | [128, 64, 128], 46 | [244, 35, 232], 47 | [70, 70, 70], 48 | [102, 102, 156], 49 | [190, 153, 153], 50 | [153, 153, 153], 51 | [250, 170, 30], 52 | [220, 220, 0], 53 | [107, 142, 35], 54 | [152, 251, 152], 55 | [0, 130, 180], 56 | [220, 20, 60], 57 | [255, 0, 0], 58 | [0, 0, 142], 59 | [0, 0, 70], 60 | [0, 60, 100], 61 | [0, 80, 100], 62 | [0, 0, 230], 63 | [119, 11, 32], 64 | ] 65 | 66 | label_colours = dict(zip(range(19), colors)) 67 | 68 | def decode_segmap_tocolor(temp, n_classes=19): 69 | r = temp.copy() 70 | g = temp.copy() 71 | b = temp.copy() 72 | for l in range(0, n_classes): 73 | r[temp == l] = label_colours[l][0] 74 | g[temp == l] = label_colours[l][1] 75 | b[temp == l] = label_colours[l][2] 76 | 77 | rgb = np.zeros((temp.shape[0], temp.shape[1], 3)) 78 | rgb[:, :, 0] = r / 255.0 79 | rgb[:, :, 1] = g / 255.0 80 | rgb[:, :, 2] = b / 255.0 81 | return rgb 82 | 83 | 84 | @torch.no_grad() 85 | def main(): 86 | img = m.imread(args.img_path) 87 | 88 | # input image preprocessing, need to match the training settings of the pretrained model 89 | img = m.imresize(img, (args.height, args.width)).astype(np.float32) 90 | img = img[:, :, ::-1] 91 | img = img / 255.0 92 | img = img.transpose(2, 0, 1) 93 | img = torch.from_numpy(img).float().unsqueeze(0) 94 | 95 | # load pretrained model 96 | model = get_model(args.model_name) 97 | # weights = torch.load(args.model_path) 98 | weights = torch.load(args.model_path, map_location=lambda storage, loc: storage) 99 | model.load_state_dict(weights['model_state']) 100 | model.eval() 101 | 102 | output = model(img) 103 | pred = np.squeeze(output.data.max(1)[1].cpu().numpy(), axis=0) 104 | 105 | decoded = decode_segmap_tocolor(pred, n_classes=19) 106 | # m.imsave("output_predict_img/dispnet_output_seg.png", decoded) 107 | m.imsave("city_seg.png", decoded) 108 | 109 | 110 | if __name__ == '__main__': 111 | main() 112 | 113 | -------------------------------------------------------------------------------- /fcrn_metric_0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/fcrn_metric_0.png -------------------------------------------------------------------------------- /kitti_depth_eval/__pycache__/depth_evaluation_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/kitti_depth_eval/__pycache__/depth_evaluation_utils.cpython-36.pyc -------------------------------------------------------------------------------- /kitti_depth_eval/depth_evaluation_utils.py: -------------------------------------------------------------------------------- 1 | # Mostly based on the code written by Clement Godard: 2 | # https://github.com/mrharicot/monodepth/blob/master/utils/evaluation_utils.py 3 | import numpy as np 4 | from collections import Counter 5 | from path import Path 6 | from scipy.misc import imread 7 | from tqdm import tqdm 8 | from scipy.interpolate import LinearNDInterpolator 9 | import scipy.misc as m 10 | 11 | 12 | class test_framework_KITTI(object): 13 | def __init__(self, root, test_files, seq_length=3, min_depth=1e-3, max_depth=100, step=1): 14 | self.root = root 15 | self.min_depth, self.max_depth = min_depth, max_depth 16 | self.calib_dirs, self.gt_files, self.img_files, self.cams = read_scene_data(self.root, test_files, seq_length, step) 17 | 18 | def __getitem__(self, i): 19 | tgt = imread(self.img_files[i]).astype(np.float32) # input image 20 | depth, depth_fill = generate_depth_map(self.calib_dirs[i], self.gt_files[i], tgt.shape[:2], self.cams[i], interp=True) 21 | # m.imsave('depth_gt.png', depth) 22 | # m.imsave('depth_gt_filled.png', depth_fill) 23 | return {'tgt': tgt, 24 | 'path': self.img_files[i], 25 | 'gt_depth': depth, 26 | 'mask': generate_mask(depth, self.min_depth, self.max_depth) 27 | } 28 | 29 | def __len__(self): 30 | return len(self.img_files) 31 | 32 | 33 | ############################################################################### 34 | # EIGEN 35 | # generate depth ground truths 36 | 37 | def read_scene_data(data_root, test_list): 38 | data_root = Path(data_root) 39 | gt_files = [] 40 | calib_dirs = [] 41 | im_files = [] 42 | cams = [] 43 | # displacements = [] 44 | # demi_length = (seq_length - 1) // 2 45 | # shift_range = step * np.arange(-demi_length, demi_length + 1) 46 | 47 | print('getting test metadata ... 
') 48 | for sample in tqdm(test_list): 49 | tgt_img_path = data_root/sample # path for image 50 | # date: '2011_09_26', scence: '2011_09_26_drive_0002_sync', cam_id: 'img_02', index: '0000000069' 51 | date, scene, cam_id, _, index = sample[:-4].split('/') 52 | vel_path = data_root/date/scene/'velodyne_points'/'data'/'{}.bin'.format(index[:10]) 53 | 54 | if tgt_img_path.isfile(): 55 | gt_files.append(vel_path) 56 | calib_dirs.append(data_root/date) 57 | im_files.append(tgt_img_path) 58 | cams.append(int(cam_id[-2:])) 59 | else: 60 | print('{} missing'.format(tgt_img_path)) 61 | return calib_dirs, gt_files, im_files, cams 62 | 63 | 64 | def load_velodyne_points(file_name): 65 | # adapted from https://github.com/hunse/kitti 66 | points = np.fromfile(file_name, dtype=np.float32).reshape(-1, 4) 67 | points[:,3] = 1 68 | return points 69 | 70 | 71 | def lin_interp(shape, xyd): 72 | # taken from https://github.com/hunse/kitti 73 | m, n = shape 74 | ij, d = xyd[:, 1::-1], xyd[:, 2] 75 | f = LinearNDInterpolator(ij, d, fill_value=0) 76 | J, I = np.meshgrid(np.arange(n), np.arange(m)) 77 | IJ = np.vstack([I.flatten(), J.flatten()]).T 78 | disparity = f(IJ).reshape(shape) 79 | return disparity 80 | 81 | 82 | def read_calib_file(path): 83 | # taken from https://github.com/hunse/kitti 84 | float_chars = set("0123456789.e+- ") 85 | data = {} 86 | with open(path, 'r') as f: 87 | for line in f.readlines(): 88 | key, value = line.split(':', 1) 89 | value = value.strip() 90 | data[key] = value 91 | if float_chars.issuperset(value): 92 | # try to cast to float array 93 | try: 94 | data[key] = np.array(list(map(float, value.split(' ')))) 95 | except ValueError: 96 | # casting error: data[key] already eq. value, so pass 97 | pass 98 | return data 99 | 100 | 101 | def sub2ind(matrixSize, rowSub, colSub): 102 | m, n = matrixSize 103 | return rowSub * (n-1) + colSub - 1 104 | 105 | 106 | def generate_depth_map(calib_dir, velo_file_name, im_shape, cam=2, interp=False): 107 | # load calibration files 108 | cam2cam = read_calib_file(calib_dir/'calib_cam_to_cam.txt') 109 | velo2cam = read_calib_file(calib_dir/'calib_velo_to_cam.txt') 110 | velo2cam = np.hstack((velo2cam['R'].reshape(3,3), velo2cam['T'][..., np.newaxis])) 111 | velo2cam = np.vstack((velo2cam, np.array([0, 0, 0, 1.0]))) 112 | 113 | # compute projection matrix velodyne->image plane 114 | R_cam2rect = np.eye(4) 115 | R_cam2rect[:3,:3] = cam2cam['R_rect_00'].reshape(3,3) 116 | P_rect = cam2cam['P_rect_0'+str(cam)].reshape(3,4) 117 | P_velo2im = np.dot(np.dot(P_rect, R_cam2rect), velo2cam) 118 | 119 | # load velodyne points and remove all behind image plane (approximation) 120 | # each row of the velodyne data is forward, left, up, reflectance 121 | velo = load_velodyne_points(velo_file_name) 122 | velo = velo[velo[:, 0] >= 0, :] 123 | 124 | # project the points to the camera 125 | velo_pts_im = np.dot(P_velo2im, velo.T).T 126 | velo_pts_im[:, :2] = velo_pts_im[:,:2] / velo_pts_im[:,-1:] 127 | 128 | # check if in bounds 129 | # use minus 1 to get the exact same value as KITTI matlab code 130 | velo_pts_im[:, 0] = np.round(velo_pts_im[:,0]) - 1 131 | velo_pts_im[:, 1] = np.round(velo_pts_im[:,1]) - 1 132 | val_inds = (velo_pts_im[:, 0] >= 0) & (velo_pts_im[:, 1] >= 0) 133 | val_inds = val_inds & (velo_pts_im[:,0] < im_shape[1]) & (velo_pts_im[:,1] < im_shape[0]) 134 | velo_pts_im = velo_pts_im[val_inds, :] 135 | 136 | # project to image 137 | depth = np.zeros((im_shape)) 138 | depth[velo_pts_im[:, 1].astype(np.int), velo_pts_im[:, 0].astype(np.int)] = 
velo_pts_im[:, 2] 139 | 140 | # find the duplicate points and choose the closest depth 141 | inds = sub2ind(depth.shape, velo_pts_im[:, 1], velo_pts_im[:, 0]) 142 | dupe_inds = [item for item, count in Counter(inds).items() if count > 1] 143 | for dd in dupe_inds: 144 | pts = np.where(inds == dd)[0] 145 | x_loc = int(velo_pts_im[pts[0], 0]) 146 | y_loc = int(velo_pts_im[pts[0], 1]) 147 | depth[y_loc, x_loc] = velo_pts_im[pts, 2].min() 148 | depth[depth < 0] = 0 149 | 150 | if interp: 151 | # interpolate the depth map to fill in holes 152 | depth_interp = lin_interp(im_shape, velo_pts_im) 153 | return depth, depth_interp 154 | else: 155 | return depth 156 | 157 | 158 | def generate_mask(gt_depth, min_depth, max_depth): 159 | mask = np.logical_and(gt_depth > min_depth, 160 | gt_depth < max_depth) 161 | # crop used by Garg ECCV16 to reprocude Eigen NIPS14 results 162 | # for that the ground truth is not for the entire image 163 | gt_height, gt_width = gt_depth.shape 164 | crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height, 165 | 0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32) 166 | 167 | crop_mask = np.zeros(mask.shape) 168 | crop_mask[crop[0]:crop[1],crop[2]:crop[3]] = 1 169 | mask = np.logical_and(mask, crop_mask) 170 | return mask 171 | -------------------------------------------------------------------------------- /kitti_train_depth_prepare/__pycache__/kitti_raw_loader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/kitti_train_depth_prepare/__pycache__/kitti_raw_loader.cpython-36.pyc -------------------------------------------------------------------------------- /kitti_train_depth_prepare/kitti_raw_loader.py: -------------------------------------------------------------------------------- 1 | # Modified from code of Clement Pinard 2 | # https://github.com/ClementPinard/SfmLearner-Pytorch 3 | 4 | from __future__ import division 5 | import numpy as np 6 | from path import Path 7 | import scipy.misc 8 | from collections import Counter 9 | 10 | 11 | def read_calib_file(path): 12 | # taken from https://github.com/hunse/kitti 13 | float_chars = set("0123456789.e+- ") 14 | data = {} 15 | with open(path, 'r') as f: 16 | for line in f.readlines(): 17 | key, value = line.split(':', 1) 18 | value = value.strip() 19 | data[key] = value 20 | if float_chars.issuperset(value): 21 | # try to cast to float array 22 | try: 23 | data[key] = np.array(list(map(float, value.split(' ')))) 24 | except ValueError: 25 | # casting error: data[key] already eq. 
value, so pass 26 | pass 27 | 28 | return data 29 | 30 | 31 | class KittiRawLoader(object): 32 | def __init__(self, 33 | dataset_dir, 34 | static_frames_file=None, 35 | img_height=128, 36 | img_width=416, 37 | min_speed=2, 38 | ): 39 | dir_path = Path(__file__).realpath().dirname() 40 | test_scene_file = dir_path/'test_scenes.txt' 41 | 42 | self.from_speed = static_frames_file is None 43 | if static_frames_file is not None: 44 | static_frames_file = Path(static_frames_file) 45 | self.collect_static_frames(static_frames_file) 46 | 47 | with open(test_scene_file, 'r') as f: 48 | test_scenes = f.readlines() 49 | self.test_scenes = [t[:-1] for t in test_scenes] 50 | self.dataset_dir = Path(dataset_dir) 51 | self.img_height = img_height 52 | self.img_width = img_width 53 | self.cam_ids = ['02', '03'] 54 | self.date_list = ['2011_09_26', '2011_09_28', '2011_09_29', '2011_09_30', '2011_10_03'] 55 | self.min_speed = min_speed 56 | self.collect_train_folders() 57 | 58 | def collect_static_frames(self, static_frames_file): 59 | with open(static_frames_file, 'r') as f: 60 | frames = f.readlines() 61 | self.static_frames = {} 62 | for fr in frames: 63 | if fr == '\n': 64 | continue 65 | date, drive, frame_id = fr.split(' ') 66 | curr_fid = '%.10d' % (np.int(frame_id[:-1])) 67 | if drive not in self.static_frames.keys(): 68 | self.static_frames[drive] = [] 69 | self.static_frames[drive].append(curr_fid) 70 | 71 | def collect_train_folders(self): 72 | self.scenes = [] 73 | for date in self.date_list: 74 | drive_set = (self.dataset_dir/date).dirs() 75 | for dr in drive_set: 76 | if dr.name[:-5] not in self.test_scenes: 77 | self.scenes.append(dr) 78 | 79 | def collect_scenes(self, drive): 80 | train_scenes = [] 81 | for c in self.cam_ids: 82 | oxts = sorted((drive/'oxts'/'data').files('*.txt')) 83 | scene_data = {'cid': c, 'dir': drive, 'speed': [], 'frame_id': [], 'rel_path': drive.name + '_' + c} 84 | scale = None 85 | 86 | for n, f in enumerate(oxts): 87 | metadata = np.genfromtxt(f) 88 | speed = metadata[8:11] 89 | scene_data['speed'].append(speed) 90 | scene_data['frame_id'].append('{:010d}'.format(n)) 91 | lat = metadata[0] 92 | 93 | if scale is None: 94 | scale = np.cos(lat * np.pi / 180.) 
95 | 96 | sample = self.load_image(scene_data, 0) 97 | if sample is None: 98 | return [] 99 | scene_data['P_rect'] = self.get_P_rect(scene_data, sample[1], sample[2]) 100 | scene_data['intrinsics'] = scene_data['P_rect'][:,:3] 101 | 102 | train_scenes.append(scene_data) 103 | return train_scenes 104 | 105 | def get_scene_imgs(self, scene_data): 106 | def construct_sample(scene_data, i, frame_id): 107 | sample = {"img":self.load_image(scene_data, i)[0], "id":frame_id} 108 | sample['depth'] = self.generate_depth_map(scene_data, i) 109 | return sample 110 | 111 | if self.from_speed: 112 | cum_speed = np.zeros(3) 113 | for i, speed in enumerate(scene_data['speed']): 114 | cum_speed += speed 115 | speed_mag = np.linalg.norm(cum_speed) 116 | if speed_mag > self.min_speed: 117 | frame_id = scene_data['frame_id'][i] 118 | yield construct_sample(scene_data, i, frame_id) 119 | cum_speed *= 0 120 | else: # from static frame file 121 | drive = str(scene_data['dir'].name) 122 | for (i,frame_id) in enumerate(scene_data['frame_id']): 123 | if (drive not in self.static_frames.keys()) or (frame_id not in self.static_frames[drive]): 124 | yield construct_sample(scene_data, i, frame_id) 125 | 126 | def get_P_rect(self, scene_data, zoom_x, zoom_y): 127 | calib_file = scene_data['dir'].parent/'calib_cam_to_cam.txt' 128 | 129 | filedata = self.read_raw_calib_file(calib_file) 130 | P_rect = np.reshape(filedata['P_rect_' + scene_data['cid']], (3, 4)) 131 | P_rect[0] *= zoom_x 132 | P_rect[1] *= zoom_y 133 | return P_rect 134 | 135 | def load_image(self, scene_data, tgt_idx): 136 | img_file = scene_data['dir']/'image_{}'.format(scene_data['cid'])/'data'/scene_data['frame_id'][tgt_idx]+'.png' 137 | if not img_file.isfile(): 138 | return None 139 | img = scipy.misc.imread(img_file) 140 | zoom_y = self.img_height/img.shape[0] 141 | zoom_x = self.img_width/img.shape[1] 142 | img = scipy.misc.imresize(img, (self.img_height, self.img_width)) 143 | return img, zoom_x, zoom_y 144 | 145 | def read_raw_calib_file(self, filepath): 146 | # From https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py 147 | """Read in a calibration file and parse into a dictionary.""" 148 | data = {} 149 | 150 | with open(filepath, 'r') as f: 151 | for line in f.readlines(): 152 | key, value = line.split(':', 1) 153 | # The only non-float values in these files are dates, which 154 | # we don't care about anyway 155 | try: 156 | data[key] = np.array([float(x) for x in value.split()]) 157 | except ValueError: 158 | pass 159 | return data 160 | 161 | def generate_depth_map(self, scene_data, tgt_idx): 162 | # compute projection matrix velodyne->image plane 163 | def sub2ind(matrixSize, rowSub, colSub): 164 | m, n = matrixSize 165 | return rowSub * (n-1) + colSub - 1 166 | 167 | R_cam2rect = np.eye(4) 168 | 169 | calib_dir = scene_data['dir'].parent 170 | cam2cam = self.read_raw_calib_file(calib_dir/'calib_cam_to_cam.txt') 171 | velo2cam = self.read_raw_calib_file(calib_dir/'calib_velo_to_cam.txt') 172 | velo2cam = np.hstack((velo2cam['R'].reshape(3,3), velo2cam['T'][..., np.newaxis])) 173 | velo2cam = np.vstack((velo2cam, np.array([0, 0, 0, 1.0]))) 174 | P_rect = np.copy(scene_data['P_rect']) 175 | R_cam2rect[:3,:3] = cam2cam['R_rect_00'].reshape(3,3) 176 | P_velo2im = np.dot(np.dot(P_rect, R_cam2rect), velo2cam) 177 | 178 | velo_file_name = scene_data['dir']/'velodyne_points'/'data'/'{}.bin'.format(scene_data['frame_id'][tgt_idx]) 179 | 180 | # load velodyne points and remove all behind image plane (approximation) 181 | # each row of 
the velodyne data is forward, left, up, reflectance 182 | velo = np.fromfile(velo_file_name, dtype=np.float32).reshape(-1, 4) 183 | velo[:,3] = 1 184 | velo = velo[velo[:, 0] >= 0, :] 185 | 186 | # project the points to the camera 187 | velo_pts_im = np.dot(P_velo2im, velo.T).T 188 | velo_pts_im[:, :2] = velo_pts_im[:,:2] / velo_pts_im[:,-1:] 189 | 190 | # check if in bounds 191 | # use minus 1 to get the exact same value as KITTI matlab code 192 | velo_pts_im[:, 0] = np.round(velo_pts_im[:,0]) - 1 193 | velo_pts_im[:, 1] = np.round(velo_pts_im[:,1]) - 1 194 | 195 | val_inds = (velo_pts_im[:, 0] >= 0) & (velo_pts_im[:, 1] >= 0) 196 | val_inds = val_inds & (velo_pts_im[:, 0] < self.img_width) 197 | val_inds = val_inds & (velo_pts_im[:, 1] < self.img_height) 198 | velo_pts_im = velo_pts_im[val_inds, :] 199 | 200 | # project to image 201 | depth = np.zeros((self.img_height, self.img_width)).astype(np.float32) 202 | depth[velo_pts_im[:, 1].astype(np.int), velo_pts_im[:, 0].astype(np.int)] = velo_pts_im[:, 2] 203 | 204 | # find the duplicate points and choose the closest depth 205 | inds = sub2ind(depth.shape, velo_pts_im[:, 1], velo_pts_im[:, 0]) 206 | dupe_inds = [item for item, count in Counter(inds).items() if count > 1] 207 | for dd in dupe_inds: 208 | pts = np.where(inds == dd)[0] 209 | x_loc = int(velo_pts_im[pts[0], 0]) 210 | y_loc = int(velo_pts_im[pts[0], 1]) 211 | depth[y_loc, x_loc] = velo_pts_im[pts, 2].min() 212 | depth[depth < 0] = 0 213 | return depth 214 | -------------------------------------------------------------------------------- /kitti_train_depth_prepare/prepare_train_data.py: -------------------------------------------------------------------------------- 1 | # Modified from code of Clement Pinard 2 | # https://github.com/ClementPinard/SfmLearner-Pytorch 3 | 4 | import argparse 5 | import scipy.misc 6 | import numpy as np 7 | from joblib import Parallel, delayed 8 | from tqdm import tqdm 9 | from path import Path 10 | from kitti_raw_loader import KittiRawLoader 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("--dataset_dir", default='../../kitti', type=str, 14 | help='path to original dataset') 15 | parser.add_argument("--static-frames", default='static_frames.txt', 16 | help="list of imgs to discard for being static, if not set will discard them based on speed \ 17 | (careful, on KITTI some frames have incorrect speed)") 18 | parser.add_argument("--dump-root", type=str, default='../prepared_kitti_train_data', help="Where to dump the data") 19 | parser.add_argument("--height", type=int, default=128, help="image height") 20 | parser.add_argument("--width", type=int, default=416, help="image width") 21 | parser.add_argument("--num-threads", type=int, default=1, help="number of threads to use") 22 | 23 | args = parser.parse_args() 24 | 25 | 26 | def dump_example(scene, args): # scene: 2011_0926_drive_0003_sync, ... 
27 | scene_list = data_loader.collect_scenes(scene) # scene_list: ..._02, ..._03 28 | # print(scene) 29 | for scene_data in scene_list: 30 | dump_dir = args.dump_root/scene_data['rel_path'] 31 | dump_dir.makedirs_p() 32 | intrinsics = scene_data['intrinsics'] 33 | dump_cam_file = dump_dir/'cam.txt' 34 | np.savetxt(dump_cam_file, intrinsics) 35 | 36 | # print(dump_dir) 37 | for sample in data_loader.get_scene_imgs(scene_data): # sample: img, id, depth 38 | img, frame_nb = sample["img"], sample["id"] 39 | dump_img_file = dump_dir/'{}.jpg'.format(frame_nb) 40 | scipy.misc.imsave(dump_img_file, img) 41 | dump_depth_file = dump_dir/'{}.npy'.format(frame_nb) 42 | np.save(dump_depth_file, sample["depth"]) 43 | 44 | if len(dump_dir.files('*.jpg')) < 3: 45 | dump_dir.rmtree() 46 | 47 | 48 | def main(): 49 | args.dump_root = Path(args.dump_root) 50 | args.dump_root.mkdir_p() 51 | 52 | global data_loader 53 | 54 | data_loader = KittiRawLoader(args.dataset_dir, 55 | static_frames_file=args.static_frames, 56 | img_height=args.height, 57 | img_width=args.width, 58 | ) 59 | 60 | print('Retrieving frames') 61 | for scene in data_loader.scenes: 62 | print(scene) 63 | if args.num_threads == 1: 64 | for scene in tqdm(data_loader.scenes): 65 | dump_example(scene, args) 66 | else: 67 | Parallel(n_jobs=args.num_threads)(delayed(dump_example)(scene, args) for scene in tqdm(data_loader.scenes)) 68 | 69 | print('Generating train val lists') 70 | np.random.seed(8964) 71 | # to avoid data snooping, we will make two cameras of the same scene to fall in the same set, train or val 72 | subdirs = args.dump_root.dirs() 73 | canonic_prefixes = set([subdir.basename()[:-2] for subdir in subdirs]) 74 | with open(args.dump_root / 'train.txt', 'w') as tf: 75 | with open(args.dump_root / 'val.txt', 'w') as vf: 76 | for pr in tqdm(canonic_prefixes): 77 | corresponding_dirs = args.dump_root.dirs('{}*'.format(pr)) 78 | if np.random.random() < 0.1: 79 | for s in corresponding_dirs: 80 | vf.write('{}\n'.format(s.name)) 81 | else: 82 | for s in corresponding_dirs: 83 | tf.write('{}\n'.format(s.name)) 84 | 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /kitti_train_depth_prepare/test_scenes.txt: -------------------------------------------------------------------------------- 1 | 2011_09_26_drive_0117 2 | 2011_09_28_drive_0002 3 | 2011_09_26_drive_0052 4 | 2011_09_30_drive_0016 5 | 2011_09_26_drive_0059 6 | 2011_09_26_drive_0027 7 | 2011_09_26_drive_0020 8 | 2011_09_26_drive_0009 9 | 2011_09_26_drive_0013 10 | 2011_09_26_drive_0101 11 | 2011_09_26_drive_0046 12 | 2011_09_26_drive_0029 13 | 2011_09_26_drive_0064 14 | 2011_09_26_drive_0048 15 | 2011_10_03_drive_0027 16 | 2011_09_26_drive_0002 17 | 2011_09_26_drive_0036 18 | 2011_09_29_drive_0071 19 | 2011_10_03_drive_0047 20 | 2011_09_30_drive_0027 21 | 2011_09_26_drive_0086 22 | 2011_09_26_drive_0084 23 | 2011_09_26_drive_0096 24 | 2011_09_30_drive_0018 25 | 2011_09_26_drive_0106 26 | 2011_09_26_drive_0056 27 | 2011_09_26_drive_0023 28 | 2011_09_26_drive_0093 29 | -------------------------------------------------------------------------------- /output_predict_img/deeplab_output_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/output_predict_img/deeplab_output_depth.png 
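Aside on the prepared KITTI depth data: the prepare_train_data.py script shown earlier dumps, for every KITTI drive and camera, a folder of <frame_id>.jpg images resized to 128x416, matching <frame_id>.npy depth maps and a cam.txt with the 3x3 intrinsics, plus train.txt/val.txt scene lists — this is the prepared_kitti_train_data directory that the *_kitti_depth.yml configs point to. A small sketch of reading one such sample back, using NumPy/scipy as elsewhere in the repo; the scene folder and frame names here are hypothetical examples:

import numpy as np
import scipy.misc as m

scene_dir = "prepared_kitti_train_data/2011_09_26_drive_0001_sync_02"  # <drive>_<cam_id>
img = m.imread(scene_dir + "/0000000005.jpg")        # uint8, shape [128, 416, 3]
depth = np.load(scene_dir + "/0000000005.npy")       # float32 metres, 0 where no LiDAR return
intrinsics = np.genfromtxt(scene_dir + "/cam.txt")   # 3x3 camera matrix (P_rect[:, :3])
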
-------------------------------------------------------------------------------- /output_predict_img/deeplab_output_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/output_predict_img/deeplab_output_seg.png -------------------------------------------------------------------------------- /output_predict_img/dispnet_output_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/output_predict_img/dispnet_output_depth.png -------------------------------------------------------------------------------- /output_predict_img/dispnet_output_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/output_predict_img/dispnet_output_seg.png -------------------------------------------------------------------------------- /output_predict_img/fcn_output_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/output_predict_img/fcn_output_depth.png -------------------------------------------------------------------------------- /output_predict_img/fcn_output_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/output_predict_img/fcn_output_seg.png -------------------------------------------------------------------------------- /output_predict_img/fcrn_output_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/output_predict_img/fcrn_output_depth.png -------------------------------------------------------------------------------- /output_predict_img/fcrn_output_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/output_predict_img/fcrn_output_seg.png -------------------------------------------------------------------------------- /output_predict_img/frrn_output_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/output_predict_img/frrn_output_depth.png -------------------------------------------------------------------------------- /output_predict_img/frrn_output_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/output_predict_img/frrn_output_seg.png -------------------------------------------------------------------------------- /output_predict_img/segnet_output_depth.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/output_predict_img/segnet_output_depth.png -------------------------------------------------------------------------------- /output_predict_img/segnet_output_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/output_predict_img/segnet_output_seg.png -------------------------------------------------------------------------------- /ptsemseg/augmentations/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from ptsemseg.augmentations.augmentations import * 3 | 4 | logger = logging.getLogger('ptsemseg') 5 | 6 | key2aug = {'gamma': AdjustGamma, 7 | 'hue': AdjustHue, 8 | 'brightness': AdjustBrightness, 9 | 'saturation': AdjustSaturation, 10 | 'contrast': AdjustContrast, 11 | 'rcrop': RandomCrop, 12 | 'hflip': RandomHorizontallyFlip, 13 | 'vflip': RandomVerticallyFlip, 14 | 'scale': Scale, 15 | 'rsize': RandomSized, 16 | 'rsizecrop': RandomSizedCrop, 17 | 'rotate': RandomRotate, 18 | 'translate': RandomTranslate, 19 | 'ccrop': CenterCrop,} 20 | 21 | def get_composed_augmentations(aug_dict): 22 | if aug_dict is None: 23 | logger.info("Using No Augmentations") 24 | return None 25 | 26 | augmentations = [] 27 | for aug_key, aug_param in aug_dict.items(): 28 | augmentations.append(key2aug[aug_key](aug_param)) 29 | logger.info("Using {} aug with params {}".format(aug_key, aug_param)) 30 | return Compose(augmentations) 31 | 32 | 33 | -------------------------------------------------------------------------------- /ptsemseg/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from ptsemseg.loader.cityscapes_loader_seg import cityscapesLoader_seg 2 | from ptsemseg.loader.kitti_loader_seg import kittiLoader_seg 3 | from ptsemseg.loader.kitti_loader_depth import kittiLoader_depth 4 | 5 | 6 | def get_loader(name, task): 7 | if task == "seg": 8 | return { 9 | "cityscapes": cityscapesLoader_seg, 10 | "kitti": kittiLoader_seg 11 | }[name] 12 | elif task == "depth": 13 | return { 14 | "kitti": kittiLoader_depth 15 | }[name] 16 | else: 17 | print("task undefined!") 18 | -------------------------------------------------------------------------------- /ptsemseg/loader/cityscapes_loader_depth.py: -------------------------------------------------------------------------------- 1 | # Mostly borrowed from https://github.com/meetshah1995/pytorch-semseg 2 | 3 | import os 4 | import torch 5 | import sys 6 | import numpy as np 7 | import scipy.misc as m 8 | import cv2 9 | import copy 10 | 11 | from torch.utils import data 12 | 13 | from ptsemseg.utils import recursive_glob 14 | from ptsemseg.augmentations import * 15 | 16 | 17 | class cityscapesLoader_depth(data.Dataset): 18 | def __init__( 19 | self, 20 | root, 21 | split="train", 22 | is_transform=True, 23 | img_size=(1024, 2048), 24 | augmentations=None, 25 | img_norm=True, 26 | # version="cityscapes", 27 | ): 28 | """__init__ 29 | 30 | :param root: 31 | :param split: 32 | :param is_transform: 33 | :param img_size: 34 | :param augmentations 35 | """ 36 | self.root = root 37 | self.split = split 38 | self.is_transform = is_transform 39 | self.augmentations = augmentations 40 | self.img_norm = img_norm 41 | self.img_size = ( 42 | img_size if isinstance(img_size, tuple) else (img_size, 
img_size) 43 | ) 44 | # self.mean = np.array(self.mean_rgb[version]) 45 | self.files = {} 46 | 47 | self.images_base = os.path.join(self.root, "leftImg8bit", self.split) 48 | self.annotations_base = os.path.join( 49 | self.root, "disparity", self.split 50 | ) 51 | 52 | self.files[split] = recursive_glob(rootdir=self.images_base, suffix=".png") 53 | 54 | if not self.files[split]: 55 | raise Exception( 56 | "No files for split=[%s] found in %s" % (split, self.images_base) 57 | ) 58 | 59 | print("Found %d %s images" % (len(self.files[split]), split)) 60 | sys.stdout.flush() 61 | 62 | def __len__(self): 63 | """__len__""" 64 | return len(self.files[self.split]) 65 | 66 | def __getitem__(self, index): 67 | """__getitem__ 68 | 69 | :param index: 70 | """ 71 | img_path = self.files[self.split][index].rstrip() 72 | disp_path = os.path.join( 73 | self.annotations_base, 74 | img_path.split(os.sep)[-2], 75 | os.path.basename(img_path)[:-15] + "disparity.png", 76 | ) 77 | 78 | img = m.imread(img_path) # original image size: 1024*2048*3 79 | img = np.array(img, dtype=np.uint8) 80 | 81 | disp = cv2.imread(disp_path, cv2.IMREAD_UNCHANGED).astype(np.float32) # disparity map: [1024, 2056] 82 | disp[disp > 0] = (disp[disp > 0] - 1) / 256 83 | depth = copy.copy(disp) 84 | depth[depth > 0] = (0.209313 * 2262.52) / depth[depth > 0] 85 | depth[depth >= 85] = 0 86 | 87 | if self.augmentations is not None: 88 | img, depth = self.augmentations(img, depth) 89 | 90 | if self.is_transform: 91 | img, depth = self.transform(img, depth) 92 | 93 | return img, depth, img_path 94 | 95 | def transform(self, img, depth): 96 | """transform 97 | 98 | :param img: 99 | :param depth: 100 | """ 101 | img = m.imresize(img, (self.img_size[0], self.img_size[1])) # uint8 with RGB mode 102 | # img = img[:, :, ::-1] # RGB -> BGR [h, w, 3] do not exist for depth task 103 | img = img.astype(np.float32) 104 | if self.img_norm: 105 | img = ((img / 255 - 0.5) / 0.5) # normalize to [-1, 1], different from segmentation 106 | img = img.transpose(2, 0, 1) # [3, h, w] 107 | 108 | depth = depth.astype(np.float32) 109 | depth = m.imresize(depth, (self.img_size[0], self.img_size[1]), "nearest", mode="F") 110 | depth = np.expand_dims(depth, axis=0) 111 | 112 | img = torch.from_numpy(img).float() # tensor, shape: [3, h, w] 113 | depth = torch.from_numpy(depth).float() # tensor, shape: [1, h, w] 114 | 115 | return img, depth 116 | 117 | -------------------------------------------------------------------------------- /ptsemseg/loader/cityscapes_loader_seg.py: -------------------------------------------------------------------------------- 1 | # Mostly borrowed from https://github.com/meetshah1995/pytorch-semseg 2 | 3 | import os 4 | import torch 5 | import sys 6 | import numpy as np 7 | import scipy.misc as m 8 | 9 | from torch.utils import data 10 | 11 | from ptsemseg.utils import recursive_glob 12 | from ptsemseg.augmentations import * 13 | 14 | 15 | class cityscapesLoader_seg(data.Dataset): 16 | # 19classes, RGB of maskes 17 | colors = [ # [ 0, 0, 0], 18 | [128, 64, 128], 19 | [244, 35, 232], 20 | [70, 70, 70], 21 | [102, 102, 156], 22 | [190, 153, 153], 23 | [153, 153, 153], 24 | [250, 170, 30], 25 | [220, 220, 0], 26 | [107, 142, 35], 27 | [152, 251, 152], 28 | [0, 130, 180], 29 | [220, 20, 60], 30 | [255, 0, 0], 31 | [0, 0, 142], 32 | [0, 0, 70], 33 | [0, 60, 100], 34 | [0, 80, 100], 35 | [0, 0, 230], 36 | [119, 11, 32], 37 | ] 38 | 39 | label_colours = dict(zip(range(19), colors)) 40 | 41 | # mean_rgb = { 42 | # "pascal": [103.939, 116.779, 
123.68], 43 | # "cityscapes": [0.0, 0.0, 0.0], 44 | # } # pascal mean for PSPNet and ICNet pre-trained model 45 | 46 | def __init__( 47 | self, 48 | root, 49 | split="train", 50 | is_transform=True, 51 | img_size=(1024, 2048), 52 | augmentations=None, 53 | img_norm=True, 54 | saliency_eval_depth=False, 55 | # version="cityscapes", 56 | ): 57 | """__init__ 58 | 59 | :param root: 60 | :param split: 61 | :param is_transform: 62 | :param img_size: 63 | :param augmentations 64 | """ 65 | self.root = root 66 | self.split = split 67 | self.is_transform = is_transform 68 | self.augmentations = augmentations 69 | self.img_norm = img_norm 70 | self.n_classes = 19 71 | self.img_size = ( 72 | img_size if isinstance(img_size, tuple) else (img_size, img_size) 73 | ) 74 | # self.mean = np.array(self.mean_rgb[version]) 75 | self.files = {} 76 | self.saliency_eval_depth = saliency_eval_depth 77 | 78 | self.images_base = os.path.join(self.root, "leftImg8bit", self.split) 79 | self.annotations_base = os.path.join( 80 | self.root, "gtFine", self.split 81 | ) 82 | 83 | self.files[split] = recursive_glob(rootdir=self.images_base, suffix=".png") 84 | 85 | self.void_classes = [0, 1, 2, 3, 4, 5, 6, 9, 10, 14, 15, 16, 18, 29, 30, -1] 86 | self.valid_classes = [ 87 | 7, 88 | 8, 89 | 11, 90 | 12, 91 | 13, 92 | 17, 93 | 19, 94 | 20, 95 | 21, 96 | 22, 97 | 23, 98 | 24, 99 | 25, 100 | 26, 101 | 27, 102 | 28, 103 | 31, 104 | 32, 105 | 33, 106 | ] 107 | self.class_names = [ 108 | "unlabelled", 109 | "road", 110 | "sidewalk", 111 | "building", 112 | "wall", 113 | "fence", 114 | "pole", 115 | "traffic_light", 116 | "traffic_sign", 117 | "vegetation", 118 | "terrain", 119 | "sky", 120 | "person", 121 | "rider", 122 | "car", 123 | "truck", 124 | "bus", 125 | "train", 126 | "motorcycle", 127 | "bicycle", 128 | ] 129 | 130 | self.ignore_index = 250 131 | self.class_map = dict(zip(self.valid_classes, range(19))) 132 | self.decode_class_map = dict(zip(range(19), self.valid_classes)) 133 | 134 | if not self.files[split]: 135 | raise Exception( 136 | "No files for split=[%s] found in %s" % (split, self.images_base) 137 | ) 138 | 139 | print("Found %d %s images" % (len(self.files[split]), split)) 140 | sys.stdout.flush() 141 | 142 | def __len__(self): 143 | """__len__""" 144 | return len(self.files[self.split]) 145 | 146 | def __getitem__(self, index): 147 | """__getitem__ 148 | 149 | :param index: 150 | """ 151 | img_path = self.files[self.split][index].rstrip() 152 | lbl_path = os.path.join( 153 | self.annotations_base, 154 | img_path.split(os.sep)[-2], 155 | os.path.basename(img_path)[:-15] + "gtFine_labelIds.png", 156 | ) 157 | 158 | img = m.imread(img_path) # original image size: 1024*2048*3 159 | img = np.array(img, dtype=np.uint8) 160 | 161 | lbl = m.imread(lbl_path) # original label size: 1024*2048 162 | lbl = self.encode_segmap(np.array(lbl, dtype=np.uint8)) 163 | 164 | if self.augmentations is not None: 165 | img, lbl = self.augmentations(img, lbl) 166 | 167 | if self.is_transform: 168 | img, lbl = self.transform(img, lbl) 169 | 170 | return img, lbl, img_path 171 | 172 | def transform(self, img, lbl): 173 | """transform 174 | 175 | :param img: 176 | :param lbl: 177 | """ 178 | img = m.imresize( 179 | img, (self.img_size[0], self.img_size[1]) 180 | ) # uint8 with RGB mode 181 | if self.saliency_eval_depth == False: 182 | img = img[:, :, ::-1] # RGB -> BGR shape: [h, w, 3] 183 | img = img.astype(np.float64) 184 | # img -= self.mean 185 | if self.img_norm: 186 | if self.saliency_eval_depth == False: 187 | img = 
img.astype(float) / 255.0 188 | else: 189 | img = ((img / 255 - 0.5) / 0.5) 190 | img = img.transpose(2, 0, 1) # NHWC -> NCHW [3, h, w] 191 | 192 | classes = np.unique(lbl) # all classes included in this label image 193 | lbl = lbl.astype(float) 194 | lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), "nearest", mode="F") 195 | lbl = lbl.astype(int) 196 | 197 | if not np.all(classes == np.unique(lbl)): 198 | print("WARN: resizing labels yielded fewer classes") 199 | 200 | if not np.all(np.unique(lbl[lbl != self.ignore_index]) < self.n_classes): 201 | print("after det", classes, np.unique(lbl)) 202 | raise ValueError("Segmentation map contained invalid class values") 203 | 204 | img = torch.from_numpy(img).float() # tensor, shape: [3, h, w] 205 | lbl = torch.from_numpy(lbl).long() # tensor, shape: [h, w] 206 | 207 | return img, lbl 208 | 209 | def decode_segmap_tocolor(self, temp): 210 | r = temp.copy() 211 | g = temp.copy() 212 | b = temp.copy() 213 | for l in range(0, self.n_classes): 214 | r[temp == l] = self.label_colours[l][0] 215 | g[temp == l] = self.label_colours[l][1] 216 | b[temp == l] = self.label_colours[l][2] 217 | 218 | rgb = np.zeros((temp.shape[0], temp.shape[1], 3)) 219 | rgb[:, :, 0] = r / 255.0 220 | rgb[:, :, 1] = g / 255.0 221 | rgb[:, :, 2] = b / 255.0 222 | return rgb 223 | 224 | def decode_segmap_tolabelId(self, temp): 225 | labels_ID = temp.copy() 226 | for i in range(19): 227 | labels_ID[temp == i] = self.valid_classes[i] 228 | return labels_ID 229 | 230 | def encode_segmap(self, mask): 231 | # Put all void classes to 250 232 | # map valid classes to 0~18 233 | for _voidc in self.void_classes: 234 | mask[mask == _voidc] = self.ignore_index 235 | for _validc in self.valid_classes: 236 | mask[mask == _validc] = self.class_map[_validc] 237 | return mask -------------------------------------------------------------------------------- /ptsemseg/loader/kitti_loader_depth.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | import numpy as np 3 | from path import Path 4 | import scipy.misc as m 5 | import torch 6 | from ptsemseg.augmentations import * 7 | 8 | 9 | def crawl_folders(folders_list): 10 | # taken from https://github.com/ClementPinard/SfmLearner-Pytorch 11 | imgs = [] 12 | depth = [] 13 | for folder in folders_list: 14 | current_imgs = sorted(folder.files('*.jpg')) 15 | current_depth = [] 16 | for img in current_imgs: 17 | d = img.dirname()/(img.name[:-4] + '.npy') 18 | assert(d.isfile()), "depth file {} not found".format(str(d)) 19 | depth.append(d) 20 | imgs.extend(current_imgs) 21 | depth.extend(current_depth) 22 | return imgs, depth 23 | 24 | 25 | class kittiLoader_depth(data.Dataset): 26 | """A sequence data loader where the files are arranged in this way: 27 | root/scene_1/0000000.jpg 28 | root/scene_1/0000000.npy 29 | root/scene_1/0000001.jpg 30 | root/scene_1/0000001.npy 31 | .. 32 | root/scene_2/0000000.jpg 33 | root/scene_2/0000000.npy 34 | . 
35 | 36 | transform functions must take in a list a images and a numpy array which can be None 37 | """ 38 | 39 | def __init__( 40 | self, 41 | root, 42 | split="train", 43 | is_transform=True, 44 | img_size=(128, 416), 45 | augmentations=None, 46 | img_norm=True, 47 | ): 48 | self.root = Path(root) 49 | scene_list_path = self.root/'{}.txt'.format(split) 50 | self.scenes = [self.root/folder[:-1] for folder in open(scene_list_path)] 51 | self.imgs, self.depth = crawl_folders(self.scenes) 52 | self.is_transform = is_transform 53 | self.augmentations = augmentations 54 | self.img_norm = img_norm 55 | self.img_size = ( 56 | img_size if isinstance(img_size, tuple) else (img_size, img_size) 57 | ) 58 | print("number of {} images:".format(split), len(self.imgs)) 59 | 60 | def __len__(self): 61 | return len(self.imgs) 62 | 63 | def __getitem__(self, index): 64 | img = m.imread(self.imgs[index]) # img: [h, w, 3], shape determined by img_height, img_width arguments of prepare_train_data.py 65 | img = np.array(img, dtype=np.uint8) 66 | depth = np.load(self.depth[index]) # depth: [h, w] 67 | 68 | if self.augmentations is not None: 69 | img, depth = self.augmentations(img, depth) 70 | 71 | if self.is_transform: 72 | img, depth = self.transform(img, depth) 73 | 74 | return img, depth, self.imgs[index] 75 | 76 | def transform(self, img, depth): 77 | img = m.imresize(img, (self.img_size[0], self.img_size[1])) # uint8 with RGB mode 78 | img = img.astype(np.float32) 79 | img = np.transpose(img, (2, 0, 1)) # [3, h, w] 80 | 81 | depth = depth.astype(np.float32) 82 | depth = m.imresize(depth, (self.img_size[0], self.img_size[1]), "nearest", mode="F") 83 | depth = np.expand_dims(depth, axis=0) 84 | 85 | if self.img_norm: 86 | img = ((img / 255 - 0.5) / 0.5) # normalize to [-1, 1] 87 | 88 | img = torch.from_numpy(img).float() # [3, h, w] 89 | depth = torch.from_numpy(depth).float() # [1, h, w] 90 | 91 | return img, depth 92 | 93 | 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /ptsemseg/loader/kitti_loader_seg.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/meetshah1995/pytorch-semseg 2 | 3 | import os 4 | import torch 5 | import sys 6 | import numpy as np 7 | import scipy.misc as m 8 | 9 | from torch.utils import data 10 | 11 | from ptsemseg.augmentations import * 12 | 13 | class kittiLoader_seg(data.Dataset): 14 | # 19classes, RGB of maskes 15 | colors = [ # [ 0, 0, 0], 16 | [128, 64, 128], 17 | [244, 35, 232], 18 | [70, 70, 70], 19 | [102, 102, 156], 20 | [190, 153, 153], 21 | [153, 153, 153], 22 | [250, 170, 30], 23 | [220, 220, 0], 24 | [107, 142, 35], 25 | [152, 251, 152], 26 | [0, 130, 180], 27 | [220, 20, 60], 28 | [255, 0, 0], 29 | [0, 0, 142], 30 | [0, 0, 70], 31 | [0, 60, 100], 32 | [0, 80, 100], 33 | [0, 0, 230], 34 | [119, 11, 32], 35 | ] 36 | 37 | label_colours = dict(zip(range(19), colors)) 38 | 39 | # mean_rgb = { 40 | # "pascal": [103.939, 116.779, 123.68], 41 | # "cityscapes": [0.0, 0.0, 0.0], 42 | # } # pascal mean for PSPNet and ICNet pre-trained model 43 | 44 | def __init__( 45 | self, 46 | root, 47 | split="train", 48 | is_transform=True, 49 | img_size=(375, 1242), 50 | augmentations=None, 51 | img_norm=True, 52 | saliency_eval_depth = False 53 | # version="cityscapes", 54 | ): 55 | """__init__ 56 | 57 | :param root: 58 | :param split: 59 | :param is_transform: 60 | :param img_size: 61 | :param augmentations 62 | """ 63 | self.root = root 64 | self.split = 
split 65 | self.is_transform = is_transform 66 | self.augmentations = augmentations 67 | self.img_norm = img_norm 68 | self.n_classes = 19 69 | self.img_size = ( 70 | img_size if isinstance(img_size, tuple) else (img_size, img_size) 71 | ) 72 | # self.mean = np.array(self.mean_rgb[version]) 73 | self.files = {} 74 | self.saliency_eval_depth = saliency_eval_depth # for later saliency evaluation on depth, always set to False for KITTI segmentation 75 | 76 | if self.split == "test": 77 | self.images_base = os.path.join(self.root, "testing", "image_2") 78 | self.annotations_base = os.path.join(self.root, "training", "semantic") # invalid 79 | else: 80 | self.images_base = os.path.join(self.root, "training", "image_2") 81 | self.annotations_base = os.path.join(self.root, "training", "semantic") 82 | 83 | self.all_files = os.listdir(self.images_base) 84 | self.all_files.sort() 85 | 86 | # split 40 images from the training set as the val set 87 | if self.split == "val": 88 | self.files[split] = self.all_files[::5] # select one img from every 5 imgs into the val set 89 | # 160 training images 90 | if self.split == "train": 91 | self.files[split] = [file_name for file_name in self.all_files if file_name not in self.all_files[::5]] 92 | if self.split == "test": 93 | self.files[split] = self.all_files 94 | 95 | 96 | self.void_classes = [0, 1, 2, 3, 4, 5, 6, 9, 10, 14, 15, 16, 18, 29, 30, -1] 97 | self.valid_classes = [ 98 | 7, 99 | 8, 100 | 11, 101 | 12, 102 | 13, 103 | 17, 104 | 19, 105 | 20, 106 | 21, 107 | 22, 108 | 23, 109 | 24, 110 | 25, 111 | 26, 112 | 27, 113 | 28, 114 | 31, 115 | 32, 116 | 33, 117 | ] 118 | self.class_names = [ 119 | "unlabelled", 120 | "road", 121 | "sidewalk", 122 | "building", 123 | "wall", 124 | "fence", 125 | "pole", 126 | "traffic_light", 127 | "traffic_sign", 128 | "vegetation", 129 | "terrain", 130 | "sky", 131 | "person", 132 | "rider", 133 | "car", 134 | "truck", 135 | "bus", 136 | "train", 137 | "motorcycle", 138 | "bicycle", 139 | ] 140 | 141 | self.ignore_index = 250 142 | self.class_map = dict(zip(self.valid_classes, range(19))) 143 | self.decode_class_map = dict(zip(range(19), self.valid_classes)) 144 | 145 | if not self.files[split]: 146 | raise Exception( 147 | "No files for split=[%s] found in %s" % (split, self.images_base) 148 | ) 149 | 150 | print("Found %d %s images" % (len(self.files[split]), split)) 151 | sys.stdout.flush() 152 | 153 | def __len__(self): 154 | """__len__""" 155 | return len(self.files[self.split]) 156 | 157 | def __getitem__(self, index): 158 | """__getitem__ 159 | 160 | :param index: 161 | """ 162 | path = self.files[self.split][index].rstrip() 163 | img_path = os.path.join(self.images_base, path) 164 | lbl_path = os.path.join(self.annotations_base, path) 165 | 166 | img = m.imread(img_path) # original image size: 375*1242*3 167 | img = np.array(img, dtype=np.uint8) 168 | 169 | lbl = m.imread(lbl_path) # original label size: 375*1242 170 | lbl = self.encode_segmap(np.array(lbl, dtype=np.uint8)) 171 | 172 | if self.augmentations is not None: 173 | img, lbl = self.augmentations(img, lbl) 174 | 175 | if self.is_transform: 176 | img, lbl = self.transform(img, lbl) 177 | 178 | return img, lbl, img_path 179 | 180 | def transform(self, img, lbl): 181 | """transform 182 | 183 | :param img: 184 | :param lbl: 185 | """ 186 | # img: shape: [h, w, 3] 187 | img = m.imresize(img, (self.img_size[0], self.img_size[1])) # uint8 with RGB mode 188 | if self.saliency_eval_depth == False: 189 | img = img[:, :, ::-1] # RGB -> BGR shape: [h, w, 3] 190 | img 
= img.astype(np.float64) 191 | # img -= self.mean 192 | if self.img_norm: 193 | if self.saliency_eval_depth == False: 194 | img = img.astype(float) / 255.0 195 | else: 196 | img = ((img / 255 - 0.5) / 0.5) 197 | # NHWC -> NCHW 198 | img = img.transpose(2, 0, 1) # shape: [3, h, w] 199 | 200 | classes = np.unique(lbl) # all classes included in this label image 201 | lbl = lbl.astype(float) 202 | lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), "nearest", mode="F") 203 | lbl = lbl.astype(int) 204 | 205 | if not np.all(classes == np.unique(lbl)): 206 | print("WARN: resizing labels yielded fewer classes") 207 | # sys.stdout.flush() 208 | 209 | if not np.all(np.unique(lbl[lbl != self.ignore_index]) < self.n_classes): 210 | print("after det", classes, np.unique(lbl)) 211 | raise ValueError("Segmentation map contained invalid class values") 212 | 213 | img = torch.from_numpy(img).float() # tensor, shape: [3, h, w] 214 | lbl = torch.from_numpy(lbl).long() # tensor, shape: [h, w] 215 | 216 | return img, lbl 217 | 218 | def decode_segmap_tocolor(self, temp): 219 | r = temp.copy() 220 | g = temp.copy() 221 | b = temp.copy() 222 | for l in range(0, self.n_classes): 223 | r[temp == l] = self.label_colours[l][0] 224 | g[temp == l] = self.label_colours[l][1] 225 | b[temp == l] = self.label_colours[l][2] 226 | 227 | rgb = np.zeros((temp.shape[0], temp.shape[1], 3)) 228 | rgb[:, :, 0] = r / 255.0 229 | rgb[:, :, 1] = g / 255.0 230 | rgb[:, :, 2] = b / 255.0 231 | return rgb 232 | 233 | def decode_segmap_tolabelId(self, temp): 234 | labels_ID = temp.copy() 235 | for i in range(19): 236 | labels_ID[temp == i] = self.valid_classes[i] 237 | return labels_ID 238 | 239 | def encode_segmap(self, mask): 240 | # Put all void classes to 250 241 | # map valid classes to 0~18 242 | for _voidc in self.void_classes: 243 | mask[mask == _voidc] = self.ignore_index 244 | for _validc in self.valid_classes: 245 | mask[mask == _validc] = self.class_map[_validc] 246 | return mask -------------------------------------------------------------------------------- /ptsemseg/loss/__init__.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import logging 3 | import functools 4 | 5 | from ptsemseg.loss.loss import cross_entropy2d 6 | from ptsemseg.loss.loss import bootstrapped_cross_entropy2d 7 | 8 | from ptsemseg.loss.loss import l1_loss 9 | from ptsemseg.loss.loss import Berhu_loss 10 | from ptsemseg.loss.loss import Huber_loss 11 | from ptsemseg.loss.loss import scale_invariant_loss 12 | 13 | logger = logging.getLogger('ptsemseg') 14 | 15 | key2loss = {'cross_entropy': cross_entropy2d, 16 | 'bootstrapped_cross_entropy': bootstrapped_cross_entropy2d, 17 | 'l1_loss': l1_loss, 18 | 'berhu_loss': Berhu_loss, 19 | 'huber_loss': Huber_loss, 20 | 'scale_invariant_loss': scale_invariant_loss} 21 | 22 | def get_loss_function(cfg): 23 | if cfg['training']['loss'] is None: 24 | if cfg['task'] == "seg": 25 | logger.info("Using default cross entropy loss for segmentation") 26 | return cross_entropy2d 27 | elif cfg['task'] == "depth": 28 | logger.info("Using default scale invariant loss for depth") 29 | return scale_invariant_loss 30 | else: 31 | print("Please specify the loss!") 32 | 33 | else: 34 | loss_dict = cfg['training']['loss'] 35 | loss_name = loss_dict['name'] 36 | loss_params = {k:v for k,v in loss_dict.items() if k != 'name'} 37 | 38 | if loss_name not in key2loss: 39 | raise NotImplementedError('Loss {} not implemented'.format(loss_name)) 40 | 41 | logger.info('Using {} 
with {} params'.format(loss_name, 42 | loss_params)) 43 | return functools.partial(key2loss[loss_name], **loss_params) 44 | -------------------------------------------------------------------------------- /ptsemseg/loss/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 7 | 8 | ########################### for segmentation #################### 9 | 10 | 11 | def cross_entropy2d(input, target, weight=None, size_average=True): 12 | # taken from https://github.com/meetshah1995/pytorch-semseg 13 | n, c, h, w = input.size() 14 | nt, ht, wt = target.size() 15 | 16 | # Handle inconsistent size between input and target 17 | if h > ht and w > wt: # upsample labels 18 | target = target.unsequeeze(1) 19 | target = F.upsample(target, size=(h, w), mode="nearest") 20 | target = target.sequeeze(1) 21 | elif h < ht and w < wt: # upsample images 22 | input = F.upsample(input, size=(ht, wt), mode="bilinear") 23 | elif h != ht and w != wt: 24 | raise Exception("Only support upsampling") 25 | 26 | loss = F.cross_entropy( 27 | input, target, weight=weight, size_average=size_average, ignore_index=250 28 | ) 29 | return loss 30 | 31 | 32 | def bootstrapped_cross_entropy2d(input, 33 | target, 34 | K, 35 | weight=None, 36 | size_average=True): 37 | # taken from https://github.com/meetshah1995/pytorch-semseg 38 | batch_size = input.size()[0] 39 | 40 | def _bootstrap_xentropy_single(input, 41 | target, 42 | K, 43 | weight=None, 44 | size_average=True): 45 | 46 | n, c, h, w = input.size() 47 | loss = F.cross_entropy(input, 48 | target, 49 | weight=weight, 50 | reduce=False, 51 | size_average=False, 52 | ignore_index=250) 53 | loss = loss.view(-1) 54 | topk_loss, _ = loss.topk(K) 55 | reduced_topk_loss = topk_loss.sum() / K 56 | 57 | return reduced_topk_loss 58 | 59 | loss = 0.0 60 | # Bootstrap from each image not entire batch 61 | for i in range(batch_size): 62 | loss += _bootstrap_xentropy_single( 63 | input=torch.unsqueeze(input[i], 0), 64 | target=torch.unsqueeze(target[i], 0), 65 | K=K, 66 | weight=weight, 67 | size_average=size_average, 68 | ) 69 | return loss / float(batch_size) 70 | 71 | 72 | ############################ for depth ########################### 73 | 74 | 75 | def compute_mask(input, target): 76 | # mask out depth values in predicted and target depth which are <= 0 77 | mask = np.logical_and(input.data.cpu().numpy() > 0, target.data.cpu().numpy() > 0) 78 | total_pixel = np.prod(input.size(), dtype=np.float32).item() 79 | total_pixel = total_pixel - np.sum(mask) 80 | mask = torch.from_numpy(mask.astype(int)).float().to(device) 81 | return mask, total_pixel 82 | 83 | 84 | def l1_loss(input, target, smooth=True): 85 | if not input.size() == target.size(): 86 | _, _, H, W = target.size() 87 | input = F.upsample(input, size=(H, W), mode='bilinear') 88 | 89 | # mask out depth values in input and target which are <= 0 90 | mask, total_pixel = compute_mask(input, target) 91 | diff = torch.abs(target - input) 92 | diff = diff * mask 93 | loss = torch.sum(diff) / total_pixel 94 | if smooth: 95 | loss = loss + smooth_loss(input=input) / 1000.0 # empirical weight for smooth loss 96 | return loss 97 | 98 | 99 | def Berhu_loss(input, target, smooth=True): 100 | if not input.size() == target.size(): 101 | _, _, H, W = target.size() 102 | input = F.upsample(input, size=(H, W), mode='bilinear') 103 | 
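    # The BerHu (reverse Huber) penalty computed below is |d| for per-pixel
    # errors with |d| <= c and (d^2 + c^2) / (2c) for |d| > c, where the
    # threshold c is set to one fifth of the largest absolute error in the
    # batch; pixels with non-positive predicted or target depth are masked out.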
104 | # mask out depth values in input and target which are <= 0 105 | mask, total_pixel = compute_mask(input, target) 106 | diff = torch.abs(target - input) 107 | c = torch.max(diff).item() / 5 108 | leq = (diff <= c).float() 109 | l2_losses = (diff ** 2 + c ** 2) / (2 * c) 110 | losses = leq * diff + (1 - leq) * l2_losses 111 | losses = losses * mask 112 | loss = torch.sum(losses) / total_pixel 113 | if smooth: 114 | loss = loss + smooth_loss(input=input) / 1000.0 115 | return loss 116 | 117 | 118 | def Huber_loss(input, target, smooth=True): 119 | if not input.size() == target.size(): 120 | _, _, H, W = target.size() 121 | input = F.upsample(input, size=(H, W), mode='bilinear') 122 | 123 | # mask out depth values in input and target which are <= 0 124 | mask, total_pixel = compute_mask(input, target) 125 | diff = target - input 126 | leq = (diff < 1).float() 127 | l2_losses = diff ** 2 / 2 128 | losses = leq * l2_losses + (1-leq) * (diff - 0.5) 129 | losses = losses * mask 130 | loss = torch.sum(losses) / total_pixel 131 | if smooth: 132 | loss = loss + smooth_loss(input=input) / 1000.0 133 | return loss 134 | 135 | 136 | # input, target: [batch_size, 1, h, w] 137 | def scale_invariant_loss(input, target, smooth=True): 138 | if not input.size() == target.size(): 139 | _, _, H, W = target.size() 140 | input = F.upsample(input, size=(H, W), mode='bilinear') 141 | 142 | # mask out depth values in input and target which are <= 0 143 | mask, total_pixel = compute_mask(input, target) 144 | 145 | first_log = torch.log(torch.clamp(input, min=1e-3)) 146 | second_log = torch.log(torch.clamp(target, min=1e-3)) 147 | diff = first_log - second_log 148 | diff = diff * mask 149 | loss = torch.sum((diff ** 2))/total_pixel - (torch.sum(diff) ** 2)/(total_pixel ** 2) 150 | if smooth: 151 | loss = loss + smooth_loss(input=input) / 1000.0 152 | return loss 153 | 154 | 155 | def gradient(pred): 156 | D_dy = pred[:, :, 1:] - pred[:, :, :-1] 157 | D_dx = pred[:, :, :, 1:] - pred[:, :, :, :-1] 158 | return D_dx, D_dy 159 | 160 | 161 | def smooth_loss(input): 162 | dx, dy = gradient(input) 163 | dx2, dxdy = gradient(dx) 164 | dydx, dy2 = gradient(dy) 165 | loss = dx2.abs().mean() + dxdy.abs().mean() + dydx.abs().mean() + dy2.abs().mean() 166 | return loss -------------------------------------------------------------------------------- /ptsemseg/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class runningScoreSeg(object): 5 | # Adapted from https://github.com/meetshah1995/pytorch-semseg 6 | def __init__(self, n_classes): 7 | self.n_classes = n_classes 8 | self.confusion_matrix = np.zeros((n_classes, n_classes)) 9 | 10 | def _fast_hist(self, label_true, label_pred, n_class): # label_true / label_pred: length (width*height) 11 | mask = (label_true >= 0) & (label_true < n_class) # remove invalid class (class 250) 12 | hist = np.bincount( 13 | n_class * label_true[mask].astype(int) + label_pred[mask], 14 | minlength=n_class ** 2, 15 | ).reshape(n_class, n_class) # [n_classes, n_classes] 16 | return hist 17 | 18 | def update(self, gt, pred): # [batch_size, height, width] 19 | for lt, lp in zip(gt, pred): 20 | self.confusion_matrix += self._fast_hist( 21 | lt.flatten(), lp.flatten(), self.n_classes 22 | ) 23 | 24 | def get_scores(self): 25 | """Returns accuracy score evaluation result. 
26 | - overall accuracy 27 | - mean accuracy 28 | - mean IU 29 | - fwavacc 30 | """ 31 | hist = self.confusion_matrix 32 | acc = np.diag(hist).sum() / hist.sum() 33 | acc_cls = np.diag(hist) / hist.sum(axis=1) 34 | acc_cls = np.nanmean(acc_cls) 35 | iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) 36 | mean_iu = np.nanmean(iu) 37 | freq = hist.sum(axis=1) / hist.sum() 38 | fwaviu = (freq[freq > 0] * iu[freq > 0]).sum() 39 | cls_iu = dict(zip(range(self.n_classes), iu)) 40 | 41 | return ( 42 | { 43 | "Overall Acc: \t": acc, 44 | "Mean Acc : \t": acc_cls, 45 | "FreqW IoU : \t": fwaviu, 46 | "Mean IoU : \t": mean_iu, 47 | }, 48 | cls_iu, 49 | ) 50 | 51 | def reset(self): 52 | self.confusion_matrix = np.zeros((self.n_classes, self.n_classes)) 53 | 54 | 55 | class runningScoreDepth(object): 56 | def __init__(self, dataset): 57 | self.error_names = ['abs_diff', 'abs_rel', 'sq_rel', 'rmse', 'rmse_log', 'a1', 'a2', 'a3'] 58 | self.metric_len = len(self.error_names) 59 | self.error_metric = [0 for i in range(self.metric_len)] # [0,0,0,0,...,0] 60 | self.dataset = dataset 61 | self.reset() 62 | 63 | def compute_errors_depth(self, gt, pred, crop=True): # input gt, pred: numpy array, shape: [batch_size, h, w] 64 | abs_diff, abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 = 0, 0, 0, 0, 0, 0, 0, 0 65 | batch_size = gt.shape[0] 66 | 67 | ''' 68 | crop used by Garg ECCV16 to reprocude Eigen NIPS14 results 69 | construct a mask of False values, with the same size as target 70 | and then set to True values inside the crop 71 | ''' 72 | if crop: 73 | crop_mask = gt[0] != gt[0] 74 | if self.dataset == 'kitti': 75 | y1, y2 = int(0.40810811 * gt.shape[1]), int(0.99189189 * gt.shape[1]) 76 | x1, x2 = int(0.03594771 * gt.shape[2]), int(0.96405229 * gt.shape[2]) 77 | elif self.dataset == 'cityscapes': 78 | y1, y2 = int(0.05 * gt.shape[1]), int(0.80 * gt.shape[1]) 79 | x1, x2 = int(0.05 * gt.shape[2]), int(0.99 * gt.shape[2]) 80 | crop_mask[y1:y2, x1:x2] = 1 81 | 82 | for current_gt, current_pred in zip(gt, pred): # for each image in a batch 83 | valid = (current_gt > 0) & (current_gt < 80) & (current_pred > 0) & ( 84 | current_pred < 80) # mask out depth not in (0, 80) 85 | if crop: 86 | valid = valid & crop_mask 87 | 88 | valid_gt = current_gt[valid] 89 | valid_pred = current_pred[valid] 90 | 91 | # valid_pred = valid_pred * np.median(valid_gt)/np.median(valid_pred) 92 | 93 | thresh = np.maximum((valid_gt / valid_pred), (valid_pred / valid_gt)) 94 | a1 += (thresh < 1.25).mean() 95 | a2 += (thresh < 1.25 ** 2).mean() 96 | a3 += (thresh < 1.25 ** 3).mean() 97 | 98 | rmse += np.sqrt(np.mean((valid_gt - valid_pred) ** 2)) 99 | rmse_log += np.sqrt(np.mean((np.log(valid_gt) - np.log(valid_pred)) ** 2)) 100 | 101 | abs_diff += np.mean(np.abs(valid_gt - valid_pred)) 102 | abs_rel += np.mean(np.abs(valid_gt - valid_pred) / valid_gt) 103 | 104 | sq_rel += np.mean(((valid_gt - valid_pred) ** 2) / valid_gt) 105 | 106 | return [metric.item() / batch_size for metric in [abs_diff, abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3]] 107 | 108 | def update(self, gt, pred): 109 | self.error_metric = self.compute_errors_depth(gt, pred) 110 | n = 1 111 | self.count += n 112 | for i, v in enumerate(self.error_metric): 113 | self.val[i] = v 114 | self.sum[i] += v * n 115 | self.avg[i] = self.sum[i] / self.count 116 | 117 | def reset(self): 118 | self.val = [0.0]*self.metric_len 119 | self.avg = [0.0]*self.metric_len 120 | self.sum = [0.0]*self.metric_len 121 | self.count = 0 122 | 123 | def get_scores(self): 124 | 
return ({ 125 | "abs diff: \t": self.avg[0], 126 | "abs rel : \t": self.avg[1], 127 | "sq rel : \t": self.avg[2], 128 | "rmse : \t": self.avg[3], 129 | "rmse log : \t": self.avg[4], 130 | "threshold 1 : \t": self.avg[5], 131 | "threshold 2 : \t": self.avg[6], 132 | "threshold 3 : \t": self.avg[7]} 133 | ) 134 | 135 | 136 | class averageMeter(object): 137 | """Computes and stores the average and current value""" 138 | def __init__(self): 139 | self.reset() 140 | 141 | def reset(self): 142 | self.val = 0 143 | self.avg = 0 144 | self.sum = 0 145 | self.count = 0 146 | 147 | def update(self, val, n=1): 148 | self.val = val 149 | self.sum += val * n 150 | self.count += n 151 | self.avg = self.sum / self.count 152 | 153 | -------------------------------------------------------------------------------- /ptsemseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torchvision.models as models 3 | from collections import OrderedDict 4 | 5 | from ptsemseg.models.fcn_seg import * 6 | from ptsemseg.models.segnet_seg import * 7 | from ptsemseg.models.frrn_seg import * 8 | from ptsemseg.models.deeplab_seg import * 9 | from ptsemseg.models.fcrn_seg import * 10 | from ptsemseg.models.dispnet_seg import * 11 | 12 | from ptsemseg.models.fcn_depth import * 13 | from ptsemseg.models.segnet_depth import * 14 | from ptsemseg.models.frrn_depth import * 15 | from ptsemseg.models.deeplab_depth import * 16 | from ptsemseg.models.fcrn_depth import * 17 | from ptsemseg.models.dispnet_depth import * 18 | 19 | 20 | def get_model(model_dict, task, n_classes): 21 | name = model_dict['arch'] 22 | model = _get_model_instance(name, task) # model: an instance of class fcn8s 23 | param_dict = copy.deepcopy(model_dict) 24 | param_dict.pop('arch') 25 | 26 | if task == "seg": 27 | model = model(n_classes=n_classes, **param_dict) 28 | elif task == "depth": 29 | model = model(**param_dict) 30 | 31 | if name == "frrn": 32 | pass 33 | 34 | elif name == "fcn": 35 | vgg16 = models.vgg16(pretrained=True) 36 | model.init_vgg16_params(vgg16) 37 | 38 | # if you want to load from downloaded pretrained model: 39 | # vgg16 = models.vgg16(pretrained=False) 40 | # vgg16.load_state_dict(torch.load("pretrained_models/vgg16-imagenet.pth")) 41 | # model.init_vgg16_params(vgg16) 42 | 43 | elif name == "segnet": 44 | vgg16 = models.vgg16(pretrained=True) 45 | model.init_vgg16_params(vgg16) 46 | 47 | # if you want to load from downloaded pretrained model: 48 | # vgg16 = models.vgg16(pretrained=False) 49 | # vgg16.load_state_dict(torch.load("pretrained_models/vgg16-imagenet.pth")) 50 | # model.init_vgg16_params(vgg16) 51 | 52 | elif name == "dispnet": 53 | model.init_weights() 54 | 55 | elif name == "deeplab": 56 | resnet101 = models.resnet101(pretrained=True) 57 | initial_state_dict = model.init_resnet101_params(resnet101) 58 | model.load_state_dict(initial_state_dict, strict=False) 59 | 60 | # if you want to load from downloaded pretrained model: 61 | # model_path = 'pretrained_models/resnet101-imagenet.pth' 62 | # new_state_dict = model.init_resnet101_params(model_path) 63 | # model.load_state_dict(new_state_dict, strict=False) 64 | 65 | elif name == "fcrn": 66 | resnet50 = models.resnet50(pretrained=True) 67 | init_state_dict = model.init_resnet50_params(resnet50) 68 | model.load_state_dict(init_state_dict, strict=False) 69 | 70 | # if you want to load from downloaded pretrained model: 71 | # model_path = 'pretrained_models/resnet50-imagenet.pth' 72 | # init_state_dict = 
model.init_resnet50_params(model_path) 73 | # model.load_state_dict(init_state_dict, strict=False) 74 | 75 | else: 76 | print("Model {} not available".format(name)) 77 | 78 | return model 79 | 80 | 81 | def _get_model_instance(name, task): 82 | try: 83 | if task == "seg": 84 | return { 85 | "fcn": fcn_seg, 86 | "segnet": segnet_seg, 87 | "frrn": frrn_seg, 88 | "dispnet": dispnet_seg, 89 | "deeplab": deeplab_seg, 90 | "fcrn": fcrn_seg, 91 | }[name] 92 | elif task == "depth": 93 | return { 94 | "fcn": fcn_depth, 95 | "segnet": segnet_depth, 96 | "frrn": frrn_depth, 97 | "dispnet": dispnet_depth, 98 | "deeplab": deeplab_depth, 99 | "fcrn": fcrn_depth, 100 | }[name] 101 | except: 102 | raise("Model {} not available".format(name)) 103 | -------------------------------------------------------------------------------- /ptsemseg/models/deeplab_depth.py: -------------------------------------------------------------------------------- 1 | # deeplab v2: ResNet101 + ASPP, no multi scale input, for depth 2 | 3 | import torch.nn as nn 4 | import math 5 | import torch 6 | import numpy as np 7 | import torch.nn.functional as F 8 | from collections import OrderedDict 9 | 10 | affine_par = True # allow weights and bias in batch normalization layers or not 11 | learnable_bn_weights = False # allow learnable weights and bias in batch normalization layers or not 12 | 13 | 14 | def conv3x3(in_planes, out_planes, stride=1): 15 | "3x3 convolution with padding" 16 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 17 | padding=1, bias=False) 18 | 19 | 20 | class BasicBlock(nn.Module): 21 | expansion = 1 22 | 23 | def __init__(self, inplanes, planes, stride=1, downsample=None): 24 | super(BasicBlock, self).__init__() 25 | self.conv1 = conv3x3(inplanes, planes, stride) 26 | self.bn1 = nn.BatchNorm2d(planes, affine = affine_par) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.conv2 = conv3x3(planes, planes) 29 | self.bn2 = nn.BatchNorm2d(planes, affine = affine_par) 30 | self.downsample = downsample 31 | self.stride = stride 32 | 33 | def forward(self, x): 34 | residual = x 35 | 36 | out = self.conv1(x) 37 | out = self.bn1(out) 38 | out = self.relu(out) 39 | 40 | out = self.conv2(out) 41 | out = self.bn2(out) 42 | 43 | if self.downsample is not None: 44 | residual = self.downsample(x) 45 | 46 | out += residual 47 | out = self.relu(out) 48 | 49 | return out 50 | 51 | 52 | class Bottleneck(nn.Module): 53 | expansion = 4 54 | 55 | def __init__(self, inplanes, planes, stride=1, dilation_ = 1, downsample=None): 56 | super(Bottleneck, self).__init__() 57 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) # change 58 | self.bn1 = nn.BatchNorm2d(planes,affine = affine_par) 59 | if learnable_bn_weights == False: 60 | for i in self.bn1.parameters(): 61 | i.requires_grad = False 62 | padding = 1 63 | if dilation_ == 2: 64 | padding = 2 65 | elif dilation_ == 4: 66 | padding = 4 67 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, # change 68 | padding=padding, bias=False, dilation = dilation_) 69 | self.bn2 = nn.BatchNorm2d(planes,affine = affine_par) 70 | if learnable_bn_weights == False: 71 | for i in self.bn2.parameters(): 72 | i.requires_grad = False 73 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 74 | self.bn3 = nn.BatchNorm2d(planes * 4, affine = affine_par) 75 | if learnable_bn_weights == False: 76 | for i in self.bn3.parameters(): 77 | i.requires_grad = False 78 | self.relu = nn.ReLU(inplace=True) 79 | self.downsample = 
downsample 80 | self.stride = stride 81 | 82 | 83 | 84 | def forward(self, x): 85 | residual = x 86 | 87 | out = self.conv1(x) 88 | out = self.bn1(out) 89 | out = self.relu(out) 90 | 91 | out = self.conv2(out) 92 | out = self.bn2(out) 93 | out = self.relu(out) 94 | 95 | out = self.conv3(out) 96 | out = self.bn3(out) 97 | 98 | if self.downsample is not None: 99 | residual = self.downsample(x) 100 | 101 | out += residual 102 | out = self.relu(out) 103 | 104 | return out 105 | 106 | class Classifier_Module(nn.Module): 107 | 108 | def __init__(self,dilation_series,padding_series): 109 | super(Classifier_Module, self).__init__() 110 | self.conv2d_list = nn.ModuleList() 111 | for dilation,padding in zip(dilation_series,padding_series): 112 | self.conv2d_list.append(nn.Conv2d(2048,1,kernel_size=3,stride=1, padding =padding, dilation = dilation,bias = True)) 113 | 114 | for m in self.conv2d_list: 115 | m.weight.data.normal_(0, 0.01) 116 | 117 | 118 | def forward(self, x): 119 | out = self.conv2d_list[0](x) 120 | for i in range(len(self.conv2d_list)-1): 121 | out += self.conv2d_list[i+1](x) 122 | return out 123 | 124 | 125 | 126 | class ResNet(nn.Module): 127 | def __init__(self, block, layers): 128 | self.inplanes = 64 129 | super(ResNet, self).__init__() 130 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 131 | bias=False) 132 | self.bn1 = nn.BatchNorm2d(64,affine = affine_par) 133 | if learnable_bn_weights == False: 134 | for i in self.bn1.parameters(): 135 | i.requires_grad = False 136 | self.relu = nn.ReLU(inplace=True) 137 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) # change 138 | self.layer1 = self._make_layer(block, 64, layers[0]) 139 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 140 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation__ = 2) 141 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation__ = 4) 142 | self.layer5 = self._make_pred_layer(Classifier_Module, [6,12,18,24],[6,12,18,24]) # ASPP 143 | 144 | for m in self.modules(): 145 | if isinstance(m, nn.Conv2d): 146 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 147 | m.weight.data.normal_(0, 0.01) 148 | elif isinstance(m, nn.BatchNorm2d): 149 | m.weight.data.fill_(1) 150 | m.bias.data.zero_() 151 | 152 | def _make_layer(self, block, planes, blocks, stride=1,dilation__ = 1): 153 | downsample = None 154 | if stride != 1 or self.inplanes != planes * block.expansion or dilation__ == 2 or dilation__ == 4: 155 | downsample = nn.Sequential( 156 | nn.Conv2d(self.inplanes, planes * block.expansion, 157 | kernel_size=1, stride=stride, bias=False), 158 | nn.BatchNorm2d(planes * block.expansion,affine = affine_par), 159 | ) 160 | if learnable_bn_weights == False: 161 | for i in downsample._modules['1'].parameters(): 162 | i.requires_grad = False 163 | layers = [] 164 | layers.append(block(self.inplanes, planes, stride,dilation_=dilation__, downsample = downsample )) 165 | self.inplanes = planes * block.expansion 166 | for i in range(1, blocks): 167 | layers.append(block(self.inplanes, planes,dilation_=dilation__)) 168 | 169 | return nn.Sequential(*layers) 170 | 171 | def _make_pred_layer(self,block, dilation_series, padding_series): 172 | return block(dilation_series,padding_series) 173 | 174 | def forward(self, x): 175 | x = self.conv1(x) 176 | x = self.bn1(x) 177 | x = self.relu(x) 178 | x = self.maxpool(x) 179 | x = self.layer1(x) 180 | x = self.layer2(x) 181 | x = self.layer3(x) 182 | x = self.layer4(x) 183 | 
x = self.layer5(x) 184 | 185 | return x 186 | 187 | class deeplab_depth(nn.Module): 188 | def __init__(self): 189 | super(deeplab_depth,self).__init__() 190 | self.Scale = ResNet(Bottleneck,[3, 4, 23, 3]) 191 | 192 | def forward(self,x): 193 | input_size = x.size()[2:] # x: [batch_size, 3, h, w] 194 | out = self.Scale(x) # for original scale 195 | out = F.interpolate(out, size=input_size, mode='bilinear', align_corners=True) 196 | return out 197 | 198 | # load pretrained resnet101 weights from torchvision resnet101 model 199 | def init_resnet101_params(self, resnet101): 200 | initial_state_dict = resnet101.state_dict() 201 | new_state_dict = OrderedDict() 202 | for k, v in initial_state_dict.items(): 203 | k = 'Scale.' + k 204 | new_state_dict[k] = v 205 | return new_state_dict 206 | 207 | # if you want to load from downloaded pretrained model: 208 | # model_path: path to the downloaded model 209 | # def init_resnet101_params(self, model_path): 210 | # saved_state_dict = torch.load(model_path, map_location=lambda storage, loc: storage) 211 | # new_state_dict = OrderedDict() 212 | # for k, v in saved_state_dict.items(): 213 | # k = 'Scale.' + k 214 | # new_state_dict[k] = v 215 | # return new_state_dict 216 | 217 | 218 | -------------------------------------------------------------------------------- /ptsemseg/models/deeplab_seg.py: -------------------------------------------------------------------------------- 1 | # deeplab v2: ResNet101 + ASPP, no multi scale input (for cityscapes) 2 | # for segmentation 3 | 4 | import torch.nn as nn 5 | import math 6 | import torch 7 | import numpy as np 8 | import torch.nn.functional as F 9 | from collections import OrderedDict 10 | 11 | affine_par = True # allow weights and bias in batch normalization layers or not 12 | learnable_bn_weights = False # allow learnable weights and bias in batch normalization layers or not 13 | 14 | 15 | def conv3x3(in_planes, out_planes, stride=1): 16 | "3x3 convolution with padding" 17 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 18 | padding=1, bias=False) 19 | 20 | 21 | class BasicBlock(nn.Module): 22 | expansion = 1 23 | 24 | def __init__(self, inplanes, planes, stride=1, downsample=None): 25 | super(BasicBlock, self).__init__() 26 | self.conv1 = conv3x3(inplanes, planes, stride) 27 | self.bn1 = nn.BatchNorm2d(planes, affine = affine_par) 28 | self.relu = nn.ReLU(inplace=True) 29 | self.conv2 = conv3x3(planes, planes) 30 | self.bn2 = nn.BatchNorm2d(planes, affine = affine_par) 31 | self.downsample = downsample 32 | self.stride = stride 33 | 34 | def forward(self, x): 35 | residual = x 36 | 37 | out = self.conv1(x) 38 | out = self.bn1(out) 39 | out = self.relu(out) 40 | 41 | out = self.conv2(out) 42 | out = self.bn2(out) 43 | 44 | if self.downsample is not None: 45 | residual = self.downsample(x) 46 | 47 | out += residual 48 | out = self.relu(out) 49 | 50 | return out 51 | 52 | 53 | class Bottleneck(nn.Module): 54 | expansion = 4 55 | 56 | def __init__(self, inplanes, planes, stride=1, dilation_ = 1, downsample=None): 57 | super(Bottleneck, self).__init__() 58 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) # change 59 | self.bn1 = nn.BatchNorm2d(planes,affine = affine_par) 60 | if learnable_bn_weights == False: 61 | for i in self.bn1.parameters(): 62 | i.requires_grad = False 63 | padding = 1 64 | if dilation_ == 2: 65 | padding = 2 66 | elif dilation_ == 4: 67 | padding = 4 68 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, # change 69 
| padding=padding, bias=False, dilation = dilation_) 70 | self.bn2 = nn.BatchNorm2d(planes,affine = affine_par) 71 | if learnable_bn_weights == False: 72 | for i in self.bn2.parameters(): 73 | i.requires_grad = False 74 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 75 | self.bn3 = nn.BatchNorm2d(planes * 4, affine = affine_par) 76 | if learnable_bn_weights == False: 77 | for i in self.bn3.parameters(): 78 | i.requires_grad = False 79 | self.relu = nn.ReLU(inplace=True) 80 | self.downsample = downsample 81 | self.stride = stride 82 | 83 | 84 | 85 | def forward(self, x): 86 | residual = x 87 | 88 | out = self.conv1(x) 89 | out = self.bn1(out) 90 | out = self.relu(out) 91 | 92 | out = self.conv2(out) 93 | out = self.bn2(out) 94 | out = self.relu(out) 95 | 96 | out = self.conv3(out) 97 | out = self.bn3(out) 98 | 99 | if self.downsample is not None: 100 | residual = self.downsample(x) 101 | 102 | out += residual 103 | out = self.relu(out) 104 | 105 | return out 106 | 107 | class Classifier_Module(nn.Module): 108 | 109 | def __init__(self,dilation_series,padding_series,n_classes): 110 | super(Classifier_Module, self).__init__() 111 | self.conv2d_list = nn.ModuleList() 112 | for dilation,padding in zip(dilation_series,padding_series): 113 | self.conv2d_list.append(nn.Conv2d(2048,n_classes,kernel_size=3,stride=1, padding =padding, dilation = dilation,bias = True)) 114 | 115 | for m in self.conv2d_list: 116 | m.weight.data.normal_(0, 0.01) 117 | 118 | 119 | def forward(self, x): 120 | out = self.conv2d_list[0](x) 121 | for i in range(len(self.conv2d_list)-1): 122 | out += self.conv2d_list[i+1](x) 123 | return out 124 | 125 | 126 | 127 | class ResNet(nn.Module): 128 | def __init__(self, block, layers, n_classes): 129 | self.inplanes = 64 130 | super(ResNet, self).__init__() 131 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 132 | bias=False) 133 | self.bn1 = nn.BatchNorm2d(64,affine = affine_par) 134 | if learnable_bn_weights == False: 135 | for i in self.bn1.parameters(): 136 | i.requires_grad = False 137 | self.relu = nn.ReLU(inplace=True) 138 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True) # change 139 | self.layer1 = self._make_layer(block, 64, layers[0]) 140 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 141 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation__ = 2) 142 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation__ = 4) 143 | self.layer5 = self._make_pred_layer(Classifier_Module, [6,12,18,24],[6,12,18,24],n_classes) # ASPP 144 | 145 | for m in self.modules(): 146 | if isinstance(m, nn.Conv2d): 147 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 148 | m.weight.data.normal_(0, 0.01) 149 | elif isinstance(m, nn.BatchNorm2d): 150 | m.weight.data.fill_(1) 151 | m.bias.data.zero_() 152 | 153 | def _make_layer(self, block, planes, blocks, stride=1,dilation__ = 1): 154 | downsample = None 155 | if stride != 1 or self.inplanes != planes * block.expansion or dilation__ == 2 or dilation__ == 4: 156 | downsample = nn.Sequential( 157 | nn.Conv2d(self.inplanes, planes * block.expansion, 158 | kernel_size=1, stride=stride, bias=False), 159 | nn.BatchNorm2d(planes * block.expansion,affine = affine_par), 160 | ) 161 | if learnable_bn_weights == False: 162 | for i in downsample._modules['1'].parameters(): 163 | i.requires_grad = False 164 | layers = [] 165 | layers.append(block(self.inplanes, planes, stride,dilation_=dilation__, downsample = 
downsample )) 166 | self.inplanes = planes * block.expansion 167 | for i in range(1, blocks): 168 | layers.append(block(self.inplanes, planes,dilation_=dilation__)) 169 | 170 | return nn.Sequential(*layers) 171 | 172 | def _make_pred_layer(self,block, dilation_series, padding_series,n_classes): 173 | return block(dilation_series,padding_series,n_classes) 174 | 175 | def forward(self, x): 176 | x = self.conv1(x) 177 | x = self.bn1(x) 178 | x = self.relu(x) 179 | x = self.maxpool(x) 180 | x = self.layer1(x) 181 | x = self.layer2(x) 182 | x = self.layer3(x) 183 | x = self.layer4(x) 184 | x = self.layer5(x) 185 | 186 | return x 187 | 188 | class deeplab_seg(nn.Module): 189 | def __init__(self,n_classes=19): 190 | super(deeplab_seg,self).__init__() 191 | self.Scale = ResNet(Bottleneck,[3, 4, 23, 3],n_classes) 192 | 193 | def forward(self,x): 194 | input_size = x.size()[2:] # x: [batch_size, 3, h, w] 195 | out = self.Scale(x) # for original scale 196 | out = F.interpolate(out, size=input_size, mode='bilinear', align_corners=True) 197 | return out 198 | 199 | def init_resnet101_params(self, resnet101): 200 | initial_state_dict = resnet101.state_dict() 201 | new_state_dict = OrderedDict() 202 | for k, v in initial_state_dict.items(): 203 | k = 'Scale.' + k 204 | new_state_dict[k] = v 205 | return new_state_dict 206 | 207 | # if you want to load from downloaded pretrained model: 208 | # model_path: path to the downloaded model 209 | # def init_resnet101_params(self, model_path): 210 | # saved_state_dict = torch.load(model_path, map_location=lambda storage, loc: storage) 211 | # new_state_dict = OrderedDict() 212 | # for k, v in saved_state_dict.items(): 213 | # k = 'Scale.' + k 214 | # new_state_dict[k] = v 215 | # return new_state_dict 216 | 217 | 218 | 219 | -------------------------------------------------------------------------------- /ptsemseg/models/dispnet_depth.py: -------------------------------------------------------------------------------- 1 | # taken from https://github.com/ClementPinard/SfmLearner-Pytorch 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.nn.init import xavier_uniform_, zeros_ 7 | 8 | # DispNetS for depth 9 | 10 | def downsample_conv(in_planes, out_planes, kernel_size=3): 11 | return nn.Sequential( 12 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=2, padding=(kernel_size-1)//2), 13 | nn.ReLU(inplace=True), 14 | nn.Conv2d(out_planes, out_planes, kernel_size=kernel_size, padding=(kernel_size-1)//2), 15 | nn.ReLU(inplace=True) 16 | ) 17 | 18 | 19 | def predict_disp(in_planes): 20 | return nn.Sequential( 21 | nn.Conv2d(in_planes, 1, kernel_size=1, padding=0), 22 | nn.Sigmoid() 23 | ) 24 | 25 | 26 | def conv(in_planes, out_planes): 27 | return nn.Sequential( 28 | nn.Conv2d(in_planes, out_planes, kernel_size=3, padding=1), 29 | nn.ReLU(inplace=True) 30 | ) 31 | 32 | 33 | def upconv(in_planes, out_planes): 34 | return nn.Sequential( 35 | nn.ConvTranspose2d(in_planes, out_planes, kernel_size=3, stride=2, padding=1, output_padding=1), 36 | nn.ReLU(inplace=True) 37 | ) 38 | 39 | 40 | def crop_like(input, ref): 41 | assert(input.size(2) >= ref.size(2) and input.size(3) >= ref.size(3)) 42 | return input[:, :, :ref.size(2), :ref.size(3)] 43 | 44 | 45 | class dispnet_depth(nn.Module): 46 | 47 | def __init__(self, alpha=10, beta=0.01): 48 | super(dispnet_depth, self).__init__() 49 | 50 | self.alpha = alpha 51 | self.beta = beta 52 | 53 | conv_planes = [32, 64, 128, 256, 512, 512, 512] 54 | self.conv1 = 
downsample_conv(3, conv_planes[0], kernel_size=7) 55 | self.conv2 = downsample_conv(conv_planes[0], conv_planes[1], kernel_size=5) 56 | self.conv3 = downsample_conv(conv_planes[1], conv_planes[2]) 57 | self.conv4 = downsample_conv(conv_planes[2], conv_planes[3]) 58 | self.conv5 = downsample_conv(conv_planes[3], conv_planes[4]) 59 | self.conv6 = downsample_conv(conv_planes[4], conv_planes[5]) 60 | self.conv7 = downsample_conv(conv_planes[5], conv_planes[6]) 61 | 62 | upconv_planes = [512, 512, 256, 128, 64, 32, 16] 63 | self.upconv7 = upconv(conv_planes[6], upconv_planes[0]) 64 | self.upconv6 = upconv(upconv_planes[0], upconv_planes[1]) 65 | self.upconv5 = upconv(upconv_planes[1], upconv_planes[2]) 66 | self.upconv4 = upconv(upconv_planes[2], upconv_planes[3]) 67 | self.upconv3 = upconv(upconv_planes[3], upconv_planes[4]) 68 | self.upconv2 = upconv(upconv_planes[4], upconv_planes[5]) 69 | self.upconv1 = upconv(upconv_planes[5], upconv_planes[6]) 70 | 71 | self.iconv7 = conv(upconv_planes[0] + conv_planes[5], upconv_planes[0]) 72 | self.iconv6 = conv(upconv_planes[1] + conv_planes[4], upconv_planes[1]) 73 | self.iconv5 = conv(upconv_planes[2] + conv_planes[3], upconv_planes[2]) 74 | self.iconv4 = conv(upconv_planes[3] + conv_planes[2], upconv_planes[3]) 75 | self.iconv3 = conv(1 + upconv_planes[4] + conv_planes[1], upconv_planes[4]) 76 | self.iconv2 = conv(1 + upconv_planes[5] + conv_planes[0], upconv_planes[5]) 77 | self.iconv1 = conv(1 + upconv_planes[6], upconv_planes[6]) 78 | 79 | self.predict_disp4 = predict_disp(upconv_planes[3]) 80 | self.predict_disp3 = predict_disp(upconv_planes[4]) 81 | self.predict_disp2 = predict_disp(upconv_planes[5]) 82 | self.predict_disp1 = predict_disp(upconv_planes[6]) 83 | 84 | def init_weights(self): 85 | for m in self.modules(): 86 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 87 | xavier_uniform_(m.weight) 88 | if m.bias is not None: 89 | zeros_(m.bias) 90 | 91 | def forward(self, x): 92 | out_conv1 = self.conv1(x) 93 | out_conv2 = self.conv2(out_conv1) 94 | out_conv3 = self.conv3(out_conv2) 95 | out_conv4 = self.conv4(out_conv3) 96 | out_conv5 = self.conv5(out_conv4) 97 | out_conv6 = self.conv6(out_conv5) 98 | out_conv7 = self.conv7(out_conv6) 99 | 100 | out_upconv7 = crop_like(self.upconv7(out_conv7), out_conv6) 101 | concat7 = torch.cat((out_upconv7, out_conv6), 1) 102 | out_iconv7 = self.iconv7(concat7) 103 | 104 | out_upconv6 = crop_like(self.upconv6(out_iconv7), out_conv5) 105 | concat6 = torch.cat((out_upconv6, out_conv5), 1) 106 | out_iconv6 = self.iconv6(concat6) 107 | 108 | out_upconv5 = crop_like(self.upconv5(out_iconv6), out_conv4) 109 | concat5 = torch.cat((out_upconv5, out_conv4), 1) 110 | out_iconv5 = self.iconv5(concat5) 111 | 112 | out_upconv4 = crop_like(self.upconv4(out_iconv5), out_conv3) 113 | concat4 = torch.cat((out_upconv4, out_conv3), 1) 114 | out_iconv4 = self.iconv4(concat4) 115 | disp4 = self.alpha * self.predict_disp4(out_iconv4) + self.beta 116 | 117 | out_upconv3 = crop_like(self.upconv3(out_iconv4), out_conv2) 118 | disp4_up = crop_like(F.interpolate(disp4, scale_factor=2, mode='bilinear', align_corners=True), out_conv2) 119 | concat3 = torch.cat((out_upconv3, out_conv2, disp4_up), 1) 120 | out_iconv3 = self.iconv3(concat3) 121 | disp3 = self.alpha * self.predict_disp3(out_iconv3) + self.beta 122 | 123 | out_upconv2 = crop_like(self.upconv2(out_iconv3), out_conv1) 124 | disp3_up = crop_like(F.interpolate(disp3, scale_factor=2, mode='bilinear', align_corners=True), out_conv1) 125 | concat2 = 
torch.cat((out_upconv2, out_conv1, disp3_up), 1) 126 | out_iconv2 = self.iconv2(concat2) 127 | disp2 = self.alpha * self.predict_disp2(out_iconv2) + self.beta 128 | 129 | out_upconv1 = crop_like(self.upconv1(out_iconv2), x) 130 | disp2_up = crop_like(F.interpolate(disp2, scale_factor=2, mode='bilinear', align_corners=True), x) 131 | concat1 = torch.cat((out_upconv1, disp2_up), 1) 132 | out_iconv1 = self.iconv1(concat1) 133 | disp1 = self.alpha * self.predict_disp1(out_iconv1) + self.beta 134 | 135 | 136 | return disp1 137 | -------------------------------------------------------------------------------- /ptsemseg/models/dispnet_seg.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/ClementPinard/SfmLearner-Pytorch 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.nn.init import xavier_uniform_, zeros_ 7 | 8 | ## DispNetS for segmentation 9 | 10 | def downsample_conv(in_planes, out_planes, kernel_size=3): 11 | return nn.Sequential( 12 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=2, padding=(kernel_size-1)//2), 13 | nn.ReLU(inplace=True), 14 | nn.Conv2d(out_planes, out_planes, kernel_size=kernel_size, padding=(kernel_size-1)//2), 15 | nn.ReLU(inplace=True) 16 | ) 17 | 18 | 19 | def predict_disp(in_planes, n_classes=19): 20 | return nn.Sequential( 21 | nn.Conv2d(in_planes, n_classes, kernel_size=1, padding=0), 22 | nn.ReLU(inplace=True) # different from dispnet for depth 23 | ) 24 | 25 | 26 | def conv(in_planes, out_planes): 27 | return nn.Sequential( 28 | nn.Conv2d(in_planes, out_planes, kernel_size=3, padding=1), 29 | nn.ReLU(inplace=True) 30 | ) 31 | 32 | 33 | def upconv(in_planes, out_planes): 34 | return nn.Sequential( 35 | nn.ConvTranspose2d(in_planes, out_planes, kernel_size=3, stride=2, padding=1, output_padding=1), 36 | nn.ReLU(inplace=True) 37 | ) 38 | 39 | 40 | def crop_like(input, ref): 41 | assert(input.size(2) >= ref.size(2) and input.size(3) >= ref.size(3)) 42 | return input[:, :, :ref.size(2), :ref.size(3)] 43 | 44 | 45 | class dispnet_seg(nn.Module): 46 | 47 | def __init__(self, alpha=10.0, beta=0, n_classes=19): 48 | super(dispnet_seg, self).__init__() 49 | 50 | self.alpha = alpha 51 | self.beta = beta 52 | self.n_classes = n_classes 53 | 54 | conv_planes = [32, 64, 128, 256, 512, 512, 512] 55 | self.conv1 = downsample_conv(3, conv_planes[0], kernel_size=7) 56 | self.conv2 = downsample_conv(conv_planes[0], conv_planes[1], kernel_size=5) 57 | self.conv3 = downsample_conv(conv_planes[1], conv_planes[2]) 58 | self.conv4 = downsample_conv(conv_planes[2], conv_planes[3]) 59 | self.conv5 = downsample_conv(conv_planes[3], conv_planes[4]) 60 | self.conv6 = downsample_conv(conv_planes[4], conv_planes[5]) 61 | self.conv7 = downsample_conv(conv_planes[5], conv_planes[6]) 62 | 63 | upconv_planes = [512, 512, 256, 128, 64, 32, 16] 64 | self.upconv7 = upconv(conv_planes[6], upconv_planes[0]) 65 | self.upconv6 = upconv(upconv_planes[0], upconv_planes[1]) 66 | self.upconv5 = upconv(upconv_planes[1], upconv_planes[2]) 67 | self.upconv4 = upconv(upconv_planes[2], upconv_planes[3]) 68 | self.upconv3 = upconv(upconv_planes[3], upconv_planes[4]) 69 | self.upconv2 = upconv(upconv_planes[4], upconv_planes[5]) 70 | self.upconv1 = upconv(upconv_planes[5], upconv_planes[6]) 71 | 72 | self.iconv7 = conv(upconv_planes[0] + conv_planes[5], upconv_planes[0]) 73 | self.iconv6 = conv(upconv_planes[1] + conv_planes[4], upconv_planes[1]) 74 | self.iconv5 = 
conv(upconv_planes[2] + conv_planes[3], upconv_planes[2]) 75 | self.iconv4 = conv(upconv_planes[3] + conv_planes[2], upconv_planes[3]) 76 | self.iconv3 = conv(self.n_classes + upconv_planes[4] + conv_planes[1], upconv_planes[4]) 77 | self.iconv2 = conv(self.n_classes + upconv_planes[5] + conv_planes[0], upconv_planes[5]) 78 | self.iconv1 = conv(self.n_classes + upconv_planes[6], upconv_planes[6]) 79 | 80 | self.predict_disp4 = predict_disp(upconv_planes[3], n_classes=self.n_classes) 81 | self.predict_disp3 = predict_disp(upconv_planes[4], n_classes=self.n_classes) 82 | self.predict_disp2 = predict_disp(upconv_planes[5], n_classes=self.n_classes) 83 | self.predict_disp1 = predict_disp(upconv_planes[6], n_classes=self.n_classes) 84 | 85 | def init_weights(self): 86 | for m in self.modules(): 87 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 88 | xavier_uniform_(m.weight) 89 | if m.bias is not None: 90 | zeros_(m.bias) 91 | 92 | def forward(self, x): 93 | out_conv1 = self.conv1(x) 94 | out_conv2 = self.conv2(out_conv1) 95 | out_conv3 = self.conv3(out_conv2) 96 | out_conv4 = self.conv4(out_conv3) 97 | out_conv5 = self.conv5(out_conv4) 98 | out_conv6 = self.conv6(out_conv5) 99 | out_conv7 = self.conv7(out_conv6) 100 | 101 | out_upconv7 = crop_like(self.upconv7(out_conv7), out_conv6) 102 | concat7 = torch.cat((out_upconv7, out_conv6), 1) 103 | out_iconv7 = self.iconv7(concat7) 104 | 105 | out_upconv6 = crop_like(self.upconv6(out_iconv7), out_conv5) 106 | concat6 = torch.cat((out_upconv6, out_conv5), 1) 107 | out_iconv6 = self.iconv6(concat6) 108 | 109 | out_upconv5 = crop_like(self.upconv5(out_iconv6), out_conv4) 110 | concat5 = torch.cat((out_upconv5, out_conv4), 1) 111 | out_iconv5 = self.iconv5(concat5) 112 | 113 | out_upconv4 = crop_like(self.upconv4(out_iconv5), out_conv3) 114 | concat4 = torch.cat((out_upconv4, out_conv3), 1) 115 | out_iconv4 = self.iconv4(concat4) 116 | disp4 = self.alpha * self.predict_disp4(out_iconv4) + self.beta 117 | 118 | out_upconv3 = crop_like(self.upconv3(out_iconv4), out_conv2) 119 | disp4_up = crop_like(F.interpolate(disp4, scale_factor=2, mode='bilinear', align_corners=True), out_conv2) 120 | concat3 = torch.cat((out_upconv3, out_conv2, disp4_up), 1) 121 | out_iconv3 = self.iconv3(concat3) 122 | disp3 = self.alpha * self.predict_disp3(out_iconv3) + self.beta 123 | 124 | out_upconv2 = crop_like(self.upconv2(out_iconv3), out_conv1) 125 | disp3_up = crop_like(F.interpolate(disp3, scale_factor=2, mode='bilinear', align_corners=True), out_conv1) 126 | concat2 = torch.cat((out_upconv2, out_conv1, disp3_up), 1) 127 | out_iconv2 = self.iconv2(concat2) 128 | disp2 = self.alpha * self.predict_disp2(out_iconv2) + self.beta 129 | 130 | out_upconv1 = crop_like(self.upconv1(out_iconv2), x) 131 | disp2_up = crop_like(F.interpolate(disp2, scale_factor=2, mode='bilinear', align_corners=True), x) 132 | concat1 = torch.cat((out_upconv1, disp2_up), 1) 133 | out_iconv1 = self.iconv1(concat1) 134 | disp1 = self.alpha * self.predict_disp1(out_iconv1) + self.beta 135 | 136 | 137 | return disp1 138 | -------------------------------------------------------------------------------- /ptsemseg/models/fcn_depth.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/meetshah1995/pytorch-semseg 2 | 3 | import functools 4 | 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from ptsemseg.models.utils import get_upsampling_weight 9 | 10 | # FCN 8s for depth 11 | class 
fcn_depth(nn.Module): 12 | def __init__(self): 13 | super(fcn_depth, self).__init__() 14 | 15 | self.conv_block1 = nn.Sequential( 16 | nn.Conv2d(3, 64, 3, padding=100), 17 | nn.ReLU(inplace=True), 18 | nn.Conv2d(64, 64, 3, padding=1), 19 | nn.ReLU(inplace=True), 20 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 21 | ) 22 | 23 | self.conv_block2 = nn.Sequential( 24 | nn.Conv2d(64, 128, 3, padding=1), 25 | nn.ReLU(inplace=True), 26 | nn.Conv2d(128, 128, 3, padding=1), 27 | nn.ReLU(inplace=True), 28 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 29 | ) 30 | 31 | self.conv_block3 = nn.Sequential( 32 | nn.Conv2d(128, 256, 3, padding=1), 33 | nn.ReLU(inplace=True), 34 | nn.Conv2d(256, 256, 3, padding=1), 35 | nn.ReLU(inplace=True), 36 | nn.Conv2d(256, 256, 3, padding=1), 37 | nn.ReLU(inplace=True), 38 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 39 | ) 40 | 41 | self.conv_block4 = nn.Sequential( 42 | nn.Conv2d(256, 512, 3, padding=1), 43 | nn.ReLU(inplace=True), 44 | nn.Conv2d(512, 512, 3, padding=1), 45 | nn.ReLU(inplace=True), 46 | nn.Conv2d(512, 512, 3, padding=1), 47 | nn.ReLU(inplace=True), 48 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 49 | ) 50 | 51 | self.conv_block5 = nn.Sequential( 52 | nn.Conv2d(512, 512, 3, padding=1), 53 | nn.ReLU(inplace=True), 54 | nn.Conv2d(512, 512, 3, padding=1), 55 | nn.ReLU(inplace=True), 56 | nn.Conv2d(512, 512, 3, padding=1), 57 | nn.ReLU(inplace=True), 58 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 59 | ) 60 | 61 | self.classifier = nn.Sequential( 62 | nn.Conv2d(512, 2048, 7), 63 | nn.ReLU(inplace=True), 64 | # nn.Dropout2d(), 65 | nn.Conv2d(2048, 1024, 1), 66 | nn.ReLU(inplace=True), 67 | # nn.Dropout2d(), 68 | nn.Conv2d(1024, 64, 1), 69 | nn.ReLU(inplace=True), 70 | # nn.Dropout2d(), 71 | ) 72 | 73 | self.score_pool4 = nn.Conv2d(512, 64, 1) 74 | self.score_pool3 = nn.Conv2d(256, 64, 1) 75 | 76 | self.conv3 = nn.Conv2d(64, 1, 1, padding=0) 77 | self.relu = nn.ReLU(inplace=True) 78 | 79 | # deconvolution 80 | self.upscore2 = nn.ConvTranspose2d(64, 64, 4, 81 | stride=2, bias=False) 82 | self.upscore4 = nn.ConvTranspose2d(64, 64, 4, 83 | stride=2, bias=False) 84 | self.upscore8 = nn.ConvTranspose2d(64, 64, 16, 85 | stride=8, bias=False) 86 | 87 | for m in self.modules(): 88 | if isinstance(m, nn.ConvTranspose2d): 89 | m.weight.data.copy_(get_upsampling_weight(m.in_channels, 90 | m.out_channels, 91 | m.kernel_size[0])) 92 | 93 | 94 | def forward(self, x): 95 | conv1 = self.conv_block1(x) 96 | conv2 = self.conv_block2(conv1) 97 | conv3 = self.conv_block3(conv2) 98 | conv4 = self.conv_block4(conv3) 99 | conv5 = self.conv_block5(conv4) 100 | 101 | score = self.classifier(conv5) 102 | 103 | upscore2 = self.upscore2(score) 104 | score_pool4c = self.score_pool4(conv4)[:, :, 5:5+upscore2.size()[2], 105 | 5:5+upscore2.size()[3]] 106 | upscore_pool4 = self.upscore4(upscore2 + score_pool4c) 107 | 108 | score_pool3c = self.score_pool3(conv3)[:, :, 9:9+upscore_pool4.size()[2], 109 | 9:9+upscore_pool4.size()[3]] 110 | 111 | out = self.upscore8(score_pool3c + upscore_pool4)[:, :, 31:31+x.size()[2], 112 | 31:31+x.size()[3]] 113 | out = self.conv3(out) 114 | out = self.relu(out) 115 | return out 116 | 117 | 118 | 119 | 120 | def init_vgg16_params(self, vgg16, copy_fc8=True): 121 | blocks = [ 122 | self.conv_block1, 123 | self.conv_block2, 124 | self.conv_block3, 125 | self.conv_block4, 126 | self.conv_block5, 127 | ] 128 | 129 | ranges = [[0, 4], [5, 9], [10, 16], [17, 23], [24, 29]] 130 | features = list(vgg16.features.children()) 131 | 132 | for idx, conv_block in 
enumerate(blocks): 133 | for l1, l2 in zip(features[ranges[idx][0] : ranges[idx][1]], conv_block): 134 | if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d): 135 | assert l1.weight.size() == l2.weight.size() 136 | assert l1.bias.size() == l2.bias.size() 137 | l2.weight.data = l1.weight.data 138 | l2.bias.data = l1.bias.data -------------------------------------------------------------------------------- /ptsemseg/models/fcn_seg.py: -------------------------------------------------------------------------------- 1 | # taken from https://github.com/meetshah1995/pytorch-semseg 2 | 3 | import functools 4 | 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from ptsemseg.models.utils import get_upsampling_weight 9 | from ptsemseg.loss import cross_entropy2d 10 | 11 | # FCN 8s 12 | # for segmentation 13 | class fcn_seg(nn.Module): 14 | def __init__(self, n_classes=21, learned_billinear=True): 15 | super(fcn_seg, self).__init__() 16 | self.learned_billinear = learned_billinear 17 | self.n_classes = n_classes 18 | self.loss = functools.partial(cross_entropy2d, 19 | size_average=True) 20 | 21 | self.conv_block1 = nn.Sequential( 22 | nn.Conv2d(3, 64, 3, padding=100), 23 | nn.ReLU(inplace=True), 24 | nn.Conv2d(64, 64, 3, padding=1), 25 | nn.ReLU(inplace=True), 26 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 27 | ) 28 | 29 | self.conv_block2 = nn.Sequential( 30 | nn.Conv2d(64, 128, 3, padding=1), 31 | nn.ReLU(inplace=True), 32 | nn.Conv2d(128, 128, 3, padding=1), 33 | nn.ReLU(inplace=True), 34 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 35 | ) 36 | 37 | self.conv_block3 = nn.Sequential( 38 | nn.Conv2d(128, 256, 3, padding=1), 39 | nn.ReLU(inplace=True), 40 | nn.Conv2d(256, 256, 3, padding=1), 41 | nn.ReLU(inplace=True), 42 | nn.Conv2d(256, 256, 3, padding=1), 43 | nn.ReLU(inplace=True), 44 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 45 | ) 46 | 47 | self.conv_block4 = nn.Sequential( 48 | nn.Conv2d(256, 512, 3, padding=1), 49 | nn.ReLU(inplace=True), 50 | nn.Conv2d(512, 512, 3, padding=1), 51 | nn.ReLU(inplace=True), 52 | nn.Conv2d(512, 512, 3, padding=1), 53 | nn.ReLU(inplace=True), 54 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 55 | ) 56 | 57 | self.conv_block5 = nn.Sequential( 58 | nn.Conv2d(512, 512, 3, padding=1), 59 | nn.ReLU(inplace=True), 60 | nn.Conv2d(512, 512, 3, padding=1), 61 | nn.ReLU(inplace=True), 62 | nn.Conv2d(512, 512, 3, padding=1), 63 | nn.ReLU(inplace=True), 64 | nn.MaxPool2d(2, stride=2, ceil_mode=True), 65 | ) 66 | 67 | self.classifier = nn.Sequential( 68 | nn.Conv2d(512, 4096, 7), 69 | nn.ReLU(inplace=True), 70 | nn.Dropout2d(), 71 | nn.Conv2d(4096, 4096, 1), 72 | nn.ReLU(inplace=True), 73 | nn.Dropout2d(), 74 | nn.Conv2d(4096, self.n_classes, 1), 75 | ) 76 | 77 | self.score_pool4 = nn.Conv2d(512, self.n_classes, 1) 78 | self.score_pool3 = nn.Conv2d(256, self.n_classes, 1) 79 | 80 | if self.learned_billinear: 81 | # deconvolution 82 | self.upscore2 = nn.ConvTranspose2d(self.n_classes, self.n_classes, 4, 83 | stride=2, bias=False) 84 | self.upscore4 = nn.ConvTranspose2d(self.n_classes, self.n_classes, 4, 85 | stride=2, bias=False) 86 | self.upscore8 = nn.ConvTranspose2d(self.n_classes, self.n_classes, 16, 87 | stride=8, bias=False) 88 | 89 | for m in self.modules(): 90 | if isinstance(m, nn.ConvTranspose2d): 91 | m.weight.data.copy_(get_upsampling_weight(m.in_channels, 92 | m.out_channels, 93 | m.kernel_size[0])) 94 | 95 | 96 | def forward(self, x): 97 | conv1 = self.conv_block1(x) 98 | conv2 = self.conv_block2(conv1) 99 | conv3 = 
self.conv_block3(conv2) 100 | conv4 = self.conv_block4(conv3) 101 | conv5 = self.conv_block5(conv4) 102 | 103 | score = self.classifier(conv5) 104 | 105 | if self.learned_billinear: 106 | upscore2 = self.upscore2(score) 107 | score_pool4c = self.score_pool4(conv4)[:, :, 5:5+upscore2.size()[2], 108 | 5:5+upscore2.size()[3]] 109 | upscore_pool4 = self.upscore4(upscore2 + score_pool4c) 110 | 111 | score_pool3c = self.score_pool3(conv3)[:, :, 9:9+upscore_pool4.size()[2], 112 | 9:9+upscore_pool4.size()[3]] 113 | 114 | out = self.upscore8(score_pool3c + upscore_pool4)[:, :, 31:31+x.size()[2], 115 | 31:31+x.size()[3]] 116 | return out.contiguous() 117 | 118 | 119 | else: 120 | score_pool4 = self.score_pool4(conv4) 121 | score_pool3 = self.score_pool3(conv3) 122 | score = F.upsample(score, score_pool4.size()[2:]) 123 | score += score_pool4 124 | score = F.upsample(score, score_pool3.size()[2:]) 125 | score += score_pool3 126 | out = F.upsample(score, x.size()[2:]) 127 | 128 | return out 129 | 130 | def init_vgg16_params(self, vgg16, copy_fc8=True): 131 | blocks = [ 132 | self.conv_block1, 133 | self.conv_block2, 134 | self.conv_block3, 135 | self.conv_block4, 136 | self.conv_block5, 137 | ] 138 | 139 | ranges = [[0, 4], [5, 9], [10, 16], [17, 23], [24, 29]] 140 | features = list(vgg16.features.children()) 141 | 142 | for idx, conv_block in enumerate(blocks): 143 | for l1, l2 in zip(features[ranges[idx][0] : ranges[idx][1]], conv_block): 144 | if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d): 145 | assert l1.weight.size() == l2.weight.size() 146 | assert l1.bias.size() == l2.bias.size() 147 | l2.weight.data = l1.weight.data 148 | l2.bias.data = l1.bias.data 149 | for i1, i2 in zip([0, 3], [0, 3]): 150 | l1 = vgg16.classifier[i1] 151 | l2 = self.classifier[i2] 152 | l2.weight.data = l1.weight.data.view(l2.weight.size()) 153 | l2.bias.data = l1.bias.data.view(l2.bias.size()) 154 | n_class = self.classifier[6].weight.size()[0] 155 | if copy_fc8: 156 | l1 = vgg16.classifier[6] 157 | l2 = self.classifier[6] 158 | l2.weight.data = l1.weight.data[:n_class, :].view(l2.weight.size()) 159 | l2.bias.data = l1.bias.data[:n_class] 160 | -------------------------------------------------------------------------------- /ptsemseg/models/fcrn_depth.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/XPFly1989/FCRN 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional 6 | import math 7 | 8 | ### fcrn for depth ### 9 | # based on ResNet50 10 | class Bottleneck(nn.Module): 11 | expansion = 4 12 | 13 | def __init__(self, inplanes, planes, stride=1, downsample=None): 14 | super(Bottleneck, self).__init__() 15 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 16 | self.bn1 = nn.BatchNorm2d(planes) 17 | 18 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, 19 | bias=False) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 22 | self.bn3 = nn.BatchNorm2d(planes * 4) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.downsample = downsample 25 | self.stride = stride 26 | 27 | def forward(self, x): 28 | residual = x 29 | 30 | out = self.conv1(x) 31 | out = self.bn1(out) 32 | out = self.relu(out) 33 | 34 | out = self.conv2(out) 35 | out = self.bn2(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv3(out) 39 | out = self.bn3(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 
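# skip connection: the optional 1x1 "downsample" projection above reshapes the
# identity branch to match the bottleneck output, so the element-wise add below
# is the standard ResNet residual sum, applied before the final ReLU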
44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class UpProject(nn.Module): 51 | 52 | def __init__(self, in_channels, out_channels): 53 | super(UpProject, self).__init__() 54 | 55 | self.conv1_1 = nn.Conv2d(in_channels, out_channels, 3) 56 | self.conv1_2 = nn.Conv2d(in_channels, out_channels, (2, 3)) 57 | self.conv1_3 = nn.Conv2d(in_channels, out_channels, (3, 2)) 58 | self.conv1_4 = nn.Conv2d(in_channels, out_channels, 2) 59 | 60 | self.conv2_1 = nn.Conv2d(in_channels, out_channels, 3) 61 | self.conv2_2 = nn.Conv2d(in_channels, out_channels, (2, 3)) 62 | self.conv2_3 = nn.Conv2d(in_channels, out_channels, (3, 2)) 63 | self.conv2_4 = nn.Conv2d(in_channels, out_channels, 2) 64 | 65 | self.bn1_1 = nn.BatchNorm2d(out_channels) 66 | self.bn1_2 = nn.BatchNorm2d(out_channels) 67 | 68 | self.relu = nn.ReLU(inplace=True) 69 | 70 | self.conv3 = nn.Conv2d(out_channels, out_channels, 3, padding=1) 71 | 72 | self.bn2 = nn.BatchNorm2d(out_channels) 73 | 74 | def forward(self, x): 75 | batch_size = x.size()[0] 76 | out1_1 = self.conv1_1(nn.functional.pad(x, (1, 1, 1, 1))) 77 | #out1_2 = self.conv1_2(nn.functional.pad(x, (1, 1, 0, 1)))#right interleaving padding 78 | out1_2 = self.conv1_2(nn.functional.pad(x, (1, 1, 1, 0)))#author's interleaving pading in github 79 | #out1_3 = self.conv1_3(nn.functional.pad(x, (0, 1, 1, 1)))#right interleaving padding 80 | out1_3 = self.conv1_3(nn.functional.pad(x, (1, 0, 1, 1)))#author's interleaving pading in github 81 | #out1_4 = self.conv1_4(nn.functional.pad(x, (0, 1, 0, 1)))#right interleaving padding 82 | out1_4 = self.conv1_4(nn.functional.pad(x, (1, 0, 1, 0)))#author's interleaving pading in github 83 | 84 | out2_1 = self.conv2_1(nn.functional.pad(x, (1, 1, 1, 1))) 85 | #out2_2 = self.conv2_2(nn.functional.pad(x, (1, 1, 0, 1)))#right interleaving padding 86 | out2_2 = self.conv2_2(nn.functional.pad(x, (1, 1, 1, 0)))#author's interleaving pading in github 87 | #out2_3 = self.conv2_3(nn.functional.pad(x, (0, 1, 1, 1)))#right interleaving padding 88 | out2_3 = self.conv2_3(nn.functional.pad(x, (1, 0, 1, 1)))#author's interleaving pading in github 89 | #out2_4 = self.conv2_4(nn.functional.pad(x, (0, 1, 0, 1)))#right interleaving padding 90 | out2_4 = self.conv2_4(nn.functional.pad(x, (1, 0, 1, 0)))#author's interleaving pading in github 91 | 92 | height = out1_1.size()[2] 93 | width = out1_1.size()[3] 94 | 95 | out1_1_2 = torch.stack((out1_1, out1_2), dim=-3).permute(0, 1, 3, 4, 2).contiguous().view( 96 | batch_size, -1, height, width * 2) 97 | out1_3_4 = torch.stack((out1_3, out1_4), dim=-3).permute(0, 1, 3, 4, 2).contiguous().view( 98 | batch_size, -1, height, width * 2) 99 | 100 | out1_1234 = torch.stack((out1_1_2, out1_3_4), dim=-3).permute(0, 1, 3, 2, 4).contiguous().view( 101 | batch_size, -1, height * 2, width * 2) 102 | 103 | out2_1_2 = torch.stack((out2_1, out2_2), dim=-3).permute(0, 1, 3, 4, 2).contiguous().view( 104 | batch_size, -1, height, width * 2) 105 | out2_3_4 = torch.stack((out2_3, out2_4), dim=-3).permute(0, 1, 3, 4, 2).contiguous().view( 106 | batch_size, -1, height, width * 2) 107 | 108 | out2_1234 = torch.stack((out2_1_2, out2_3_4), dim=-3).permute(0, 1, 3, 2, 4).contiguous().view( 109 | batch_size, -1, height * 2, width * 2) 110 | 111 | out1 = self.bn1_1(out1_1234) 112 | out1 = self.relu(out1) 113 | out1 = self.conv3(out1) 114 | out1 = self.bn2(out1) 115 | 116 | out2 = self.bn1_2(out2_1234) 117 | 118 | out = out1 + out2 119 | out = self.relu(out) 120 | 121 | return out 122 | 123 | 124 | class 
fcrn_depth(nn.Module): 125 | 126 | def __init__(self): 127 | super(fcrn_depth, self).__init__() 128 | self.inplanes = 64 129 | # self.n_classes = n_classes 130 | 131 | # ResNet with out avrgpool & fc 132 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 133 | self.bn1 = nn.BatchNorm2d(64) 134 | self.relu = nn.ReLU(inplace=True) 135 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 136 | self.layer1 = self._make_layer(Bottleneck, 64, 3, stride=1) 137 | self.layer2 = self._make_layer(Bottleneck, 128, 4, stride=2) 138 | self.layer3 = self._make_layer(Bottleneck, 256, 6, stride=2) 139 | self.layer4 = self._make_layer(Bottleneck, 512, 3, stride=2) 140 | 141 | # Up-Conv layers 142 | self.conv2 = nn.Conv2d(2048, 1024, kernel_size=1, bias=False) 143 | self.bn2 = nn.BatchNorm2d(1024) 144 | 145 | self.up1 = self._make_upproj_layer(UpProject, 1024, 512) 146 | self.up2 = self._make_upproj_layer(UpProject, 512, 256) 147 | self.up3 = self._make_upproj_layer(UpProject, 256, 128) 148 | self.up4 = self._make_upproj_layer(UpProject, 128, 64) 149 | 150 | self.drop = nn.Dropout2d() 151 | 152 | self.conv3 = nn.Conv2d(64, 1, 3, padding=1) 153 | 154 | # initialize 155 | if True: 156 | for m in self.modules(): 157 | if isinstance(m, nn.Conv2d): 158 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 159 | m.weight.data.normal_(0, math.sqrt(2. / n)) 160 | elif isinstance(m, nn.BatchNorm2d): 161 | m.weight.data.fill_(1) 162 | m.bias.data.zero_() 163 | 164 | def _make_layer(self, block, planes, blocks, stride=1): 165 | downsample = None 166 | if stride != 1 or self.inplanes != planes * block.expansion: 167 | downsample = nn.Sequential( 168 | nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, 169 | stride=stride, bias=False), 170 | nn.BatchNorm2d(planes * block.expansion), 171 | ) 172 | 173 | layers = [] 174 | layers.append(block(self.inplanes, planes, stride, downsample)) 175 | self.inplanes = planes * block.expansion 176 | for i in range(1, blocks): 177 | layers.append(block(self.inplanes, planes)) 178 | 179 | return nn.Sequential(*layers) 180 | 181 | def _make_upproj_layer(self, block, in_channels, out_channels): 182 | return block(in_channels, out_channels) 183 | 184 | def forward(self, x): 185 | inp_shape = x.shape[2:] 186 | 187 | x = self.conv1(x) 188 | x = self.bn1(x) 189 | x = self.relu(x) 190 | x = self.maxpool(x) 191 | 192 | x = self.layer1(x) 193 | x = self.layer2(x) 194 | x = self.layer3(x) 195 | x = self.layer4(x) 196 | 197 | x = self.conv2(x) 198 | x = self.bn2(x) 199 | 200 | x = self.up1(x) 201 | x = self.up2(x) 202 | x = self.up3(x) 203 | x = self.up4(x) 204 | 205 | x = self.drop(x) 206 | 207 | x = self.conv3(x) 208 | x = self.relu(x) 209 | 210 | x = nn.functional.interpolate(x, size=inp_shape, mode='bilinear', align_corners=True) 211 | 212 | return x 213 | 214 | def init_resnet50_params(self, resnet50): 215 | initial_state_dict = resnet50.state_dict() 216 | return initial_state_dict 217 | 218 | # if you want to load from downloaded pretrained model: 219 | # def init_resnet50_params(self, model_path): 220 | # init_state_dict = torch.load(model_path) 221 | # return init_state_dict -------------------------------------------------------------------------------- /ptsemseg/models/fcrn_seg.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/XPFly1989/FCRN 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional 6 | import math 7 | 8 | ### fcrn for 
semantic segmentation ### 9 | # based on ResNet50 10 | 11 | class Bottleneck(nn.Module): 12 | expansion = 4 13 | 14 | def __init__(self, inplanes, planes, stride=1, downsample=None): 15 | super(Bottleneck, self).__init__() 16 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 17 | self.bn1 = nn.BatchNorm2d(planes) 18 | 19 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, 20 | bias=False) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 23 | self.bn3 = nn.BatchNorm2d(planes * 4) 24 | self.relu = nn.ReLU(inplace=True) 25 | self.downsample = downsample 26 | self.stride = stride 27 | 28 | def forward(self, x): 29 | residual = x 30 | 31 | out = self.conv1(x) 32 | out = self.bn1(out) 33 | out = self.relu(out) 34 | 35 | out = self.conv2(out) 36 | out = self.bn2(out) 37 | out = self.relu(out) 38 | 39 | out = self.conv3(out) 40 | out = self.bn3(out) 41 | 42 | if self.downsample is not None: 43 | residual = self.downsample(x) 44 | 45 | out += residual 46 | out = self.relu(out) 47 | 48 | return out 49 | 50 | 51 | class UpProject(nn.Module): 52 | 53 | def __init__(self, in_channels, out_channels): 54 | super(UpProject, self).__init__() 55 | 56 | self.conv1_1 = nn.Conv2d(in_channels, out_channels, 3) 57 | self.conv1_2 = nn.Conv2d(in_channels, out_channels, (2, 3)) 58 | self.conv1_3 = nn.Conv2d(in_channels, out_channels, (3, 2)) 59 | self.conv1_4 = nn.Conv2d(in_channels, out_channels, 2) 60 | 61 | self.conv2_1 = nn.Conv2d(in_channels, out_channels, 3) 62 | self.conv2_2 = nn.Conv2d(in_channels, out_channels, (2, 3)) 63 | self.conv2_3 = nn.Conv2d(in_channels, out_channels, (3, 2)) 64 | self.conv2_4 = nn.Conv2d(in_channels, out_channels, 2) 65 | 66 | self.bn1_1 = nn.BatchNorm2d(out_channels) 67 | self.bn1_2 = nn.BatchNorm2d(out_channels) 68 | 69 | self.relu = nn.ReLU(inplace=True) 70 | 71 | self.conv3 = nn.Conv2d(out_channels, out_channels, 3, padding=1) 72 | 73 | self.bn2 = nn.BatchNorm2d(out_channels) 74 | 75 | def forward(self, x): 76 | batch_size = x.size()[0] 77 | out1_1 = self.conv1_1(nn.functional.pad(x, (1, 1, 1, 1))) 78 | #out1_2 = self.conv1_2(nn.functional.pad(x, (1, 1, 0, 1)))#right interleaving padding 79 | out1_2 = self.conv1_2(nn.functional.pad(x, (1, 1, 1, 0)))#author's interleaving pading in github 80 | #out1_3 = self.conv1_3(nn.functional.pad(x, (0, 1, 1, 1)))#right interleaving padding 81 | out1_3 = self.conv1_3(nn.functional.pad(x, (1, 0, 1, 1)))#author's interleaving pading in github 82 | #out1_4 = self.conv1_4(nn.functional.pad(x, (0, 1, 0, 1)))#right interleaving padding 83 | out1_4 = self.conv1_4(nn.functional.pad(x, (1, 0, 1, 0)))#author's interleaving pading in github 84 | 85 | out2_1 = self.conv2_1(nn.functional.pad(x, (1, 1, 1, 1))) 86 | #out2_2 = self.conv2_2(nn.functional.pad(x, (1, 1, 0, 1)))#right interleaving padding 87 | out2_2 = self.conv2_2(nn.functional.pad(x, (1, 1, 1, 0)))#author's interleaving pading in github 88 | #out2_3 = self.conv2_3(nn.functional.pad(x, (0, 1, 1, 1)))#right interleaving padding 89 | out2_3 = self.conv2_3(nn.functional.pad(x, (1, 0, 1, 1)))#author's interleaving pading in github 90 | #out2_4 = self.conv2_4(nn.functional.pad(x, (0, 1, 0, 1)))#right interleaving padding 91 | out2_4 = self.conv2_4(nn.functional.pad(x, (1, 0, 1, 0)))#author's interleaving pading in github 92 | 93 | height = out1_1.size()[2] 94 | width = out1_1.size()[3] 95 | 96 | out1_1_2 = torch.stack((out1_1, out1_2), dim=-3).permute(0, 1, 3, 4, 2).contiguous().view( 
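# fast up-projection (as in FCRN): the stack/permute/view calls below interleave the
# four phase-shifted conv outputs, first out*_1 with out*_2 and out*_3 with out*_4
# along the width, then the two interleaved maps along the height, which doubles
# both spatial dimensions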
97 | batch_size, -1, height, width * 2) 98 | out1_3_4 = torch.stack((out1_3, out1_4), dim=-3).permute(0, 1, 3, 4, 2).contiguous().view( 99 | batch_size, -1, height, width * 2) 100 | 101 | out1_1234 = torch.stack((out1_1_2, out1_3_4), dim=-3).permute(0, 1, 3, 2, 4).contiguous().view( 102 | batch_size, -1, height * 2, width * 2) 103 | 104 | out2_1_2 = torch.stack((out2_1, out2_2), dim=-3).permute(0, 1, 3, 4, 2).contiguous().view( 105 | batch_size, -1, height, width * 2) 106 | out2_3_4 = torch.stack((out2_3, out2_4), dim=-3).permute(0, 1, 3, 4, 2).contiguous().view( 107 | batch_size, -1, height, width * 2) 108 | 109 | out2_1234 = torch.stack((out2_1_2, out2_3_4), dim=-3).permute(0, 1, 3, 2, 4).contiguous().view( 110 | batch_size, -1, height * 2, width * 2) 111 | 112 | out1 = self.bn1_1(out1_1234) 113 | out1 = self.relu(out1) 114 | out1 = self.conv3(out1) 115 | out1 = self.bn2(out1) 116 | 117 | out2 = self.bn1_2(out2_1234) 118 | 119 | out = out1 + out2 120 | out = self.relu(out) 121 | 122 | return out 123 | 124 | 125 | class fcrn_seg(nn.Module): 126 | 127 | def __init__(self, n_classes=19): 128 | super(fcrn_seg, self).__init__() 129 | self.inplanes = 64 130 | self.n_classes = n_classes 131 | 132 | # ResNet with out avrgpool & fc 133 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 134 | self.bn1 = nn.BatchNorm2d(64) 135 | self.relu = nn.ReLU(inplace=True) 136 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 137 | self.layer1 = self._make_layer(Bottleneck, 64, 3, stride=1) 138 | self.layer2 = self._make_layer(Bottleneck, 128, 4, stride=2) 139 | self.layer3 = self._make_layer(Bottleneck, 256, 6, stride=2) 140 | self.layer4 = self._make_layer(Bottleneck, 512, 3, stride=2) 141 | 142 | # Up-Conv layers 143 | self.conv2 = nn.Conv2d(2048, 1024, kernel_size=1, bias=False) 144 | self.bn2 = nn.BatchNorm2d(1024) 145 | 146 | self.up1 = self._make_upproj_layer(UpProject, 1024, 512) 147 | self.up2 = self._make_upproj_layer(UpProject, 512, 256) 148 | self.up3 = self._make_upproj_layer(UpProject, 256, 128) 149 | self.up4 = self._make_upproj_layer(UpProject, 128, 64) 150 | 151 | self.drop = nn.Dropout2d() 152 | 153 | # for segmentation 154 | self.conv3 = nn.Conv2d(64, self.n_classes, 3, padding=1) 155 | 156 | # initialize 157 | if True: 158 | for m in self.modules(): 159 | if isinstance(m, nn.Conv2d): 160 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 161 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 162 | elif isinstance(m, nn.BatchNorm2d): 163 | m.weight.data.fill_(1) 164 | m.bias.data.zero_() 165 | 166 | def _make_layer(self, block, planes, blocks, stride=1): 167 | downsample = None 168 | if stride != 1 or self.inplanes != planes * block.expansion: 169 | downsample = nn.Sequential( 170 | nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, 171 | stride=stride, bias=False), 172 | nn.BatchNorm2d(planes * block.expansion), 173 | ) 174 | 175 | layers = [] 176 | layers.append(block(self.inplanes, planes, stride, downsample)) 177 | self.inplanes = planes * block.expansion 178 | for i in range(1, blocks): 179 | layers.append(block(self.inplanes, planes)) 180 | 181 | return nn.Sequential(*layers) 182 | 183 | def _make_upproj_layer(self, block, in_channels, out_channels): 184 | return block(in_channels, out_channels) 185 | 186 | def forward(self, x): 187 | inp_shape = x.shape[2:] 188 | 189 | x = self.conv1(x) 190 | x = self.bn1(x) 191 | x = self.relu(x) 192 | x = self.maxpool(x) 193 | 194 | x = self.layer1(x) 195 | x = self.layer2(x) 196 | x = self.layer3(x) 197 | x = self.layer4(x) 198 | 199 | x = self.conv2(x) 200 | x = self.bn2(x) 201 | 202 | x = self.up1(x) 203 | x = self.up2(x) 204 | x = self.up3(x) 205 | x = self.up4(x) 206 | 207 | x = self.drop(x) 208 | 209 | x = self.conv3(x) 210 | x = self.relu(x) 211 | 212 | x = nn.functional.interpolate(x, size=inp_shape, mode='bilinear', align_corners=True) 213 | 214 | return x 215 | 216 | def init_resnet50_params(self, resnet50): 217 | initial_state_dict = resnet50.state_dict() 218 | return initial_state_dict 219 | 220 | # if you want to load from downloaded pretrained model: 221 | # def init_resnet50_params(self, model_path): 222 | # init_state_dict = torch.load(model_path, map_location=lambda storage, loc: storage) 223 | # return init_state_dict -------------------------------------------------------------------------------- /ptsemseg/models/frrn_depth.py: -------------------------------------------------------------------------------- 1 | # taken from https://github.com/meetshah1995/pytorch-semseg 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import functools 7 | 8 | from ptsemseg.models.utils import * 9 | 10 | # frrn for depth 11 | 12 | # n_blocks, channel, scale 13 | frrn_specs_dic = { 14 | "A": { 15 | "encoder": [[3, 96, 2], [4, 192, 4], [2, 384, 8], [2, 384, 16]], 16 | "decoder": [[2, 192, 8], [2, 192, 4], [2, 48, 2]], 17 | }, 18 | "B": { 19 | "encoder": [[3, 96, 2], [4, 192, 4], [2, 384, 8], [2, 384, 16], [2, 384, 32]], 20 | "decoder": [[2, 192, 16], [2, 192, 8], [2, 192, 4], [2, 96, 2]], 21 | }, 22 | } 23 | 24 | 25 | class frrn_depth(nn.Module): 26 | """ 27 | Full Resolution Residual Networks for Semantic Segmentation 28 | URL: https://arxiv.org/abs/1611.08323 29 | 30 | References: 31 | 1) Original Author's code: https://github.com/TobyPDE/FRRN 32 | 2) TF implementation by @kiwonjoon: https://github.com/hiwonjoon/tf-frrn 33 | """ 34 | 35 | def __init__(self, 36 | model_type="A", 37 | group_norm=False, 38 | n_groups=16): 39 | super(frrn_depth, self).__init__() 40 | self.model_type = model_type 41 | self.group_norm = group_norm 42 | self.n_groups = n_groups 43 | 44 | if self.group_norm: 45 | self.conv1 = conv2DGroupNormRelu(3, 48, 5, 1, 2) 46 | else: 47 | self.conv1 = conv2DBatchNormRelu(3, 48, 5, 1, 2) 48 | 49 | self.up_residual_units = [] 50 | self.down_residual_units = [] 51 | for i in range(3): 52 | self.up_residual_units.append(RU(channels=48, 53 | kernel_size=3, 54 | 
strides=1, 55 | group_norm=self.group_norm, 56 | n_groups=self.n_groups)) 57 | self.down_residual_units.append(RU(channels=48, 58 | kernel_size=3, 59 | strides=1, 60 | group_norm=self.group_norm, 61 | n_groups=self.n_groups)) 62 | 63 | self.up_residual_units = nn.ModuleList(self.up_residual_units) 64 | self.down_residual_units = nn.ModuleList(self.down_residual_units) 65 | 66 | self.split_conv = nn.Conv2d( 67 | 48, 32, kernel_size=1, padding=0, stride=1, bias=False 68 | ) 69 | 70 | # each spec is as (n_blocks, channels, scale) 71 | self.encoder_frru_specs = frrn_specs_dic[self.model_type]["encoder"] 72 | 73 | self.decoder_frru_specs = frrn_specs_dic[self.model_type]["decoder"] 74 | 75 | # encoding 76 | prev_channels = 48 77 | self.encoding_frrus = {} 78 | for n_blocks, channels, scale in self.encoder_frru_specs: 79 | for block in range(n_blocks): 80 | key = "_".join(map(str, ["encoding_frru", n_blocks, channels, scale, block])) 81 | setattr(self, key, FRRU(prev_channels=prev_channels, 82 | out_channels=channels, 83 | scale=scale, 84 | group_norm=self.group_norm, 85 | n_groups=self.n_groups),) 86 | prev_channels = channels 87 | 88 | # decoding 89 | self.decoding_frrus = {} 90 | for n_blocks, channels, scale in self.decoder_frru_specs: 91 | # pass through decoding FRRUs 92 | for block in range(n_blocks): 93 | key = "_".join(map(str, ["decoding_frru", n_blocks, channels, scale, block])) 94 | setattr(self, key, FRRU(prev_channels=prev_channels, 95 | out_channels=channels, 96 | scale=scale, 97 | group_norm=self.group_norm, 98 | n_groups=self.n_groups),) 99 | prev_channels = channels 100 | 101 | self.merge_conv = nn.Conv2d( 102 | prev_channels + 32, 48, kernel_size=1, padding=0, stride=1, bias=False 103 | ) 104 | 105 | self.predict = nn.Conv2d( 106 | 48, 1, kernel_size=3, padding=1, stride=1, bias=True 107 | ) 108 | 109 | def forward(self, x): 110 | 111 | # pass to initial conv 112 | x = self.conv1(x) 113 | 114 | # pass through residual units 115 | for i in range(3): 116 | x = self.up_residual_units[i](x) 117 | 118 | # divide stream 119 | y = x 120 | z = self.split_conv(x) 121 | 122 | prev_channels = 48 123 | # encoding 124 | for n_blocks, channels, scale in self.encoder_frru_specs: 125 | # maxpool bigger feature map 126 | y_pooled = F.max_pool2d(y, stride=2, kernel_size=2, padding=0) 127 | # pass through encoding FRRUs 128 | for block in range(n_blocks): 129 | key = "_".join( 130 | map(str, ["encoding_frru", n_blocks, channels, scale, block]) 131 | ) 132 | y, z = getattr(self, key)(y_pooled, z) 133 | prev_channels = channels 134 | 135 | # decoding 136 | for n_blocks, channels, scale in self.decoder_frru_specs: 137 | # bilinear upsample smaller feature map 138 | upsample_size = torch.Size([_s * 2 for _s in y.size()[-2:]]) 139 | y_upsampled = F.upsample(y, size=upsample_size, mode="bilinear", align_corners=True) 140 | # pass through decoding FRRUs 141 | for block in range(n_blocks): 142 | key = "_".join( 143 | map(str, ["decoding_frru", n_blocks, channels, scale, block]) 144 | ) 145 | y, z = getattr(self, key)(y_upsampled, z) 146 | prev_channels = channels 147 | 148 | # merge streams 149 | x = torch.cat([F.upsample(y, scale_factor=2, mode="bilinear", align_corners=True), z], dim=1) 150 | x = self.merge_conv(x) 151 | 152 | # pass through residual units 153 | for i in range(3): 154 | x = self.down_residual_units[i](x) 155 | 156 | # final 1x1 conv to get depth 157 | x = self.predict(x) 158 | 159 | return x 160 | -------------------------------------------------------------------------------- 
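Because the full-resolution stream z is merged back in before the final prediction layer, frrn_depth returns a one-channel map at the input resolution. Below is a minimal smoke test for the model above; it is a sketch only, assuming the conv2DBatchNormRelu / RU / FRRU helpers imported from ptsemseg.models.utils behave as in the upstream pytorch-semseg code, and that the input height and width are divisible by 16 (the deepest scale in the "A" spec).

import torch
from ptsemseg.models.frrn_depth import frrn_depth

# eval mode so the BatchNorm layers accept a batch of one
model = frrn_depth(model_type="A").eval()
with torch.no_grad():
    dummy = torch.randn(1, 3, 256, 512)   # NCHW; 256 and 512 are both divisible by 16
    pred = model(dummy)
print(pred.shape)                         # expected: torch.Size([1, 1, 256, 512])

The segmentation variant in the next file follows the same layout, except the final convolution produces n_classes channels instead of one.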
/ptsemseg/models/frrn_seg.py: -------------------------------------------------------------------------------- 1 | # taken from https://github.com/meetshah1995/pytorch-semseg 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import functools 7 | 8 | # for segmentation 9 | 10 | from ptsemseg.models.utils import * 11 | from ptsemseg.loss import bootstrapped_cross_entropy2d 12 | 13 | # n_blocks, channel, scale 14 | frrn_specs_dic = { 15 | "A": { 16 | "encoder": [[3, 96, 2], [4, 192, 4], [2, 384, 8], [2, 384, 16]], 17 | "decoder": [[2, 192, 8], [2, 192, 4], [2, 48, 2]], 18 | }, 19 | "B": { 20 | "encoder": [[3, 96, 2], [4, 192, 4], [2, 384, 8], [2, 384, 16], [2, 384, 32]], 21 | "decoder": [[2, 192, 16], [2, 192, 8], [2, 192, 4], [2, 96, 2]], 22 | }, 23 | } 24 | 25 | 26 | class frrn_seg(nn.Module): 27 | """ 28 | Full Resolution Residual Networks for Semantic Segmentation 29 | URL: https://arxiv.org/abs/1611.08323 30 | 31 | References: 32 | 1) Original Author's code: https://github.com/TobyPDE/FRRN 33 | 2) TF implementation by @kiwonjoon: https://github.com/hiwonjoon/tf-frrn 34 | """ 35 | 36 | def __init__(self, 37 | n_classes=19, 38 | model_type=None, 39 | group_norm=False, 40 | n_groups=16): 41 | super(frrn_seg, self).__init__() 42 | self.n_classes = n_classes 43 | self.model_type = model_type 44 | self.group_norm = group_norm 45 | self.n_groups = n_groups 46 | 47 | if self.group_norm: 48 | self.conv1 = conv2DGroupNormRelu(3, 48, 5, 1, 2) 49 | else: 50 | self.conv1 = conv2DBatchNormRelu(3, 48, 5, 1, 2) 51 | 52 | self.up_residual_units = [] 53 | self.down_residual_units = [] 54 | for i in range(3): 55 | self.up_residual_units.append(RU(channels=48, 56 | kernel_size=3, 57 | strides=1, 58 | group_norm=self.group_norm, 59 | n_groups=self.n_groups)) 60 | self.down_residual_units.append(RU(channels=48, 61 | kernel_size=3, 62 | strides=1, 63 | group_norm=self.group_norm, 64 | n_groups=self.n_groups)) 65 | 66 | self.up_residual_units = nn.ModuleList(self.up_residual_units) 67 | self.down_residual_units = nn.ModuleList(self.down_residual_units) 68 | 69 | self.split_conv = nn.Conv2d( 70 | 48, 32, kernel_size=1, padding=0, stride=1, bias=False 71 | ) 72 | 73 | # each spec is as (n_blocks, channels, scale) 74 | self.encoder_frru_specs = frrn_specs_dic[self.model_type]["encoder"] 75 | 76 | self.decoder_frru_specs = frrn_specs_dic[self.model_type]["decoder"] 77 | 78 | # encoding 79 | prev_channels = 48 80 | self.encoding_frrus = {} 81 | for n_blocks, channels, scale in self.encoder_frru_specs: 82 | for block in range(n_blocks): 83 | key = "_".join(map(str, ["encoding_frru", n_blocks, channels, scale, block])) 84 | setattr(self, key, FRRU(prev_channels=prev_channels, 85 | out_channels=channels, 86 | scale=scale, 87 | group_norm=self.group_norm, 88 | n_groups=self.n_groups),) 89 | prev_channels = channels 90 | 91 | # decoding 92 | self.decoding_frrus = {} 93 | for n_blocks, channels, scale in self.decoder_frru_specs: 94 | # pass through decoding FRRUs 95 | for block in range(n_blocks): 96 | key = "_".join(map(str, ["decoding_frru", n_blocks, channels, scale, block])) 97 | setattr(self, key, FRRU(prev_channels=prev_channels, 98 | out_channels=channels, 99 | scale=scale, 100 | group_norm=self.group_norm, 101 | n_groups=self.n_groups),) 102 | prev_channels = channels 103 | 104 | self.merge_conv = nn.Conv2d( 105 | prev_channels + 32, 48, kernel_size=1, padding=0, stride=1, bias=False 106 | ) 107 | 108 | self.classif_conv = nn.Conv2d( 109 | 48, self.n_classes, kernel_size=3, 
padding=1, stride=1, bias=True 110 | ) 111 | 112 | def forward(self, x): 113 | 114 | # pass to initial conv 115 | x = self.conv1(x) 116 | 117 | # pass through residual units 118 | for i in range(3): 119 | x = self.up_residual_units[i](x) 120 | 121 | # divide stream 122 | y = x 123 | z = self.split_conv(x) 124 | 125 | prev_channels = 48 126 | # encoding 127 | for n_blocks, channels, scale in self.encoder_frru_specs: 128 | # maxpool bigger feature map 129 | y_pooled = F.max_pool2d(y, stride=2, kernel_size=2, padding=0) 130 | # pass through encoding FRRUs 131 | for block in range(n_blocks): 132 | key = "_".join( 133 | map(str, ["encoding_frru", n_blocks, channels, scale, block]) 134 | ) 135 | y, z = getattr(self, key)(y_pooled, z) 136 | prev_channels = channels 137 | 138 | # decoding 139 | for n_blocks, channels, scale in self.decoder_frru_specs: 140 | # bilinear upsample smaller feature map 141 | upsample_size = torch.Size([_s * 2 for _s in y.size()[-2:]]) 142 | y_upsampled = F.upsample(y, size=upsample_size, mode="bilinear", align_corners=True) 143 | # pass through decoding FRRUs 144 | for block in range(n_blocks): 145 | key = "_".join( 146 | map(str, ["decoding_frru", n_blocks, channels, scale, block]) 147 | ) 148 | y, z = getattr(self, key)(y_upsampled, z) 149 | prev_channels = channels 150 | 151 | # merge streams 152 | x = torch.cat([F.upsample(y, scale_factor=2, mode="bilinear", align_corners=True), z], dim=1) 153 | x = self.merge_conv(x) 154 | 155 | # pass through residual units 156 | for i in range(3): 157 | x = self.down_residual_units[i](x) 158 | 159 | # final 1x1 conv to get classification 160 | x = self.classif_conv(x) 161 | 162 | return x 163 | -------------------------------------------------------------------------------- /ptsemseg/models/segnet_depth.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/meetshah1995/pytorch-semseg 2 | 3 | import torch.nn as nn 4 | 5 | from ptsemseg.models.utils import * 6 | 7 | # segnet for depth 8 | 9 | class segnet_depth(nn.Module): 10 | def __init__(self, in_channels=3, is_unpooling=True): 11 | super(segnet_depth, self).__init__() 12 | 13 | self.in_channels = in_channels 14 | self.is_unpooling = is_unpooling 15 | 16 | self.down1 = segnetDown2(self.in_channels, 64) 17 | self.down2 = segnetDown2(64, 128) 18 | self.down3 = segnetDown3(128, 256) 19 | self.down4 = segnetDown3(256, 512) 20 | self.down5 = segnetDown3(512, 512) 21 | 22 | self.up5 = segnetUp3(512, 512) 23 | self.up4 = segnetUp3(512, 256) 24 | self.up3 = segnetUp3(256, 128) 25 | self.up2 = segnetUp2(128, 64) 26 | self.up1 = segnetUp2(64, 1) 27 | 28 | def forward(self, inputs): 29 | 30 | down1, indices_1, unpool_shape1 = self.down1(inputs) 31 | down2, indices_2, unpool_shape2 = self.down2(down1) 32 | down3, indices_3, unpool_shape3 = self.down3(down2) 33 | down4, indices_4, unpool_shape4 = self.down4(down3) 34 | down5, indices_5, unpool_shape5 = self.down5(down4) 35 | 36 | up5 = self.up5(down5, indices_5, unpool_shape5) 37 | up4 = self.up4(up5, indices_4, unpool_shape4) 38 | up3 = self.up3(up4, indices_3, unpool_shape3) 39 | up2 = self.up2(up3, indices_2, unpool_shape2) 40 | up1 = self.up1(up2, indices_1, unpool_shape1) 41 | 42 | return up1 43 | 44 | def init_vgg16_params(self, vgg16): 45 | blocks = [self.down1, self.down2, self.down3, self.down4, self.down5] 46 | 47 | ranges = [[0, 4], [5, 9], [10, 16], [17, 23], [24, 29]] 48 | features = list(vgg16.features.children()) 49 | 50 | vgg_layers = [] 51 | for _layer in 
features: 52 | if isinstance(_layer, nn.Conv2d): 53 | vgg_layers.append(_layer) 54 | 55 | merged_layers = [] 56 | for idx, conv_block in enumerate(blocks): 57 | if idx < 2: 58 | units = [conv_block.conv1.cbr_unit, conv_block.conv2.cbr_unit] 59 | else: 60 | units = [ 61 | conv_block.conv1.cbr_unit, 62 | conv_block.conv2.cbr_unit, 63 | conv_block.conv3.cbr_unit, 64 | ] 65 | for _unit in units: 66 | for _layer in _unit: 67 | if isinstance(_layer, nn.Conv2d): 68 | merged_layers.append(_layer) 69 | 70 | assert len(vgg_layers) == len(merged_layers) 71 | 72 | for l1, l2 in zip(vgg_layers, merged_layers): 73 | if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d): 74 | assert l1.weight.size() == l2.weight.size() 75 | assert l1.bias.size() == l2.bias.size() 76 | l2.weight.data = l1.weight.data 77 | l2.bias.data = l1.bias.data 78 | -------------------------------------------------------------------------------- /ptsemseg/models/segnet_seg.py: -------------------------------------------------------------------------------- 1 | # taken from https://github.com/meetshah1995/pytorch-semseg 2 | 3 | import torch.nn as nn 4 | 5 | from ptsemseg.models.utils import * 6 | 7 | # for segmentation 8 | 9 | class segnet_seg(nn.Module): 10 | def __init__(self, n_classes=21, in_channels=3, is_unpooling=True): 11 | super(segnet_seg, self).__init__() 12 | 13 | self.in_channels = in_channels 14 | self.is_unpooling = is_unpooling 15 | 16 | self.down1 = segnetDown2(self.in_channels, 64) 17 | self.down2 = segnetDown2(64, 128) 18 | self.down3 = segnetDown3(128, 256) 19 | self.down4 = segnetDown3(256, 512) 20 | self.down5 = segnetDown3(512, 512) 21 | 22 | self.up5 = segnetUp3(512, 512) 23 | self.up4 = segnetUp3(512, 256) 24 | self.up3 = segnetUp3(256, 128) 25 | self.up2 = segnetUp2(128, 64) 26 | self.up1 = segnetUp2(64, n_classes) 27 | 28 | def forward(self, inputs): 29 | 30 | down1, indices_1, unpool_shape1 = self.down1(inputs) 31 | down2, indices_2, unpool_shape2 = self.down2(down1) 32 | down3, indices_3, unpool_shape3 = self.down3(down2) 33 | down4, indices_4, unpool_shape4 = self.down4(down3) 34 | down5, indices_5, unpool_shape5 = self.down5(down4) 35 | 36 | up5 = self.up5(down5, indices_5, unpool_shape5) 37 | up4 = self.up4(up5, indices_4, unpool_shape4) 38 | up3 = self.up3(up4, indices_3, unpool_shape3) 39 | up2 = self.up2(up3, indices_2, unpool_shape2) 40 | up1 = self.up1(up2, indices_1, unpool_shape1) 41 | 42 | return up1 43 | 44 | def init_vgg16_params(self, vgg16): 45 | blocks = [self.down1, self.down2, self.down3, self.down4, self.down5] 46 | 47 | ranges = [[0, 4], [5, 9], [10, 16], [17, 23], [24, 29]] 48 | features = list(vgg16.features.children()) 49 | 50 | vgg_layers = [] 51 | for _layer in features: 52 | if isinstance(_layer, nn.Conv2d): 53 | vgg_layers.append(_layer) 54 | 55 | merged_layers = [] 56 | for idx, conv_block in enumerate(blocks): 57 | if idx < 2: 58 | units = [conv_block.conv1.cbr_unit, conv_block.conv2.cbr_unit] 59 | else: 60 | units = [ 61 | conv_block.conv1.cbr_unit, 62 | conv_block.conv2.cbr_unit, 63 | conv_block.conv3.cbr_unit, 64 | ] 65 | for _unit in units: 66 | for _layer in _unit: 67 | if isinstance(_layer, nn.Conv2d): 68 | merged_layers.append(_layer) 69 | 70 | assert len(vgg_layers) == len(merged_layers) 71 | 72 | for l1, l2 in zip(vgg_layers, merged_layers): 73 | if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d): 74 | assert l1.weight.size() == l2.weight.size() 75 | assert l1.bias.size() == l2.bias.size() 76 | l2.weight.data = l1.weight.data 77 | l2.bias.data = 
l1.bias.data 78 | -------------------------------------------------------------------------------- /ptsemseg/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import logging 3 | import functools 4 | 5 | from torch.optim import SGD 6 | from torch.optim import Adam 7 | from torch.optim import ASGD 8 | from torch.optim import Adamax 9 | from torch.optim import Adadelta 10 | from torch.optim import Adagrad 11 | from torch.optim import RMSprop 12 | 13 | logger = logging.getLogger('ptsemseg') 14 | 15 | key2opt = {'sgd': SGD, 16 | 'adam': Adam, 17 | 'asgd': ASGD, 18 | 'adamax': Adamax, 19 | 'adadelta': Adadelta, 20 | 'adagrad': Adagrad, 21 | 'rmsprop': RMSprop,} 22 | 23 | def get_optimizer(cfg): 24 | if cfg['training']['optimizer'] is None: 25 | logger.info("Using SGD optimizer") 26 | return SGD 27 | 28 | else: 29 | opt_name = cfg['training']['optimizer']['name'] 30 | if opt_name not in key2opt: 31 | raise NotImplementedError('Optimizer {} not implemented'.format(opt_name)) 32 | 33 | logger.info('Using {} optimizer'.format(opt_name)) 34 | return key2opt[opt_name] 35 | -------------------------------------------------------------------------------- /ptsemseg/schedulers/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from ptsemseg.schedulers.schedulers import * 3 | 4 | logger = logging.getLogger('ptsemseg') 5 | 6 | key2scheduler = {'constant_lr': ConstantLR, 7 | 'poly_lr': PolynomialLR, 8 | 'multi_step': MultiStepLR, 9 | 'cosine_annealing': CosineAnnealingLR, 10 | 'exp_lr': ExponentialLR} 11 | 12 | 13 | def get_scheduler(optimizer, scheduler_dict): 14 | if scheduler_dict is None: 15 | logger.info('Using No LR Scheduling') 16 | return ConstantLR(optimizer) 17 | 18 | s_type = scheduler_dict['name'] 19 | scheduler_dict.pop('name') 20 | 21 | logger.info('Using {} scheduler with {} params'.format(s_type, 22 | scheduler_dict)) 23 | 24 | warmup_dict = {} 25 | if 'warmup_iters' in scheduler_dict: 26 | # This can be done in a more pythonic way... 
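# the warmup_* keys are collected into warmup_dict here and popped from
# scheduler_dict below, so only the base scheduler's own kwargs reach its
# constructor; WarmUpLR then wraps that base scheduler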
27 | warmup_dict['warmup_iters'] = scheduler_dict.get('warmup_iters', 100) 28 | warmup_dict['mode'] = scheduler_dict.get('warmup_mode', 'linear') 29 | warmup_dict['gamma'] = scheduler_dict.get('warmup_factor', 0.2) 30 | 31 | logger.info('Using Warmup with {} iters {} gamma and {} mode'.format( 32 | warmup_dict['warmup_iters'], 33 | warmup_dict['gamma'], 34 | warmup_dict['mode'])) 35 | 36 | scheduler_dict.pop('warmup_iters', None) 37 | scheduler_dict.pop('warmup_mode', None) 38 | scheduler_dict.pop('warmup_factor', None) 39 | 40 | base_scheduler = key2scheduler[s_type](optimizer, **scheduler_dict) 41 | return WarmUpLR(optimizer, base_scheduler, **warmup_dict) 42 | 43 | return key2scheduler[s_type](optimizer, **scheduler_dict) 44 | -------------------------------------------------------------------------------- /ptsemseg/schedulers/schedulers.py: -------------------------------------------------------------------------------- 1 | # taken from https://github.com/meetshah1995/pytorch-semseg 2 | 3 | import torch 4 | 5 | from torch.optim.lr_scheduler import _LRScheduler 6 | from torch.optim.lr_scheduler import MultiStepLR 7 | from torch.optim.lr_scheduler import ExponentialLR 8 | from torch.optim.lr_scheduler import CosineAnnealingLR 9 | from torch.optim.lr_scheduler import ReduceLROnPlateau 10 | 11 | 12 | class ConstantLR(_LRScheduler): 13 | def __init__(self, optimizer, last_epoch=-1): 14 | super(ConstantLR, self).__init__(optimizer, last_epoch) 15 | 16 | def get_lr(self): 17 | return [base_lr for base_lr in self.base_lrs] 18 | 19 | 20 | class PolynomialLR(_LRScheduler): 21 | def __init__(self, optimizer, max_iter, decay_iter=1, 22 | gamma=0.9, last_epoch=-1): 23 | self.decay_iter = decay_iter 24 | self.max_iter = max_iter 25 | self.gamma = gamma 26 | super(PolynomialLR, self).__init__(optimizer, last_epoch) 27 | 28 | def get_lr(self): 29 | if self.last_epoch % self.decay_iter or self.last_epoch % self.max_iter: 30 | return [base_lr for base_lr in self.base_lrs] 31 | else: 32 | factor = (1 - self.last_epoch / float(self.max_iter)) ** self.gamma 33 | return [base_lr * factor for base_lr in self.base_lrs] 34 | 35 | class WarmUpLR(_LRScheduler): 36 | def __init__(self, optimizer, scheduler, mode='linear', 37 | warmup_iters=100, gamma=0.2, last_epoch=-1): 38 | self.mode = mode 39 | self.scheduler = scheduler 40 | self.warmup_iters = warmup_iters 41 | self.gamma = gamma 42 | super(WarmUpLR, self).__init__(optimizer, last_epoch) 43 | 44 | def get_lr(self): 45 | cold_lrs = self.scheduler.get_lr() 46 | 47 | if self.last_epoch < self.warmup_iters: 48 | if self.mode == 'linear': 49 | alpha = self.last_epoch / float(self.warmup_iters) 50 | factor = self.gamma * (1 - alpha) + alpha 51 | 52 | elif self.mode == 'constant': 53 | factor = self.gamma 54 | else: 55 | raise KeyError('WarmUp type {} not implemented'.format(self.mode)) 56 | 57 | return [factor * base_lr for base_lr in cold_lrs] 58 | 59 | return cold_lrs 60 | -------------------------------------------------------------------------------- /ptsemseg/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Misc Utility functions 3 | taken from https://github.com/meetshah1995/pytorch-semseg 4 | """ 5 | import os 6 | import logging 7 | import datetime 8 | import numpy as np 9 | 10 | from collections import OrderedDict 11 | 12 | def recursive_glob(rootdir=".", suffix=""): 13 | """Performs recursive glob with given suffix and rootdir 14 | :param rootdir is the root directory 15 | :param suffix is the suffix 
to be searched 16 | """ 17 | image_paths = [] 18 | for looproot, _, filenames in os.walk(rootdir): 19 | for filename in filenames: 20 | if filename.endswith(suffix): 21 | image_paths.append(os.path.join(looproot, filename)) 22 | return image_paths 23 | 24 | 25 | def alpha_blend(input_image, segmentation_mask, alpha=0.5): 26 | """Alpha Blending utility to overlay RGB masks on RBG images 27 | :param input_image is a np.ndarray with 3 channels 28 | :param segmentation_mask is a np.ndarray with 3 channels 29 | :param alpha is a float value 30 | 31 | """ 32 | blended = np.zeros(input_image.size, dtype=np.float32) 33 | blended = input_image * alpha + segmentation_mask * (1 - alpha) 34 | return blended 35 | 36 | 37 | def get_logger(logdir): 38 | logger = logging.getLogger('ptsemseg') 39 | ts = str(datetime.datetime.now()).split('.')[0].replace(" ", "_") 40 | ts = ts.replace(":", "_").replace("-","_") 41 | file_path = os.path.join(logdir, 'run_{}.log'.format(ts)) 42 | hdlr = logging.FileHandler(file_path) 43 | formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') 44 | hdlr.setFormatter(formatter) 45 | logger.addHandler(hdlr) 46 | logger.setLevel(logging.INFO) 47 | return logger 48 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==2.2.2 2 | numpy==1.14.2 3 | scipy==1.0.1 4 | torch==0.4.1 5 | torchvision==0.2.1 6 | tqdm==4.23.0 7 | pydensecrf 8 | protobuf 9 | tensorboardX 10 | blessings 11 | progressbar2 12 | path.py 13 | imageio 14 | -------------------------------------------------------------------------------- /saliency.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/kazuto1011/grad-cam-pytorch 2 | 3 | from collections import OrderedDict 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | from torch.nn import functional as F 9 | 10 | 11 | class _PropagationBase(object): 12 | def __init__(self, model, task): 13 | super(_PropagationBase, self).__init__() 14 | self.device = next(model.parameters()).device 15 | self.model = model 16 | self.image = None 17 | self.task = task 18 | 19 | def _encode_one_hot(self, pos_i, pos_j, idx): 20 | one_hot = torch.FloatTensor(self.preds.size()).zero_() 21 | one_hot[0][idx][pos_i][pos_j] = 1.0 22 | return one_hot.to(self.device) 23 | 24 | def forward(self, image): 25 | self.image = image.requires_grad_() 26 | self.model.zero_grad() # Sets gradients of all model parameters to zero 27 | self.preds = self.model(self.image) # [1, 19, h, w] 28 | 29 | self.height = image.size()[2] 30 | self.width = image.size()[3] 31 | if self.task == "seg": 32 | self.pred_idx = np.squeeze(self.preds.data.max(1)[1].cpu().numpy(), axis=0) # [h, w] 33 | if self.task == "depth": 34 | self.pred_idx = np.zeros((self.height, self.width), dtype=int) 35 | 36 | return self.pred_idx 37 | 38 | def backward(self, pos_i, pos_j, idx): 39 | one_hot = self._encode_one_hot(pos_i, pos_j, idx) # [1, 19, h, w] 40 | self.preds.backward(gradient=one_hot, retain_graph=True) # Computes the gradient of current tensor w.r.t. 
graph leaves 41 | 42 | 43 | class BackPropagation(_PropagationBase): 44 | def generate(self): 45 | # produce vanilla bp map 46 | image_grads_vanilla = self.image.grad.detach().cpu().numpy().copy() # [1, 3, h, w] 47 | output_vanilla_bp = image_grads_vanilla.transpose(0,2,3,1)[0] 48 | 49 | # produce bp saliency map 50 | image_grads_abs = np.abs(image_grads_vanilla) 51 | output_saliency = image_grads_abs.transpose(0,2,3,1)[0] 52 | output_saliency = np.max(output_saliency, axis=2) 53 | self.image.grad.data.zero_() 54 | 55 | return output_vanilla_bp, output_saliency # [h, w, 3] -------------------------------------------------------------------------------- /saliency_analysis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import normaltest, wilcoxon 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | num_img = 100 7 | num_metric = 6 8 | model_name = "fcn" # TODO: to modify for other models, options: ["fcn", "frrn", "segnet", "deeplab", "fcrn", "dispnet"] 9 | 10 | ### fcrn ### 11 | seg_fcrn_pixel = np.load("saliency_eval_pixel/seg_fcrn_pixel.npy")[0:num_img] # [100, 6, 1078] 12 | depth_fcrn_pixel = np.load("saliency_eval_pixel/depth_fcrn_pixel.npy")[0:num_img] # 13 | fcrn_iou_pixel = np.load("saliency_eval_pixel/fcrn_iou.npy")[0:num_img] # [100, 4, 266] # 4 thresholds, 266 pixels for each image 14 | 15 | 16 | ## deeplab ### 17 | seg_deeplab_pixel = np.load("saliency_eval_pixel/seg_deeplab_pixel.npy")[0:num_img] # [100, 6, 1078] 18 | depth_deeplab_pixel = np.load("saliency_eval_pixel/depth_deeplab_pixel.npy")[0:num_img] # 19 | deeplab_iou_pixel = np.load("saliency_eval_pixel/deeplab_iou.npy")[0:num_img] 20 | 21 | 22 | ### dispnet ### 23 | seg_dispnet_pixel = np.load("saliency_eval_pixel/seg_dispnet_pixel.npy")[0:num_img] # 24 | depth_dispnet_pixel = np.load("saliency_eval_pixel/depth_dispnet_pixel.npy")[0:num_img] # 25 | dispnet_iou_pixel = np.load("saliency_eval_pixel/dispnet_iou.npy")[0:num_img] 26 | 27 | 28 | ### frrn ### 29 | seg_frrn_pixel = np.load("saliency_eval_pixel/seg_frrnA_pixel.npy")[0:num_img] # 30 | depth_frrn_pixel = np.load("saliency_eval_pixel/depth_frrnA_pixel.npy")[0:num_img] # 31 | frrn_iou_pixel = np.load("saliency_eval_pixel/frrnA_iou.npy")[0:num_img] 32 | 33 | 34 | ### segnet ### 35 | seg_segnet_pixel = np.load("saliency_eval_pixel/seg_segnet_pixel.npy")[0:num_img] # 36 | depth_segnet_pixel = np.load("saliency_eval_pixel/depth_segnet_pixel.npy")[0:num_img] # 37 | segnet_iou_pixel = np.load("saliency_eval_pixel/segnet_iou.npy")[0:num_img] 38 | 39 | ### fcn ### 40 | seg_fcn_pixel = np.load("saliency_eval_pixel/seg_fcn_pixel.npy")[0:num_img] # 41 | depth_fcn_pixel = np.load("saliency_eval_pixel/depth_fcn_pixel.npy")[0:num_img] # 42 | fcn_iou_pixel = np.load("saliency_eval_pixel/fcn_iou.npy")[0:num_img] 43 | 44 | # range for hist for 6 pixel radius metrics 45 | range_hist_min = [0, 0, 0, 0.01, 0.0002, 0.000010] # for all metrics 46 | range_hist_max = [0.35, 0.35, 0.35, 0.10, 0.0010, 0.00004] 47 | # range_hist_min = [0.08, 0, 0, 0.01, 0, 0.000010] # for threshold=0.1 48 | # range_hist_max = [0.35, 0.35, 0.35, 0.08, 0.0010, 0.00004] 49 | 50 | if model_name == "fcn": 51 | seg_pixel = seg_fcn_pixel 52 | depth_pixel = depth_fcn_pixel 53 | iou_pixel = fcn_iou_pixel 54 | if model_name == "frrn": 55 | seg_pixel = seg_frrn_pixel 56 | depth_pixel = depth_frrn_pixel 57 | iou_pixel = frrn_iou_pixel 58 | if model_name == "segnet": 59 | seg_pixel = seg_segnet_pixel 60 | depth_pixel = depth_segnet_pixel 61 | 
iou_pixel = segnet_iou_pixel 62 | if model_name == "deeplab": 63 | seg_pixel = seg_deeplab_pixel 64 | depth_pixel = depth_deeplab_pixel 65 | iou_pixel = deeplab_iou_pixel 66 | if model_name == "fcrn": 67 | seg_pixel = seg_fcrn_pixel 68 | depth_pixel = depth_fcrn_pixel 69 | iou_pixel = fcrn_iou_pixel 70 | if model_name == "dispnet": 71 | seg_pixel = seg_dispnet_pixel 72 | depth_pixel = depth_dispnet_pixel 73 | iou_pixel = dispnet_iou_pixel 74 | 75 | threshold__pixel = [0.1, 0.5, 0.9, 0.1, 0.5, 0.9] 76 | name_pixel = ['act_d', 'act_d', 'act_d', 'act_ratio', 'act_ratio', 'act_ratio'] 77 | 78 | 79 | ### significant analysis ### 80 | for i in range(num_metric): 81 | mean_seg_img_list = [] # store mean value over all pixels for each image, length: 100 (100*1) 82 | mean_depth_img_list = [] 83 | p_value_list = [] 84 | count = 0 85 | seg_pixel_metric = seg_pixel[:, i, :] # [100, 1078] metric map i for 100 images, 1078 pixels/image 86 | depth_pixel_metric = depth_pixel[:, i, :] 87 | 88 | for k in range(num_img): 89 | p_value = wilcoxon(seg_pixel_metric[k], depth_pixel_metric[k])[1] 90 | p_value_list.append(p_value) 91 | mean_seg = np.mean(seg_pixel_metric[k]) 92 | mean_depth = np.mean(depth_pixel_metric[k]) 93 | mean_seg_img_list.append(mean_seg) 94 | mean_depth_img_list.append(mean_depth) 95 | if mean_seg < mean_depth and p_value < 0.05: 96 | count += 1 97 | 98 | n, bins, patches = plt.hist(x=mean_seg_img_list, bins='auto', color='red', 99 | range=(range_hist_min[i], range_hist_max[i]), alpha=0.5) 100 | plt.grid(axis='y', alpha=0.75) 101 | plt.xlabel('mean value of {}>={} of each image'.format(name_pixel[i], threshold__pixel[i])) 102 | plt.ylabel('number of images') 103 | plt.title('{}: {}>={}'.format(model_name, name_pixel[i], threshold__pixel[i])) 104 | plt.text(23, 45, r'$\mu=15, b=3$') 105 | plt.hist(x=mean_depth_img_list, bins=bins, range=(range_hist_min[i], range_hist_max[i]), color='blue', alpha=0.5) 106 | plt.ylim(0, 35) 107 | if i == 0 or i == 3: 108 | plt.savefig('saliency_eval_hist/{}_metric_{}.png'.format(model_name, i)) 109 | plt.show() 110 | 111 | print("metric ", i) 112 | print("number of images fitting assumption:",count/num_img) 113 | print("mean seg:", np.mean(np.array(mean_seg_img_list))) # mean over all pixels in all images 114 | print("mean depth:", np.mean(np.array(mean_depth_img_list))) 115 | 116 | 117 | for i in range(4): 118 | iou_pixel_metric = iou_pixel[:, i, :] 119 | iou_img_metric = np.mean(iou_pixel_metric) 120 | print("metric ", i) 121 | print("mean iou over all pixels over all imgs: ", iou_img_metric) 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /saliency_eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import numpy as np 5 | import torch 6 | import scipy.misc as m 7 | import cv2 8 | from torch.utils import data 9 | from tqdm import tqdm 10 | from joblib import Parallel, delayed 11 | 12 | from ptsemseg.models.fcn_seg import * 13 | from ptsemseg.models.segnet_seg import * 14 | from ptsemseg.models.frrn_seg import * 15 | from ptsemseg.models.deeplab_seg import * 16 | from ptsemseg.models.fcrn_seg import * 17 | from ptsemseg.models.dispnet_seg import * 18 | 19 | from ptsemseg.models.fcn_depth import * 20 | from ptsemseg.models.segnet_depth import * 21 | from ptsemseg.models.frrn_depth import * 22 | from ptsemseg.models.deeplab_depth import * 23 | from ptsemseg.models.fcrn_depth import * 24 | from 
ptsemseg.models.dispnet_depth import * 25 | 26 | from saliency import BackPropagation 27 | from ptsemseg.loader.kitti_loader_seg import kittiLoader_seg 28 | 29 | 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument("--data_path", default='datasets/kitti/semantics/', type=str, 32 | help='path to test images') 33 | parser.add_argument("--model_name", type=str, default='deeplab', choices=["fcn", "frrnA", "segnet", "deeplab", "dispnet", "fcrn"]) 34 | parser.add_argument("--task", type=str, default="depth", choices=["seg", "depth"]) 35 | parser.add_argument("--model_path", type=str, 36 | default='runs/deeplab_kitti_depth/2976_256_832_smooth1000_init_BNfreeze/deeplab_kitti_best_model.pkl', 37 | help='path to pretrained model') 38 | 39 | # the image resolution here should match the pretrained model training resolution 40 | parser.add_argument("--height", type=int, default=256, help="image resize height") 41 | parser.add_argument("--width", type=int, default=832, help="image resize width") 42 | parser.add_argument("--sample_rate", type=int, default=10, help="sample rate for eval") 43 | parser.add_argument("--num_image", type=int, default=100, help="number of images to evaluate") 44 | 45 | 46 | args = parser.parse_args() 47 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 48 | 49 | def get_model(model_name, task): 50 | if task == "seg": 51 | try: 52 | return { 53 | "fcn": fcn_seg(n_classes=19), 54 | "frrnA": frrn_seg(model_type = "A", n_classes=19), 55 | "segnet": segnet_seg(n_classes=19), 56 | "deeplab": deeplab_seg(n_classes=19), 57 | "dispnet": dispnet_seg(n_classes=19), 58 | "fcrn": fcrn_seg(n_classes=19), 59 | }[model_name] 60 | except: 61 | raise("Model {} not available".format(model_name)) 62 | elif task == "depth": 63 | try: 64 | return { 65 | "fcn": fcn_depth(), 66 | "frrnA": frrn_depth(model_type = "A"), 67 | "segnet": segnet_depth(), 68 | "deeplab": deeplab_depth(), 69 | "dispnet": dispnet_depth(), 70 | "fcrn": fcrn_depth(), 71 | }[model_name] 72 | except: 73 | raise("Model {} not available".format(model_name)) 74 | 75 | 76 | def most_act_dis(saliency_map, pos_i, pos_j): 77 | # distance between the most activated pixel and current pixel 78 | height, width = saliency_map.shape 79 | most_act_pos = np.where(saliency_map == np.max(saliency_map)) 80 | all_dist = 0 81 | for i in range(most_act_pos[0].shape[0]): 82 | dist = np.sqrt((most_act_pos[0][i]-pos_i) ** 2 + (most_act_pos[1][i]-pos_j) ** 2) 83 | all_dist = all_dist + dist 84 | result = (all_dist / len(most_act_pos[0])) / np.sqrt(height ** 2 + width ** 2) 85 | return result 86 | 87 | 88 | # biggest distance between current pixel and all pixels with value >= threshold 89 | # number of pixels >= threshold / number of total_pixels 90 | def largest_radius(saliency_map, pos_i, pos_j, threshold=0.2): 91 | height, width = saliency_map.shape 92 | 93 | act_pixel_pos = np.where(saliency_map >= threshold) 94 | all_dist = np.zeros(act_pixel_pos[0].shape[0]) 95 | 96 | if act_pixel_pos[0].shape[0] == 0: 97 | return 0, 0 98 | for i in range(act_pixel_pos[0].shape[0]): 99 | all_dist[i] = np.sqrt((act_pixel_pos[0][i]-pos_i) ** 2 + (act_pixel_pos[1][i]-pos_j) ** 2) 100 | radius = np.max(all_dist) / np.sqrt(height ** 2 + width ** 2) 101 | part = act_pixel_pos[0].shape[0] / (height * width) 102 | return radius, part 103 | 104 | 105 | def calculate(image, label, bp, args): 106 | pred_idx = bp.forward(image.to(device)) # predict lbl / depth: [h, w] 107 | 108 | img_radius1, img_radius2, img_radius3 = [], [], [] 109 | 
img_part1, img_part2, img_part3 = [], [], [] 110 | 111 | y1, y2 = int(0.40810811 * args.height), int(0.99189189 * args.height) 112 | x1, x2 = int(0.03594771 * args.width), int(0.96405229 * args.width) 113 | total_pixel = 0 114 | 115 | for pos_i in tqdm(range(y1+args.sample_rate, y2, args.sample_rate)): 116 | for pos_j in tqdm(range(x1+args.sample_rate, x2, args.sample_rate)): 117 | bp.backward(pos_i=pos_i, pos_j=pos_j, idx=pred_idx[pos_i, pos_j]) 118 | output_vanilla, output_saliency = bp.generate() # output_saliency: [h, w] 119 | 120 | output_saliency = output_saliency[y1:y2, x1:x2] 121 | # normalized saliency map for a pixel in an image 122 | if np.max(output_saliency) > 0: 123 | output_saliency = (output_saliency - np.min(output_saliency)) / np.max(output_saliency) 124 | radius1, part1 = largest_radius(output_saliency, pos_i=pos_i-y1, pos_j=pos_j-x1, threshold=0.1) 125 | radius2, part2 = largest_radius(output_saliency, pos_i=pos_i-y1, pos_j=pos_j-x1, threshold=0.5) 126 | radius3, part3 = largest_radius(output_saliency, pos_i=pos_i-y1, pos_j=pos_j-x1, threshold=0.9) 127 | 128 | img_radius1.append(radius1) 129 | img_radius2.append(radius2) 130 | img_radius3.append(radius3) 131 | img_part1.append(part1) 132 | img_part2.append(part2) 133 | img_part3.append(part3) 134 | total_pixel += 1 135 | 136 | return img_radius1, img_radius2, img_radius3, \ 137 | img_part1, img_part2, img_part3 138 | 139 | 140 | 141 | def main(): 142 | # Model 143 | model = get_model(args.model_name, args.task) 144 | weights = torch.load(args.model_path) 145 | # weights = torch.load(args.model_path, map_location=lambda storage, loc: storage) 146 | model.load_state_dict(weights['model_state']) 147 | model.to(device) 148 | model.eval() 149 | 150 | depth_flag = False 151 | if args.task == 'depth': 152 | depth_flag = True 153 | 154 | loader = kittiLoader_seg( 155 | root=args.data_path, 156 | split='train', 157 | is_transform=True, 158 | img_size=(args.height, args.width), 159 | augmentations=None, 160 | img_norm=True, 161 | saliency_eval_depth=depth_flag 162 | ) 163 | 164 | testloader = data.DataLoader(loader, 165 | batch_size=1, 166 | num_workers=0, 167 | shuffle=False) 168 | 169 | bp = BackPropagation(model=model, task=args.task) 170 | result_img = [] 171 | for i, (image, label, img_path) in enumerate(testloader): 172 | print(img_path) 173 | img_eval_res = calculate(image=image, label=label, bp=bp, args=args) 174 | result_img.append(img_eval_res) 175 | result_img_out = np.array(result_img, dtype=float) # [num_image, num_metrics, num_pixels_for_each_image] 176 | np.save("saliency_eval_pixel/{}_{}_pixel_try.npy".format(args.task, args.model_name), result_img_out) 177 | if i >= args.num_image: 178 | break 179 | 180 | 181 | if __name__ == '__main__': 182 | main() 183 | -------------------------------------------------------------------------------- /saliency_iou.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import numpy as np 5 | import torch 6 | import scipy.misc as m 7 | import cv2 8 | from torch.utils import data 9 | from tqdm import tqdm 10 | from joblib import Parallel, delayed 11 | 12 | from ptsemseg.models.fcn_seg import * 13 | from ptsemseg.models.segnet_seg import * 14 | from ptsemseg.models.frrn_seg import * 15 | from ptsemseg.models.deeplab_seg import * 16 | from ptsemseg.models.fcrn_seg import * 17 | from ptsemseg.models.dispnet_seg import * 18 | 19 | from ptsemseg.models.fcn_depth import * 20 | from 
ptsemseg.models.segnet_depth import * 21 | from ptsemseg.models.frrn_depth import * 22 | from ptsemseg.models.deeplab_depth import * 23 | from ptsemseg.models.fcrn_depth import * 24 | from ptsemseg.models.dispnet_depth import * 25 | 26 | from saliency import BackPropagation 27 | from ptsemseg.loader.kitti_loader_seg import kittiLoader_seg 28 | 29 | 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument("--data_path", default='datasets/kitti/semantics/', type=str, 32 | help='path to test images') 33 | parser.add_argument("--model_name", type=str, default='fcn', choices=["fcn", "frrnA", "segnet", "deeplab", "dispnet", "fcrn"]) 34 | parser.add_argument("--model_seg_path", type=str, 35 | default='runs/fcn8s_kitti_seg/12543_256_832_cityscaperPretrained_lr5/fcn8s_kitti_best_model.pkl', 36 | help='path to pretrained model') 37 | parser.add_argument("--model_depth_path", type=str, 38 | default='runs/fcn_kitti_depth/2972_256_832_bs4_smooth1000/fcn_kitti_best_model.pkl', 39 | help='path to pretrained model') 40 | 41 | # the image resolution here should match the pretrained model training resolution 42 | # here the segmentation model and depth model should have the same training resolution 43 | parser.add_argument("--height", type=int, default=256, help="image resize height") 44 | parser.add_argument("--width", type=int, default=832, help="image resize width") 45 | parser.add_argument("--sample_rate", type=int, default=20, help="sample rate for eval") 46 | parser.add_argument("--num_image", type=int, default=100, help="number of images to evaluate") 47 | 48 | 49 | args = parser.parse_args() 50 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 51 | 52 | def get_model(model_name, task): 53 | if task == "seg": 54 | try: 55 | return { 56 | "fcn": fcn_seg(n_classes=19), 57 | "frrnA": frrn_seg(model_type="A", n_classes=19), 58 | "segnet": segnet_seg(n_classes=19), 59 | "deeplab": deeplab_seg(n_classes=19), 60 | "dispnet": dispnet_seg(n_classes=19), 61 | "fcrn": fcrn_seg(n_classes=19), 62 | }[model_name] 63 | except: 64 | raise("Model {} not available".format(model_name)) 65 | elif task == "depth": 66 | try: 67 | return { 68 | "fcn": fcn_depth(), 69 | "frrnA": frrn_depth(model_type = "A"), 70 | "segnet": segnet_depth(), 71 | "deeplab": deeplab_depth(), 72 | "dispnet": dispnet_depth(), 73 | "fcrn": fcrn_depth(), 74 | }[model_name] 75 | except: 76 | raise("Model {} not available".format(model_name)) 77 | 78 | 79 | def saliency_iou(saliency_seg, saliency_depth, threshold): 80 | mask_seg = np.logical_not(saliency_seg < threshold) 81 | mask_depth = np.logical_not(saliency_depth < threshold) 82 | union = np.logical_or(mask_seg, mask_depth) 83 | inter = np.logical_and(mask_seg, mask_depth) 84 | iou = np.sum(inter) / np.sum(union) 85 | return iou 86 | 87 | 88 | def calculate_overlap(img_seg, img_depth, bp_seg, bp_depth, args): 89 | pred_seg = bp_seg.forward(img_seg.to(device)) # predict lbl / depth: [h, w] 90 | pred_depth = bp_depth.forward(img_depth.to(device)) 91 | 92 | img_iou1, img_iou2, img_iou3, img_iou4 = [], [], [], [] 93 | 94 | y1, y2 = int(0.40810811 * args.height), int(0.99189189 * args.height) 95 | x1, x2 = int(0.03594771 * args.width), int(0.96405229 * args.width) 96 | total_pixel = 0 97 | 98 | for pos_i in tqdm(range(y1+args.sample_rate, y2, args.sample_rate)): 99 | for pos_j in tqdm(range(x1+args.sample_rate, x2, args.sample_rate)): 100 | bp_seg.backward(pos_i=pos_i, pos_j=pos_j, idx=pred_seg[pos_i, pos_j]) 101 | bp_depth.backward(pos_i=pos_i, pos_j=pos_j, 
idx=pred_depth[pos_i, pos_j]) 102 | _, output_saliency_seg = bp_seg.generate() # output_saliency: [h, w] 103 | _, output_saliency_depth = bp_depth.generate() 104 | 105 | output_saliency_seg = output_saliency_seg[y1:y2, x1:x2] 106 | output_saliency_depth = output_saliency_depth[y1:y2, x1:x2] 107 | # normalized saliency map for a pixel in an image 108 | if np.max(output_saliency_seg) > 0: 109 | output_saliency_seg = (output_saliency_seg - np.min(output_saliency_seg)) / np.max(output_saliency_seg) 110 | if np.max(output_saliency_depth) > 0: 111 | output_saliency_depth = (output_saliency_depth - np.min(output_saliency_depth)) / np.max(output_saliency_depth) 112 | 113 | iou1 = saliency_iou(saliency_seg=output_saliency_seg, saliency_depth=output_saliency_depth, threshold=0.05) 114 | iou2 = saliency_iou(saliency_seg=output_saliency_seg, saliency_depth=output_saliency_depth, threshold=0.1) 115 | iou3 = saliency_iou(saliency_seg=output_saliency_seg, saliency_depth=output_saliency_depth, threshold=0.5) 116 | iou4 = saliency_iou(saliency_seg=output_saliency_seg, saliency_depth=output_saliency_depth, threshold=0.9) 117 | 118 | total_pixel += 1 119 | img_iou1.append(iou1) 120 | img_iou2.append(iou2) 121 | img_iou3.append(iou3) 122 | img_iou4.append(iou4) 123 | 124 | return img_iou1, img_iou2, img_iou3, img_iou4 # list, for all pixels evaluated 125 | 126 | 127 | 128 | def main(): 129 | # seg Model and depth Model 130 | model_seg = get_model(args.model_name, task="seg") 131 | weights_seg = torch.load(args.model_seg_path) 132 | # weights = torch.load(args.model_seg_path, map_location=lambda storage, loc: storage) 133 | model_seg.load_state_dict(weights_seg['model_state']) 134 | model_seg.to(device) 135 | model_seg.eval() 136 | 137 | model_depth = get_model(args.model_name, task="depth") 138 | weights_depth = torch.load(args.model_depth_path) 139 | # weights = torch.load(args.model_depth_path, map_location=lambda storage, loc: storage) 140 | model_depth.load_state_dict(weights_depth['model_state']) 141 | model_depth.to(device) 142 | model_depth.eval() 143 | 144 | loader_seg = kittiLoader_seg( 145 | root=args.data_path, 146 | split='train', 147 | is_transform=True, 148 | img_size=(args.height, args.width), 149 | augmentations=None, 150 | img_norm=True, 151 | saliency_eval_depth=False 152 | ) 153 | 154 | loader_depth = kittiLoader_seg( 155 | root=args.data_path, 156 | split='train', 157 | is_transform=True, 158 | img_size=(args.height, args.width), 159 | augmentations=None, 160 | img_norm=True, 161 | saliency_eval_depth=True 162 | ) 163 | 164 | testloader_seg = data.DataLoader(loader_seg, 165 | batch_size=1, 166 | num_workers=0, 167 | shuffle=False) 168 | testloader_depth = data.DataLoader(loader_depth, 169 | batch_size=1, 170 | num_workers=0, 171 | shuffle=False) 172 | 173 | bp_seg = BackPropagation(model=model_seg, task="seg") 174 | bp_depth = BackPropagation(model=model_depth, task="depth") 175 | result_img = [] 176 | 177 | for i, (image_seg, label_seg, img_path_seg) in enumerate(testloader_seg): 178 | for j, (image_depth, _, img_path_depth) in enumerate(testloader_depth): 179 | if i == j: 180 | print(img_path_seg) 181 | img_iou = calculate_overlap(img_seg=image_seg, img_depth=image_depth, bp_seg=bp_seg, bp_depth=bp_depth, args=args) 182 | result_img.append(img_iou) 183 | result_img_out = np.array(result_img, dtype=float) # [num_image, num_metrics=4, num_pixels_for_each_image] 184 | np.save("saliency_eval_pixel/{}_iou_try.npy".format(args.model_name), result_img_out) 185 | 186 | if i >= args.num_image: 
187 | break 188 | else: 189 | continue 190 | 191 | 192 | if __name__ == '__main__': 193 | main() 194 | -------------------------------------------------------------------------------- /saliency_results/BP_saliency_map_fcrn_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/saliency_results/BP_saliency_map_fcrn_seg.png -------------------------------------------------------------------------------- /saliency_results/image_pixel_locate_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sanweiliti/Segmentation-MonoDepth-Pytorch/d1a3de8d10c60fe9d3b86b585e0f0089555fc8a6/saliency_results/image_pixel_locate_0.png -------------------------------------------------------------------------------- /test_depth_cityscapes.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import torch 3 | import argparse 4 | 5 | from torch.utils import data 6 | from tqdm import tqdm 7 | 8 | from ptsemseg.models import get_model 9 | from ptsemseg.loader import get_loader 10 | from ptsemseg.metrics import runningScoreDepth, averageMeter 11 | 12 | 13 | def count_parameters(model): 14 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 15 | 16 | 17 | def test(cfg, args): 18 | # Setup device 19 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 20 | 21 | # Setup Dataloader 22 | data_loader = get_loader(cfg['data']['dataset'], cfg['task']) 23 | data_path = cfg['data']['path'] 24 | 25 | loader = data_loader( 26 | data_path, 27 | split=cfg['data']['test_split'], 28 | is_transform=True, 29 | img_size=(cfg['data']['img_rows'], 30 | cfg['data']['img_cols']), 31 | img_norm=cfg['data']['img_norm'] 32 | ) 33 | 34 | n_classes = 0 35 | running_metrics_val = runningScoreDepth(cfg['data']['dataset']) 36 | 37 | testloader = data.DataLoader(loader, 38 | batch_size=cfg['training']['batch_size'], 39 | num_workers=0) 40 | 41 | # Load Model 42 | model = get_model(cfg['model'], cfg['task'], n_classes=n_classes).to(device) 43 | #weights = torch.load(cfg['testing']['trained_model']) 44 | weights = torch.load(cfg['testing']['trained_model'], map_location=lambda storage, loc: storage) 45 | model.load_state_dict(weights["model_state"]) 46 | model.eval() 47 | model.to(device) 48 | 49 | with torch.no_grad(): 50 | for i, (images, labels, img_path) in tqdm(enumerate(testloader)): 51 | images = images.to(device) 52 | labels = labels.to(device) 53 | 54 | outputs = model(images) # [batch_size, n_classes, height, width] 55 | if cfg['model']['arch'] == "dispnet" and cfg['task'] == "depth": 56 | outputs = 1 / outputs 57 | 58 | pred = outputs.squeeze(1).data.cpu().numpy() 59 | gt = labels.data.squeeze(1).cpu().numpy() 60 | 61 | running_metrics_val.update(gt=gt, pred=pred) 62 | 63 | val_result = running_metrics_val.get_scores() 64 | for k, v in val_result.items(): 65 | print(k, v) 66 | 67 | 68 | if __name__ == "__main__": 69 | parser = argparse.ArgumentParser(description="Hyperparams") 70 | parser.add_argument( 71 | "--config", 72 | nargs="?", 73 | type=str, 74 | default="configs/fcn_cityscapes_depth.yml", 75 | help="Config file to be used", 76 | ) 77 | 78 | args = parser.parse_args() 79 | 80 | with open(args.config) as fp: 81 | cfg = yaml.load(fp) 82 | 83 | test(cfg, args) 84 | 85 | 
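For reference, the overlap score recorded per pixel in saliency_iou.py above reduces to a simple operation: the two saliency maps are min-max normalized, binarized at a common threshold, and the IoU of the resulting masks is taken. The snippet below is a minimal, self-contained restatement of that metric under this reading; the helper name thresholded_saliency_iou and the toy arrays are illustrative only and are not part of the repository.

    import numpy as np

    def thresholded_saliency_iou(sal_seg, sal_depth, threshold):
        # binarize both normalized saliency maps at the same threshold
        mask_seg = sal_seg >= threshold
        mask_depth = sal_depth >= threshold
        union = np.logical_or(mask_seg, mask_depth).sum()
        inter = np.logical_and(mask_seg, mask_depth).sum()
        # guard against an empty union, which the original script does not special-case
        return inter / union if union > 0 else 0.0

    # toy 3x3 maps already scaled to [0, 1]
    sal_seg = np.array([[0.0, 0.2, 0.9],
                        [0.1, 0.8, 0.7],
                        [0.0, 0.1, 0.6]])
    sal_depth = np.array([[0.0, 0.1, 0.8],
                          [0.2, 0.9, 0.1],
                          [0.0, 0.0, 0.7]])
    print(thresholded_saliency_iou(sal_seg, sal_depth, threshold=0.5))  # 3 / 4 = 0.75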
-------------------------------------------------------------------------------- /test_depth_kitti.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from scipy.misc import imresize 4 | from scipy.ndimage.interpolation import zoom 5 | import numpy as np 6 | from path import Path 7 | import argparse 8 | from tqdm import tqdm 9 | 10 | from ptsemseg.models.fcn_depth import * 11 | from ptsemseg.models.segnet_depth import * 12 | from ptsemseg.models.frrn_depth import * 13 | from ptsemseg.models.deeplab_depth import * 14 | from ptsemseg.models.fcrn_depth import * 15 | from ptsemseg.models.dispnet_depth import * 16 | 17 | from kitti_depth_eval.depth_evaluation_utils import test_framework_KITTI as test_framework 18 | 19 | 20 | parser = argparse.ArgumentParser(description='Script for depth testing with corresponding groundTruth', 21 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 22 | parser.add_argument("--model_name", type=str, default='dispnet', choices=["fcn", "frrnA", "segnet", "deeplab", "dispnet", "fcrn"]) 23 | parser.add_argument("--model_path", default='runs/frrn_kitti_depth/33888_128_416_bs4_smooth1000/frrn_kitti_best_model.pkl', 24 | type=str, help="pretrained model path") 25 | parser.add_argument("--img_height", default=128, type=int, help="Image height") 26 | parser.add_argument("--img_width", default=416, type=int, help="Image width") 27 | parser.add_argument("--min-depth", default=1e-3) 28 | parser.add_argument("--max-depth", default=80) 29 | parser.add_argument("--pred_disp", action='store_true', 30 | help="model predicts disparity instead of depth if selected") 31 | 32 | parser.add_argument("--dataset_dir", default='../kitti', type=str, help="Kitti raw dataset directory") 33 | parser.add_argument("--dataset_list", default='kitti_depth_eval/test_files_eigen.txt', 34 | type=str, help="Kitti test dataset list file") 35 | 36 | device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") 37 | 38 | 39 | def get_depth_model(model_name): 40 | try: 41 | return { 42 | "fcn": fcn_depth(), 43 | "frrnA": frrn_depth(model_type = "A"), 44 | "segnet": segnet_depth(), 45 | "deeplab": deeplab_depth(), 46 | "dispnet": dispnet_depth(), 47 | "fcrn": fcrn_depth(), 48 | }[model_name] 49 | except: 50 | raise("Model {} not available".format(model_name)) 51 | 52 | 53 | @torch.no_grad() 54 | def main(): 55 | args = parser.parse_args() 56 | 57 | model = get_depth_model(args.model_name).to(device) 58 | weights = torch.load(args.model_path) 59 | # weights = torch.load(args.model_path, map_location=lambda storage, loc: storage) 60 | model.load_state_dict(weights['model_state']) 61 | model.eval() 62 | 63 | seq_length = 0 64 | 65 | dataset_dir = Path(args.dataset_dir) 66 | with open(args.dataset_list, 'r') as f: 67 | test_files = list(f.read().splitlines()) 68 | 69 | framework = test_framework(dataset_dir, test_files, seq_length, args.min_depth, args.max_depth) 70 | 71 | print('{} files to test'.format(len(test_files))) 72 | errors = np.zeros((2, 7, len(test_files)), np.float32) 73 | 74 | 75 | for j, sample in enumerate(tqdm(framework)): 76 | tgt_img = sample['tgt'] # [375, 1242, 3] ndarray, original RGB image 77 | 78 | h,w,_ = tgt_img.shape 79 | if h != args.img_height or w != args.img_width: 80 | tgt_img = imresize(tgt_img, (args.img_height, args.img_width)).astype(np.float32) 81 | 82 | tgt_img = np.transpose(tgt_img, (2, 0, 1)) 83 | tgt_img = torch.from_numpy(tgt_img).unsqueeze(0) 84 | tgt_img = ((tgt_img/255 - 
0.5)/0.5).to(device) # normalize to [-1, 1] 85 | 86 | pred = model(tgt_img).cpu().numpy()[0,0] 87 | gt_depth = sample['gt_depth'] 88 | 89 | if args.pred_disp: 90 | pred_depth = 1 / pred 91 | else: 92 | pred_depth = pred 93 | 94 | # upsample to gt depth resolution, [375, 1242] 95 | # and mask out pixels with depth not in [min_depth, max_depth] 96 | pred_depth_zoomed = zoom(pred_depth, 97 | (gt_depth.shape[0]/pred_depth.shape[0], 98 | gt_depth.shape[1]/pred_depth.shape[1]) 99 | ).clip(args.min_depth, args.max_depth) 100 | if sample['mask'] is not None: 101 | pred_depth_zoomed = pred_depth_zoomed[sample['mask']] 102 | gt_depth = gt_depth[sample['mask']] 103 | 104 | errors[1, :, j] = compute_errors(gt_depth, pred_depth_zoomed) 105 | 106 | mean_errors = errors.mean(2) 107 | error_names = ['abs_rel','sq_rel','rms','log_rms','a1','a2','a3'] 108 | 109 | print("Results : ") 110 | print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names)) 111 | print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[1])) 112 | 113 | 114 | def compute_errors(gt, pred): 115 | thresh = np.maximum((gt / pred), (pred / gt)) 116 | a1 = (thresh < 1.25 ).mean() 117 | a2 = (thresh < 1.25 ** 2).mean() 118 | a3 = (thresh < 1.25 ** 3).mean() 119 | 120 | rmse = (gt - pred) ** 2 121 | rmse = np.sqrt(rmse.mean()) 122 | 123 | rmse_log = (np.log(gt) - np.log(pred)) ** 2 124 | rmse_log = np.sqrt(rmse_log.mean()) 125 | 126 | abs_rel = np.mean(np.abs(gt - pred) / gt) 127 | sq_rel = np.mean(((gt - pred)**2) / gt) 128 | 129 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 130 | 131 | 132 | if __name__ == '__main__': 133 | main() 134 | -------------------------------------------------------------------------------- /test_seg_cityscapes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import yaml 4 | import torch 5 | import argparse 6 | import timeit 7 | import numpy as np 8 | import scipy.misc as m 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import torchvision.models as models 12 | 13 | from torch.backends import cudnn 14 | from torch.utils import data 15 | 16 | from tqdm import tqdm 17 | 18 | from ptsemseg.models import get_model 19 | from ptsemseg.loader import get_loader 20 | from ptsemseg.metrics import runningScoreSeg 21 | from ptsemseg.utils import convert_state_dict 22 | 23 | torch.backends.cudnn.benchmark = True 24 | 25 | # test code for cityscapes segmentation 26 | def test(cfg, args): 27 | 28 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 29 | 30 | # Setup Dataloader 31 | data_loader = get_loader(cfg['data']['dataset'], cfg['task']) 32 | data_path = cfg['data']['path'] 33 | 34 | loader = data_loader( 35 | data_path, 36 | split=cfg['data']['test_split'], 37 | is_transform=True, 38 | img_size=(cfg['data']['img_rows'], 39 | cfg['data']['img_cols']), 40 | img_norm=cfg['data']['img_norm'] 41 | ) 42 | 43 | n_classes = loader.n_classes 44 | 45 | testloader = data.DataLoader(loader, 46 | batch_size=cfg['training']['batch_size'], 47 | num_workers=0) 48 | 49 | # Setup Model 50 | model = get_model(cfg['model'], cfg['task'], n_classes=n_classes).to(device) 51 | weights = torch.load(cfg['testing']['trained_model'], map_location=lambda storage, loc: storage) 52 | model.load_state_dict(weights["model_state"]) 53 | 54 | model.eval() 55 | model.to(device) 56 | 57 | for i, (images, labels, img_path) in tqdm(enumerate(testloader)): 58 | images = 
images.to(device) 59 | 60 | outputs = model(images) 61 | pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0) 62 | 63 | decoded = loader.decode_segmap_tocolor(pred) # color segmentation mask 64 | decoded_labelID = loader.decode_segmap_tolabelId(pred) # segmentation mask of labelIDs for online test 65 | print("Classes found: ", np.unique(decoded_labelID)) 66 | 67 | # m.imsave("output.png", decoded) 68 | 69 | out_file_name = [img_path[0][39:-16], '*.png'] 70 | out_file_name = ''.join(out_file_name) 71 | out_path = os.path.join(args.out_path, out_file_name) 72 | 73 | decoded_labelID = m.imresize(decoded_labelID, (1024, 2048), "nearest", mode="F") 74 | m.toimage(decoded_labelID, high=np.max(decoded_labelID), low=np.min(decoded_labelID)).save(out_path) 75 | print("Segmentation Mask Saved at: {}".format(out_path)) 76 | 77 | 78 | if __name__ == "__main__": 79 | parser = argparse.ArgumentParser(description="Hyperparams") 80 | parser.add_argument( 81 | "--config", 82 | nargs="?", 83 | type=str, 84 | default="configs/fcn8s_cityscapes.yml", 85 | help="Config file to be used", 86 | ) 87 | 88 | parser.add_argument( 89 | "--out_path", 90 | nargs="?", 91 | type=str, 92 | default="./test_output/fcn8s_cityscapes", 93 | help="Path of the output segmap", 94 | ) 95 | 96 | args = parser.parse_args() 97 | 98 | with open(args.config) as fp: 99 | cfg = yaml.load(fp) 100 | 101 | test(cfg, args) 102 | -------------------------------------------------------------------------------- /validate_seg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import yaml 4 | import torch 5 | import argparse 6 | import timeit 7 | import numpy as np 8 | import scipy.misc as misc 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import torchvision.models as models 12 | 13 | from torch.backends import cudnn 14 | from torch.utils import data 15 | 16 | from tqdm import tqdm 17 | 18 | from ptsemseg.models import get_model 19 | from ptsemseg.loader import get_loader 20 | from ptsemseg.metrics import runningScoreSeg 21 | 22 | torch.backends.cudnn.benchmark = True 23 | 24 | ### for segmentation validation 25 | 26 | def validate(cfg, args): 27 | 28 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 29 | 30 | # Setup Dataloader 31 | data_loader = get_loader(cfg['data']['dataset'], cfg['task']) 32 | data_path = cfg['data']['path'] 33 | 34 | loader = data_loader( 35 | data_path, 36 | split=cfg['data']['val_split'], 37 | is_transform=True, 38 | img_norm=cfg['data']['img_norm'], 39 | img_size=(cfg['data']['img_rows'], 40 | cfg['data']['img_cols']), 41 | ) 42 | 43 | n_classes = loader.n_classes 44 | valloader = data.DataLoader(loader, 45 | batch_size=cfg['training']['batch_size'], 46 | num_workers=0) 47 | running_metrics = runningScoreSeg(n_classes) 48 | 49 | # Setup Model 50 | 51 | model = get_model(cfg['model'], cfg['task'], n_classes).to(device) 52 | state = torch.load(args.model_path)["model_state"] 53 | #state = torch.load(args.model_path, map_location=lambda storage, loc: storage)["model_state"] 54 | model.load_state_dict(state) 55 | model.to(device) 56 | model.eval() 57 | 58 | with torch.no_grad(): 59 | for i, (images, labels, images_path) in enumerate(valloader): 60 | images = images.to(device) 61 | outputs = model(images) 62 | pred = outputs.data.max(1)[1].cpu().numpy() 63 | gt = labels.numpy() 64 | running_metrics.update(gt, pred) 65 | 66 | score, class_iou = running_metrics.get_scores() 67 | 68 | for k, v in 
score.items(): 69 | print(k, v) 70 | 71 | for i in range(n_classes): 72 | print(i, class_iou[i]) 73 | 74 | 75 | if __name__ == "__main__": 76 | parser = argparse.ArgumentParser(description="Hyperparams") 77 | parser.add_argument( 78 | "--config", 79 | nargs="?", 80 | type=str, 81 | default="configs/segnet_kitti_seg.yml", 82 | help="Config file to be used", 83 | ) 84 | parser.add_argument( 85 | "--model_path", 86 | nargs="?", 87 | type=str, 88 | default="runs/segnet_kitti_seg/3574_256_832_cityscaperPretrained_lr5/segnet_kitti_best_model.pkl", 89 | help="Path to the saved model", 90 | ) 91 | args = parser.parse_args() 92 | 93 | with open(args.config) as fp: 94 | cfg = yaml.load(fp) 95 | 96 | validate(cfg, args) 97 | --------------------------------------------------------------------------------
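The seven numbers printed by test_depth_kitti.py are the standard depth-evaluation metrics used with the Eigen test split: absolute relative error, squared relative error, RMSE, RMSE in log space, and the accuracy ratios delta < 1.25, 1.25^2, 1.25^3. A quick standalone check of those formulas on made-up depths (the values below are illustrative only and unrelated to KITTI) could look like this, reusing the same arithmetic as compute_errors above:

    import numpy as np

    def compute_errors(gt, pred):
        # same formulas as compute_errors in test_depth_kitti.py
        thresh = np.maximum(gt / pred, pred / gt)
        a1 = (thresh < 1.25).mean()
        a2 = (thresh < 1.25 ** 2).mean()
        a3 = (thresh < 1.25 ** 3).mean()
        rmse = np.sqrt(((gt - pred) ** 2).mean())
        rmse_log = np.sqrt(((np.log(gt) - np.log(pred)) ** 2).mean())
        abs_rel = np.mean(np.abs(gt - pred) / gt)
        sq_rel = np.mean(((gt - pred) ** 2) / gt)
        return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3

    gt = np.array([2.0, 5.0, 10.0, 40.0])     # hypothetical ground-truth depths (meters)
    pred = np.array([2.2, 4.5, 12.0, 44.0])   # hypothetical predictions
    abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 = compute_errors(gt, pred)
    print(abs_rel)  # 0.125 -> mean of [0.1, 0.1, 0.2, 0.1]
    print(a1)       # 1.0   -> every ratio max(gt/pred, pred/gt) is below 1.25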