├── .gitignore ├── LICENSE ├── README.md ├── config ├── compare_tracking.yaml ├── experiment_place_recognition.yaml ├── experiment_tracking.yaml ├── tracking_compare_plot.yaml ├── train_gcn_coco.yaml ├── train_maskrcnn_coco.yaml ├── train_superpoint_coco.yaml ├── train_superpoint_synthetic.yaml ├── validate_detection.yaml └── validate_gcn.yaml ├── datasets ├── coco │ ├── coco.py │ ├── coco_cat.txt │ └── paths_catalog.py ├── evaluation │ └── coco │ │ ├── __init__.py │ │ └── coco_eval.py ├── kitti │ ├── kitti_odomery.py │ └── kitti_tracking.py ├── otb │ └── otb_tracking.py ├── synthetic │ └── synthetic.py ├── utils │ ├── __init__.py │ ├── augmentation_legacy.py │ ├── batch_collator.py │ ├── build_data.py │ ├── gcn_mask_augmentation.py │ ├── homographies.py │ ├── pipeline.py │ ├── postprocess.py │ ├── preprocess.py │ └── transforms.py └── vot │ └── vot_tracking.py ├── debug_tools ├── command.txt ├── draw_points.py ├── object_tracking.py ├── show_batch.py ├── show_detections.py ├── show_match.py ├── show_points_detection.py ├── test_batch_H.py └── test_data_process.py ├── experiments ├── compare_tracking.py ├── demo │ ├── kitti-relocalization.gif │ ├── object-matching1.gif │ └── object-matching2.gif ├── object_tracking │ ├── object_tracking.py │ └── single_object_tracking.py ├── place_recogination │ ├── offline_process.py │ ├── offline_topK.py │ └── online_relocalization.py ├── show_object_matching │ ├── draw_object.py │ └── show_object_matching.py └── utils │ └── utils.py ├── model ├── backbone │ ├── fcn.py │ └── resnet_fpn.py ├── build_model.py ├── graph_models │ ├── attention.py │ ├── descriptor_loss.py │ └── object_descriptor.py ├── inference.py ├── mask_rcnn │ ├── mask_rcnn.py │ └── transform.py └── superpoint │ ├── superpoint_loss.py │ ├── superpoint_public_model.py │ └── vgg_like.py ├── structures ├── __init__.py └── segmentation_mask.py ├── train_gcn.py ├── train_maskrcnn.py ├── train_superpoint.py ├── utils ├── __init__.py ├── checkpoint.py ├── cv2_util.py ├── imports.py └── tools.py ├── validate_detection.py └── validate_gcn.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # Dataset 107 | CityScapes/ 108 | Lane/ 109 | 110 | # Generated 111 | scores/ 112 | 113 | # tmp files 114 | *.pyc 115 | runs/ 116 | *.pickle 117 | *.pth 118 | saving/* 119 | *.swp 120 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, SAIR Lab 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AirCode 2 | 3 | Xu, Kuan, [Chen Wang](https://chenwang.site), Chao Chen, Wei Wu, and Sebastian Scherer. "["AirCode: A Robust Object Encoding Method"](https://arxiv.org/abs/2105.00327)." IEEE Robotics and Automation Letters (2022). 
(Accepted to ICRA 2022) 4 | 5 | ## Demo 6 | Object matching comparison when the objects are non-rigid and the viewpoint changes: the left result is from our method, while the right is from NetVLAD 7 | 8 | ![](experiments/demo/object-matching1.gif) ![](experiments/demo/object-matching2.gif) 9 | 10 | Relocalization on the KITTI dataset 11 | 12 | ![](experiments/demo/kitti-relocalization.gif) 13 | 14 | 15 | ## Dependencies 16 | * Python 3.7 17 | * Torchvision 0.8.0 18 | * PyTorch 1.7.0 19 | * OpenCV 4.4.0 20 | * Matplotlib 3.3.3 21 | * NumPy 1.19.2 22 | * PyYAML 5.3.1 23 | 24 | 25 | ## Data 26 | Four datasets are used in our experiments. 27 | 28 | ### KITTI Odometry 29 | Used for the relocalization experiment. Three sequences are selected: "00", "05" and "06". 30 | 31 | ### KITTI Tracking 32 | Used for the multi-object matching experiment. Four sequences are selected: "0002", "0003", "0006" and "0010". 33 | 34 | ### VOT Datasets 35 | Used for the single-object matching experiment. We select three sequences from the VOT2019 dataset, "bluecar", "bus6" and "humans_corridor_occ_2_A", because the objects tracked in these sequences belong to categories in the COCO dataset, which is the data we used to train Mask R-CNN. 36 | 37 | ### OTB Datasets 38 | Used for the single-object matching experiment. We select five sequences: "BlurBody", "BlurCar2", "Human2", "Human7" and "Liquor". 39 | 40 | 41 | ## Examples 42 | Each command below reads its settings from a YAML file under `config/` (the `-c` argument); a minimal config-loading sketch is shown after the Pretrained Models subsection. 43 | ### Relocalization on KITTI Datasets 44 | 45 | 1. Extract object descriptors 46 | ``` 47 | python experiments/place_recogination/online_relocalization.py -c config/experiment_tracking.yaml -g 1 -s PATH_TO_SAVE_MIDDLE_RESULTS -d PATH_TO_DATASET -m PATH_TO_MODELS 48 | ``` 49 | 50 | 2. Compute precision-recall curves 51 | ``` 52 | python experiments/place_recogination/offline_process.py -c config/experiment_place_recognition.yaml -d PATH_TO_DATASET -n PATH_TO_MIDDLE_RESULTS -s PATH_TO_SAVE_RESULTS 53 | ``` 54 | 55 | 3. Compute top-K relocalization results 56 | ``` 57 | python experiments/place_recogination/offline_topK.py -c config/experiment_place_recognition.yaml -d PATH_TO_DATASET -n PATH_TO_MIDDLE_RESULTS -s PATH_TO_SAVE_RESULTS 58 | ``` 59 | 60 | ### Object Matching on OTB, VOT or KITTI Tracking Datasets 61 | 62 | * Run the multi-object matching experiment on the KITTI Tracking dataset 63 | Modify the [config file](config/experiment_tracking.yaml) and run 64 | ``` 65 | python experiments/object_tracking/object_tracking.py -c config/experiment_tracking.yaml -g 1 -s PATH_TO_SAVE_RESULTS -d PATH_TO_DATASET -m PATH_TO_MODELS 66 | ``` 67 | 68 | * Run the single-object matching experiment on the OTB or VOT datasets 69 | Modify the [config file](config/experiment_tracking.yaml) and run 70 | ``` 71 | python experiments/object_tracking/single_object_tracking.py -c config/experiment_tracking.yaml -g 1 -s PATH_TO_SAVE_RESULTS -d PATH_TO_DATASET -m PATH_TO_MODELS 72 | ``` 73 | 74 | ### Pretrained Models 75 | 76 | * Pretrained models can be downloaded from [this link](https://github.com/sair-lab/AirCode/releases/tag/v2.0.0).
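
The scripts above parse the `-c` YAML file themselves; the snippet below is only a minimal sketch (not part of the original scripts) of loading such a config with PyYAML and reading a few fields that appear in `config/experiment_tracking.yaml`. The repository's own code reads the file contents and calls `yaml.load`; `yaml.safe_load` is used here for the same effect.

```
import yaml

# Minimal sketch: load an experiment config the same way the scripts receive it
# through their -c argument. The field names below are taken from
# config/experiment_tracking.yaml in this repository.
with open("config/experiment_tracking.yaml", "r", encoding="utf-8") as f:
    configs = yaml.safe_load(f)

data_config = configs["data"]      # dataset name, normal_size, nclass, ...
model_config = configs["model"]    # superpoint / maskrcnn / gcn settings

print(data_config["name"], data_config["normal_size"])
print(model_config["gcn"]["descriptor_dim"], model_config["gcn"]["nout"])
```

The training configs (`config/train_*.yaml`) follow the same layout, with additional sections such as `augmentation` and `train`.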
77 | -------------------------------------------------------------------------------- /config/compare_tracking.yaml: -------------------------------------------------------------------------------- 1 | intervals: [1, 3, 5] # selected lines; each number represents an interval 2 | title: kitti # figure title 3 | colors: ['green', 'red', 'blue', 'yellow', 'darkviolet', 'sandybrown'] # line colors 4 | linewidth: 3 # line width 5 | xlabel: recall # x-axis name 6 | ylabel: precision # y-axis name 7 | fontsize: 20 # font size 8 | figsize: (10, 10) # figure size, inches 9 | dpi: 100 # dots per inch 10 | -------------------------------------------------------------------------------- /config/experiment_place_recognition.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'kitti' 3 | nclass: 81 4 | normal_size: [384, 1280] # min_size, max_size 5 | cache_in_memory: false 6 | validation_size: 96 7 | model: 8 | superpoint: 9 | cell : 8 10 | detection_threshold: 0.2 11 | maskrcnn: 12 | add_maskrcnn: true 13 | trainable_layers: 5 # backbone trainable layers 14 | fix_backbone: true 15 | backbone_type: 'resnet50' 16 | image_mean: [0.45, 0.45, 0.45] 17 | image_std: [0.225, 0.225, 0.225] 18 | gcn: 19 | descriptor_dim: 256 20 | points_encoder_dims: [2, 4, 8, 16] 21 | hidden_dim: 512 22 | dropout: 0 23 | alpha: 0.2 24 | nheads: 4 25 | nout: 2048 -------------------------------------------------------------------------------- /config/experiment_tracking.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | ### kitti datasets 3 | name: 'kitti' 4 | normal_size: [384, 1280] # min_size, max_size 5 | ### OTB or VOT datasets 6 | # name: 'otb' 7 | # normal_size: [384, 1280] # min_size, max_size 8 | ### 9 | nclass: 81 10 | cache_in_memory: false 11 | validation_size: 96 12 | model: 13 | superpoint: 14 | cell : 8 15 | detection_threshold: 0.2 16 | maskrcnn: 17 | add_maskrcnn: true 18 | trainable_layers: 5 # backbone trainable layers 19 | fix_backbone: true 20 | backbone_type: 'resnet50' 21 | image_mean: [0.45, 0.45, 0.45] 22 | image_std: [0.225, 0.225, 0.225] 23 | gcn: 24 | descriptor_dim: 256 25 | points_encoder_dims: [2, 4, 8, 16] 26 | hidden_dim: 512 27 | dropout: 0 28 | alpha: 0.2 29 | nheads: 4 30 | nout: 2048 -------------------------------------------------------------------------------- /config/tracking_compare_plot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/config/tracking_compare_plot.yaml -------------------------------------------------------------------------------- /config/train_gcn_coco.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'coco' 3 | nclass: 81 4 | # TRAIN: 'coco_2014_train' 5 | TRAIN: 'coco_2014_train' 6 | VAL: 'coco_2014_minival' 7 | TEST: 'coco_2014_valminusminival' 8 | normal_size: [320, 320] # min_size, max_size 9 | cache_in_memory: false 10 | validation_size: 96 11 | augmentation: 12 | photometric: 13 | enable: true 14 | primitives: [ 15 | 'random_brightness', 'random_contrast', 'additive_speckle_noise', 16 | 'additive_gaussian_noise', 'add_shade', 'motion_blur'] 17 | params: 18 | random_brightness: {max_change: 50} 19 | random_contrast: {max_change: [0.5, 1.5]} 20 | additive_gaussian_noise: {std: [0, 10]} 21 | additive_speckle_noise: {intensity: 5} 22 | add_shade: 23 | amplitude: [-0.5, 0.5] 24 | 
kernel_size_interval: [100, 150] 25 | motion_blur: {max_ksize: 3} 26 | homographic: 27 | enable: false # not implemented 28 | gcn_mask: 29 | enable: false 30 | primitives: [ 31 | 'erode', 'dilate', 'random_region_zero', 'random_block_zero', 32 | 'random_block_one'] 33 | params: 34 | erode: {kernel_size: 10} 35 | dilate: {kernel_size: 10} 36 | random_region_zero: 37 | scale_x: 0.3 38 | scale_y: 0.3 39 | random_block_zero: 40 | num: 5 41 | size: 10 42 | random_block_one: 43 | num: 5 44 | size: 10 45 | warped_pair: 46 | enable: false 47 | params: 48 | translation: true 49 | rotation: true 50 | scaling: true 51 | perspective: true 52 | scaling_amplitude: 0.2 53 | perspective_amplitude_x: 0.2 54 | perspective_amplitude_y: 0.2 55 | patch_ratio: 0.85 56 | max_angle: 1.57 57 | allow_artifacts: true 58 | valid_border_margin: 3 59 | model: 60 | superpoint: 61 | cell : 8 62 | detection_threshold: 0.2 63 | mask_rcnn: 64 | add_maskrcnn: true 65 | trainable_layers: 5 # backbone trainable layers 66 | fix_backbone: true 67 | backbone_type: 'resnet50' 68 | image_mean: [0.45, 0.45, 0.45] 69 | image_std: [0.225, 0.225, 0.225] 70 | gcn: 71 | descriptor_dim: 256 72 | points_encoder_dims: [2, 4, 8, 16] 73 | hidden_dim: 512 74 | dropout: 0 75 | alpha: 0.2 76 | nheads: 4 77 | nout: 2048 78 | train: 79 | batch_szie: 16 80 | positive_margin: 1 81 | negative_margin: 0.2 82 | lambda_d: 0.5 83 | epochs : 100 84 | lr : 0.0001 85 | momentum : 0 86 | w_decay : 0.00001 87 | milestones : [1000, 2000, 5000, 10000, 15000] # iter 88 | gamma : 0.3 89 | checkpoint: 1000 90 | weight_lambda: [0.1, 10] 91 | -------------------------------------------------------------------------------- /config/train_maskrcnn_coco.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'coco' 3 | nclass: 81 4 | # TRAIN: 'coco_2014_train' 5 | TRAIN: 'coco_2014_train' 6 | VAL: 'coco_2014_minival' 7 | TEST: 'coco_2014_valminusminival' 8 | normal_size: [320, 320] # min_size, max_size 9 | cache_in_memory: false 10 | validation_size: 96 11 | model: 12 | maskrcnn: 13 | trainable_layers: 5 # backbone trainable layers 14 | backbone_type: 'resnet50' 15 | image_mean: [0.45, 0.45, 0.45] 16 | image_std: [0.225, 0.225, 0.225] 17 | batch_size : 8 18 | epochs : 10 19 | lr : 0.00001 20 | momentum : 0 21 | w_decay : 0.0001 22 | milestones : [10000, 20000, 50000, 100000, 150000] # iter 23 | gamma : 0.3 24 | dataset_size : 10000 25 | checkpoint: 1000 -------------------------------------------------------------------------------- /config/train_superpoint_coco.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'coco' 3 | nclass: 81 4 | # TRAIN: 'coco_2014_train' 5 | TRAIN: 'coco_2014_train' 6 | VAL: 'coco_2014_minival' 7 | TEST: 'coco_2014_valminusminival' 8 | normal_size: [320, 320] # min_size, max_size 9 | cache_in_memory: false 10 | validation_size: 96 11 | augmentation: 12 | photometric: 13 | enable: true 14 | primitives: [ 15 | 'random_brightness', 'random_contrast', 'additive_speckle_noise', 16 | 'additive_gaussian_noise', 'add_shade', 'motion_blur'] 17 | params: 18 | random_brightness: {max_change: 50} 19 | random_contrast: {max_change: [0.5, 1.5]} 20 | additive_gaussian_noise: {std: [0, 10]} 21 | additive_speckle_noise: {intensity: 5} 22 | add_shade: 23 | amplitude: [-0.5, 0.5] 24 | kernel_size_interval: [100, 150] 25 | motion_blur: {max_ksize: 3} 26 | homographic: 27 | enable: false # not implemented 28 | warped_pair: 29 | enable: false 30 | params: 31 | 
translation: true 32 | rotation: true 33 | scaling: true 34 | perspective: true 35 | scaling_amplitude: 0.2 36 | perspective_amplitude_x: 0.2 37 | perspective_amplitude_y: 0.2 38 | patch_ratio: 0.85 39 | max_angle: 1.57 40 | allow_artifacts: true 41 | valid_border_margin: 3 42 | model: 43 | superpoint: 44 | cell : 8 45 | train: 46 | name: 'superpoint' 47 | add_descriptor: 0 48 | lambda_d: 250 49 | positive_margin: 1 50 | negative_margin: 0.2 51 | lambda_loss: 5 52 | kernel_reg: 0. 53 | nms: 4 54 | batch_size : 8 55 | epochs : 10 56 | lr : 0.00001 57 | momentum : 0 58 | w_decay : 0.0001 59 | milestones : [10000, 20000, 50000, 100000, 150000] # iter 60 | gamma : 0.3 61 | dataset_size : 10000 62 | checkpoint: 1000 63 | gaussian_region: 64 | radius : 8 65 | milestones : [20000, 50000, 100000] # iter 66 | gamma : 0.5 67 | eval: 68 | detection_threshold: 0.15 69 | batch_size : 1 70 | dataset_size : 500 -------------------------------------------------------------------------------- /config/train_superpoint_synthetic.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'synthetic' 3 | nclass: 81 4 | normal_size: [320, 320] # min_size, max_size 5 | cache_in_memory: false 6 | validation_size: 96 7 | augmentation: 8 | photometric: 9 | enable: true 10 | primitives: [ 11 | 'random_brightness', 'random_contrast', 'additive_speckle_noise', 12 | 'additive_gaussian_noise', 'add_shade', 'motion_blur'] 13 | params: 14 | random_brightness: {max_change: 50} 15 | random_contrast: {max_change: [0.5, 1.5]} 16 | additive_gaussian_noise: {std: [0, 10]} 17 | additive_speckle_noise: {intensity: 5} 18 | add_shade: 19 | amplitude: [-0.5, 0.5] 20 | kernel_size_interval: [100, 150] 21 | motion_blur: {max_ksize: 3} 22 | homographic: 23 | enable: false # not implemented 24 | warped_pair: 25 | enable: false 26 | params: 27 | translation: true 28 | rotation: true 29 | scaling: true 30 | perspective: true 31 | scaling_amplitude: 0.2 32 | perspective_amplitude_x: 0.2 33 | perspective_amplitude_y: 0.2 34 | patch_ratio: 0.85 35 | max_angle: 1.57 36 | allow_artifacts: true 37 | valid_border_margin: 3 38 | model: 39 | superpoint: 40 | cell : 8 41 | train: 42 | name: 'superpoint' 43 | add_descriptor: false 44 | lambda_d: 250 45 | positive_margin: 1 46 | negative_margin: 0.2 47 | lambda_loss: 5 48 | kernel_reg: 0. 
49 | nms: 4 50 | batch_size : 2 51 | epochs : 10 52 | lr : 0.00001 53 | momentum : 0 54 | w_decay : 0.0001 55 | milestones : [10000, 20000, 50000, 100000, 150000] # iter 56 | gamma : 0.3 57 | dataset_size : 10000 58 | checkpoint: 1000 59 | gaussian_region: 60 | radius : 8 61 | milestones : [20000, 50000, 100000] # iter 62 | gamma : 0.5 63 | eval: 64 | detection_threshold: 0.15 65 | batch_size : 1 66 | dataset_size : 500 -------------------------------------------------------------------------------- /config/validate_detection.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'coco' 3 | nclass: 81 4 | TRAIN: 'coco_2014_train' 5 | VAL: 'coco_2014_minival' 6 | TEST: 'coco_2014_valminusminival' 7 | normal_size: [384, 1280] # min_size, max_size 8 | cache_in_memory: false 9 | validation_size: 96 10 | augmentation: 11 | photometric: 12 | enable: true 13 | primitives: [ 14 | 'random_brightness', 'random_contrast', 'additive_speckle_noise', 15 | 'additive_gaussian_noise', 'add_shade', 'motion_blur'] 16 | params: 17 | random_brightness: {max_change: 50} 18 | random_contrast: {max_change: [0.5, 1.5]} 19 | additive_gaussian_noise: {std: [0, 10]} 20 | additive_speckle_noise: {intensity: 5} 21 | add_shade: 22 | amplitude: [-0.5, 0.5] 23 | kernel_size_interval: [100, 150] 24 | motion_blur: {max_ksize: 3} 25 | homographic: 26 | enable: false # not implemented 27 | warped_pair: 28 | enable: false 29 | params: 30 | translation: true 31 | rotation: true 32 | scaling: true 33 | perspective: true 34 | scaling_amplitude: 0.2 35 | perspective_amplitude_x: 0.2 36 | perspective_amplitude_y: 0.2 37 | patch_ratio: 0.85 38 | max_angle: 1.57 39 | allow_artifacts: true 40 | valid_border_margin: 3 41 | model: 42 | superpoint: 43 | cell : 8 44 | detection_threshold: 0.2 45 | batch_size : 1 46 | maskrcnn: 47 | trainable_layers: 0 # backbone trainable layers 48 | fix_backbone: true 49 | backbone_type: 'resnet50' 50 | image_mean: [0.45, 0.45, 0.45] 51 | image_std: [0.225, 0.225, 0.225] 52 | gcn: 53 | descriptor_dim: 256 54 | points_encoder_dims: [2, 4, 8, 16] 55 | hidden_dim: 512 56 | dropout: 0 57 | alpha: 0.2 58 | nheads: 4 59 | nout: 2048 60 | -------------------------------------------------------------------------------- /config/validate_gcn.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'coco' 3 | nclass: 81 4 | # TRAIN: 'coco_2014_train' 5 | TRAIN: 'coco_2014_train' 6 | VAL: 'coco_2014_minival' 7 | TEST: 'coco_2014_valminusminival' 8 | normal_size: [320, 320] # min_size, max_size 9 | cache_in_memory: false 10 | validation_size: 96 11 | model: 12 | superpoint: 13 | cell : 8 14 | detection_threshold: 0.2 15 | maskrcnn: 16 | add_maskrcnn: true 17 | trainable_layers: 5 # backbone trainable layers 18 | fix_backbone: true 19 | backbone_type: 'resnet50' 20 | image_mean: [0.45, 0.45, 0.45] 21 | image_std: [0.225, 0.225, 0.225] 22 | gcn: 23 | descriptor_dim: 256 24 | points_encoder_dims: [2, 4, 8, 16] 25 | hidden_dim: 512 26 | dropout: 0 27 | alpha: 0.2 28 | nheads: 4 29 | nout: 2048 -------------------------------------------------------------------------------- /datasets/coco/coco.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | import torchvision 11 | import cv2 12 | import numpy as np 13 | 14 | 
from structures.segmentation_mask import SegmentationMask 15 | from datasets.utils import pipeline as pp 16 | from datasets.utils import transforms as T 17 | 18 | min_keypoints_per_image = 10 19 | 20 | 21 | def _count_visible_keypoints(anno): 22 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 23 | 24 | 25 | def _has_only_empty_bbox(anno): 26 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 27 | 28 | 29 | def has_valid_annotation(anno): 30 | # if it's empty, there is no annotation 31 | if len(anno) == 0: 32 | return False 33 | # if all boxes have close to zero area, there is no annotation 34 | if _has_only_empty_bbox(anno): 35 | return False 36 | # keypoints task have a slight different critera for considering 37 | # if an annotation is valid 38 | if "keypoints" not in anno[0]: 39 | return True 40 | # for keypoint detection tasks, only consider valid images those 41 | # containing at least min_keypoints_per_image 42 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 43 | return True 44 | return False 45 | 46 | 47 | class COCODataset(torchvision.datasets.coco.CocoDetection): 48 | def __init__( 49 | self, image_root, ann_file, config, remove_images_without_annotations, 50 | transforms=None 51 | ): 52 | super(COCODataset, self).__init__(image_root, ann_file) 53 | # sort indices for reproducible results 54 | self.ids = sorted(self.ids) 55 | 56 | # filter images without detection annotations 57 | if remove_images_without_annotations: 58 | ids = [] 59 | for img_id in self.ids: 60 | ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None) 61 | anno = self.coco.loadAnns(ann_ids) 62 | if has_valid_annotation(anno): 63 | ids.append(img_id) 64 | self.ids = ids 65 | 66 | self.categories = {cat['id']: cat['name'] for cat in self.coco.cats.values()} 67 | 68 | self.json_category_id_to_contiguous_id = { 69 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 70 | } 71 | self.contiguous_category_id_to_json_id = { 72 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 73 | } 74 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 75 | self._transforms = transforms 76 | 77 | # for superpoint 78 | self.length = len(self.ids) 79 | self.config = config 80 | self.points_root = image_root + "_points" 81 | 82 | def __getitem__(self, idx): 83 | data = {} 84 | 85 | # image 86 | image, anno = super(COCODataset, self).__getitem__(idx) 87 | image = cv2.cvtColor(np.asarray(image),cv2.COLOR_RGB2GRAY) 88 | image = cv2.merge([image, image, image]) 89 | image = self._transforms(image) 90 | data['image'] = image 91 | 92 | # for maskrcnn 93 | # filter crowd annotations 94 | # TODO might be better to add an extra field 95 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 96 | boxes = [obj["bbox"] for obj in anno] 97 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes, xywh 98 | # remove small bbox 99 | keep = (boxes[:, 2] > 4) & (boxes[:, 3] > 4) 100 | boxes[:, 2] = boxes[:, 0] + boxes[:, 2] - 1 101 | boxes[:, 3] = boxes[:, 1] + boxes[:, 3] - 1 102 | data['boxes'] = boxes[keep] # [x1, y1, x2, y2] 103 | 104 | labels = [obj["category_id"] for obj in anno] 105 | labels = [self.json_category_id_to_contiguous_id[c] for c in labels] 106 | labels = torch.tensor(labels) 107 | data['labels'] = labels[keep] 108 | 109 | if anno and "segmentation" in anno[0]: 110 | masks = [obj["segmentation"] for obj in anno] 111 | masks = SegmentationMask(masks, (image.shape[2], image.shape[1]), mode='poly') 112 | masks = 
masks.get_mask_tensor() 113 | masks = masks 114 | if len(masks.shape) == 2: 115 | masks = masks.unsqueeze(0) 116 | data['masks'] = masks[keep] 117 | 118 | # for superpoint 119 | image_info = self.get_img_info(idx) 120 | image_name = image_info['file_name'].split('.')[0] 121 | data['image_name'] = image_name 122 | 123 | point_name = image_name + ".txt" 124 | point_path = os.path.join(self.points_root, point_name) 125 | points = np.loadtxt(point_path, dtype=np.float32, ndmin=2) 126 | if np.sum(points) < 0: 127 | points = np.empty((0, 2), dtype=np.float32) 128 | points = torch.tensor(points) 129 | data['points'] = points 130 | 131 | return data 132 | 133 | def get_img_info(self, index): 134 | img_id = self.id_to_img_map[index] 135 | img_data = self.coco.imgs[img_id] 136 | return img_data 137 | 138 | if __name__ == "__main__": 139 | 140 | import torchvision.transforms as transforms 141 | from debug_tools.show_batch import show_batch, show_numpy 142 | from torch.utils.data import Dataset, DataLoader 143 | from datasets.utils.batch_collator import BatchCollator 144 | import yaml 145 | 146 | root = "/home/haoyuefan/xk_data/superpoint/coco/full/coco/train2014" 147 | annFile = "/home/haoyuefan/xk_data/superpoint/coco/full/coco/annotations/instances_train2014.json" 148 | config = "/home/xukuan/code/object_rcnn/config/train_superpoint_coco.yaml" 149 | 150 | f = open(config, 'r', encoding='utf-8') 151 | configs = f.read() 152 | configs = yaml.load(configs) 153 | 154 | dataset = COCODataset(root, annFile, configs['data'], True, transforms=transforms.ToTensor()) 155 | 156 | print(dataset.categories) -------------------------------------------------------------------------------- /datasets/coco/coco_cat.txt: -------------------------------------------------------------------------------- 1 | {1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane', 6: 'bus', 7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light', 11: 'fire hydrant', 13: 'stop sign', 14: 'parking meter', 15: 'bench', 16: 'bird', 17: 'cat', 18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear', 24: 'zebra', 25: 'giraffe', 27: 'backpack', 28: 'umbrella', 31: 'handbag', 32: 'tie', 33: 'suitcase', 34: 'frisbee', 35: 'skis', 36: 'snowboard', 37: 'sports ball', 38: 'kite', 39: 'baseball bat', 40: 'baseball glove', 41: 'skateboard', 42: 'surfboard', 43: 'tennis racket', 44: 'bottle', 46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon', 51: 'bowl', 52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange', 56: 'broccoli', 57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut', 61: 'cake', 62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed', 67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse', 75: 'remote', 76: 'keyboard', 77: 'cell phone', 78: 'microwave', 79: 'oven', 80: 'toaster', 81: 'sink', 82: 'refrigerator', 84: 'book', 85: 'clock', 86: 'vase', 87: 'scissors', 88: 'teddy bear', 89: 'hair drier', 90: 'toothbrush'} -------------------------------------------------------------------------------- /datasets/coco/paths_catalog.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | from copy import deepcopy 6 | 7 | class DatasetCatalog(object): 8 | DATA_DIR = "" 9 | DATASETS = { 10 | "coco_2017_train": { 11 | "img_dir": "coco/train2017", 12 | "ann_file": "coco/annotations/instances_train2017.json" 13 | }, 14 | "coco_2017_val": { 15 | "img_dir": "coco/val2017", 16 | "ann_file": 
"coco/annotations/instances_val2017.json" 17 | }, 18 | "coco_2014_train": { 19 | "img_dir": "coco/train2014", 20 | "ann_file": "coco/annotations/instances_train2014.json" 21 | }, 22 | "coco_2014_val": { 23 | "img_dir": "coco/val2014", 24 | "ann_file": "coco/annotations/instances_val2014.json" 25 | }, 26 | "coco_2014_minival": { 27 | "img_dir": "coco/val2014", 28 | "ann_file": "coco/annotations/instances_minival2014.json" 29 | }, 30 | "coco_2014_valminusminival": { 31 | "img_dir": "coco/val2014", 32 | "ann_file": "coco/annotations/instances_valminusminival2014.json" 33 | }, 34 | "keypoints_coco_2014_train": { 35 | "img_dir": "coco/train2014", 36 | "ann_file": "coco/annotations/person_keypoints_train2014.json", 37 | }, 38 | "keypoints_coco_2014_val": { 39 | "img_dir": "coco/val2014", 40 | "ann_file": "coco/annotations/person_keypoints_val2014.json" 41 | }, 42 | "keypoints_coco_2014_minival": { 43 | "img_dir": "coco/val2014", 44 | "ann_file": "coco/annotations/person_keypoints_minival2014.json", 45 | }, 46 | "keypoints_coco_2014_valminusminival": { 47 | "img_dir": "coco/val2014", 48 | "ann_file": "coco/annotations/person_keypoints_valminusminival2014.json", 49 | }, 50 | "voc_2007_train": { 51 | "data_dir": "voc/VOC2007", 52 | "split": "train" 53 | }, 54 | "voc_2007_train_cocostyle": { 55 | "img_dir": "voc/VOC2007/JPEGImages", 56 | "ann_file": "voc/VOC2007/Annotations/pascal_train2007.json" 57 | }, 58 | "voc_2007_val": { 59 | "data_dir": "voc/VOC2007", 60 | "split": "val" 61 | }, 62 | "voc_2007_val_cocostyle": { 63 | "img_dir": "voc/VOC2007/JPEGImages", 64 | "ann_file": "voc/VOC2007/Annotations/pascal_val2007.json" 65 | }, 66 | "voc_2007_test": { 67 | "data_dir": "voc/VOC2007", 68 | "split": "test" 69 | }, 70 | "voc_2007_test_cocostyle": { 71 | "img_dir": "voc/VOC2007/JPEGImages", 72 | "ann_file": "voc/VOC2007/Annotations/pascal_test2007.json" 73 | }, 74 | "voc_2012_train": { 75 | "data_dir": "voc/VOC2012", 76 | "split": "train" 77 | }, 78 | "voc_2012_train_cocostyle": { 79 | "img_dir": "voc/VOC2012/JPEGImages", 80 | "ann_file": "voc/VOC2012/Annotations/pascal_train2012.json" 81 | }, 82 | "voc_2012_val": { 83 | "data_dir": "voc/VOC2012", 84 | "split": "val" 85 | }, 86 | "voc_2012_val_cocostyle": { 87 | "img_dir": "voc/VOC2012/JPEGImages", 88 | "ann_file": "voc/VOC2012/Annotations/pascal_val2012.json" 89 | }, 90 | "voc_2012_test": { 91 | "data_dir": "voc/VOC2012", 92 | "split": "test" 93 | # PASCAL VOC2012 doesn't made the test annotations available, so there's no json annotation 94 | }, 95 | 96 | ############################################## 97 | # These ones are deprecated, should be removed 98 | "cityscapes_fine_instanceonly_seg_train_cocostyle": { 99 | "img_dir": "cityscapes/images", 100 | "ann_file": "cityscapes/annotations/instancesonly_filtered_gtFine_train.json" 101 | }, 102 | "cityscapes_fine_instanceonly_seg_val_cocostyle": { 103 | "img_dir": "cityscapes/images", 104 | "ann_file": "cityscapes/annotations/instancesonly_filtered_gtFine_val.json" 105 | }, 106 | "cityscapes_fine_instanceonly_seg_test_cocostyle": { 107 | "img_dir": "cityscapes/images", 108 | "ann_file": "cityscapes/annotations/instancesonly_filtered_gtFine_test.json" 109 | }, 110 | ############################################## 111 | 112 | "cityscapes_poly_instance_train": { 113 | "img_dir": "cityscapes/leftImg8bit/", 114 | "ann_dir": "cityscapes/gtFine/", 115 | "split": "train", 116 | "mode": "poly", 117 | }, 118 | "cityscapes_poly_instance_val": { 119 | "img_dir": "cityscapes/leftImg8bit", 120 | "ann_dir": 
"cityscapes/gtFine", 121 | "split": "val", 122 | "mode": "poly", 123 | }, 124 | "cityscapes_poly_instance_minival": { 125 | "img_dir": "cityscapes/leftImg8bit", 126 | "ann_dir": "cityscapes/gtFine", 127 | "split": "val", 128 | "mode": "poly", 129 | "mini": 10, 130 | }, 131 | "cityscapes_mask_instance_train": { 132 | "img_dir": "cityscapes/leftImg8bit/", 133 | "ann_dir": "cityscapes/gtFine/", 134 | "split": "train", 135 | "mode": "mask", 136 | }, 137 | "cityscapes_mask_instance_val": { 138 | "img_dir": "cityscapes/leftImg8bit", 139 | "ann_dir": "cityscapes/gtFine", 140 | "split": "val", 141 | "mode": "mask", 142 | }, 143 | "cityscapes_mask_instance_minival": { 144 | "img_dir": "cityscapes/leftImg8bit", 145 | "ann_dir": "cityscapes/gtFine", 146 | "split": "val", 147 | "mode": "mask", 148 | "mini": 10, 149 | }, 150 | } 151 | 152 | @staticmethod 153 | def get(name): 154 | if "coco" in name: 155 | data_dir = DatasetCatalog.DATA_DIR 156 | attrs = DatasetCatalog.DATASETS[name] 157 | args = dict( 158 | root=os.path.join(data_dir, attrs["img_dir"]), 159 | ann_file=os.path.join(data_dir, attrs["ann_file"]), 160 | ) 161 | return dict( 162 | factory="COCODataset", 163 | args=args, 164 | ) 165 | elif "voc" in name: 166 | data_dir = DatasetCatalog.DATA_DIR 167 | attrs = DatasetCatalog.DATASETS[name] 168 | args = dict( 169 | data_dir=os.path.join(data_dir, attrs["data_dir"]), 170 | split=attrs["split"], 171 | ) 172 | return dict( 173 | factory="PascalVOCDataset", 174 | args=args, 175 | ) 176 | elif "cityscapes" in name: 177 | data_dir = DatasetCatalog.DATA_DIR 178 | attrs = deepcopy(DatasetCatalog.DATASETS[name]) 179 | attrs["img_dir"] = os.path.join(data_dir, attrs["img_dir"]) 180 | attrs["ann_dir"] = os.path.join(data_dir, attrs["ann_dir"]) 181 | return dict(factory="CityScapesDataset", args=attrs) 182 | raise RuntimeError("Dataset not available: {}".format(name)) 183 | 184 | 185 | class ModelCatalog(object): 186 | S3_C2_DETECTRON_URL = "https://dl.fbaipublicfiles.com/detectron" 187 | C2_IMAGENET_MODELS = { 188 | "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", 189 | "MSRA/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl", 190 | "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", 191 | "MSRA/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl", 192 | "FAIR/20171220/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", 193 | } 194 | 195 | C2_DETECTRON_SUFFIX = "output/train/{}coco_2014_train%3A{}coco_2014_valminusminival/generalized_rcnn/model_final.pkl" 196 | C2_DETECTRON_MODELS = { 197 | "35857197/e2e_faster_rcnn_R-50-C4_1x": "01_33_49.iAX0mXvW", 198 | "35857345/e2e_faster_rcnn_R-50-FPN_1x": "01_36_30.cUF7QR7I", 199 | "35857890/e2e_faster_rcnn_R-101-FPN_1x": "01_38_50.sNxI7sX7", 200 | "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "06_31_39.5MIHi1fZ", 201 | "35858791/e2e_mask_rcnn_R-50-C4_1x": "01_45_57.ZgkA7hPB", 202 | "35858933/e2e_mask_rcnn_R-50-FPN_1x": "01_48_14.DzEQe4wC", 203 | "35861795/e2e_mask_rcnn_R-101-FPN_1x": "02_31_37.KqyEK4tT", 204 | "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "06_35_59.RZotkLKI", 205 | "37129812/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x": "09_35_36.8pzTQKYK", 206 | # keypoints 207 | "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "08_42_54.kdzV35ao" 208 | } 209 | 210 | @staticmethod 211 | def get(name): 212 | if name.startswith("Caffe2Detectron/COCO"): 213 | return ModelCatalog.get_c2_detectron_12_2017_baselines(name) 214 | if name.startswith("ImageNetPretrained"): 215 | return ModelCatalog.get_c2_imagenet_pretrained(name) 216 | raise RuntimeError("model not present 
in the catalog {}".format(name)) 217 | 218 | @staticmethod 219 | def get_c2_imagenet_pretrained(name): 220 | prefix = ModelCatalog.S3_C2_DETECTRON_URL 221 | name = name[len("ImageNetPretrained/"):] 222 | name = ModelCatalog.C2_IMAGENET_MODELS[name] 223 | url = "/".join([prefix, name]) 224 | return url 225 | 226 | @staticmethod 227 | def get_c2_detectron_12_2017_baselines(name): 228 | # Detectron C2 models are stored following the structure 229 | # prefix//2012_2017_baselines/.yaml./suffix 230 | # we use as identifiers in the catalog Caffe2Detectron/COCO// 231 | prefix = ModelCatalog.S3_C2_DETECTRON_URL 232 | dataset_tag = "keypoints_" if "keypoint" in name else "" 233 | suffix = ModelCatalog.C2_DETECTRON_SUFFIX.format(dataset_tag, dataset_tag) 234 | # remove identification prefix 235 | name = name[len("Caffe2Detectron/COCO/"):] 236 | # split in and 237 | model_id, model_name = name.split("/") 238 | # parsing to make it match the url address from the Caffe2 models 239 | model_name = "{}.yaml".format(model_name) 240 | signature = ModelCatalog.C2_DETECTRON_MODELS[name] 241 | unique_name = ".".join([model_name, signature]) 242 | url = "/".join([prefix, model_id, "12_2017_baselines", unique_name, suffix]) 243 | return url 244 | -------------------------------------------------------------------------------- /datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation as do_orig_coco_evaluation 2 | from .coco_eval_wrapper import do_coco_evaluation as do_wrapped_coco_evaluation 3 | from maskrcnn_benchmark.data.datasets import AbstractDataset, COCODataset 4 | 5 | 6 | def coco_evaluation( 7 | dataset, 8 | predictions, 9 | output_folder, 10 | box_only, 11 | iou_types, 12 | expected_results, 13 | expected_results_sigma_tol, 14 | ): 15 | if isinstance(dataset, COCODataset): 16 | return do_orig_coco_evaluation( 17 | dataset=dataset, 18 | predictions=predictions, 19 | box_only=box_only, 20 | output_folder=output_folder, 21 | iou_types=iou_types, 22 | expected_results=expected_results, 23 | expected_results_sigma_tol=expected_results_sigma_tol, 24 | ) 25 | elif isinstance(dataset, AbstractDataset): 26 | return do_wrapped_coco_evaluation( 27 | dataset=dataset, 28 | predictions=predictions, 29 | box_only=box_only, 30 | output_folder=output_folder, 31 | iou_types=iou_types, 32 | expected_results=expected_results, 33 | expected_results_sigma_tol=expected_results_sigma_tol, 34 | ) 35 | else: 36 | raise NotImplementedError( 37 | ( 38 | "Ground truth dataset is not a COCODataset, " 39 | "nor it is derived from AbstractDataset: type(dataset)=" 40 | "%s" % type(dataset) 41 | ) 42 | ) 43 | -------------------------------------------------------------------------------- /datasets/kitti/kitti_odomery.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import torch 8 | from torch.utils.data import Dataset 9 | from torchvision import transforms 10 | import numpy as np 11 | import numbers 12 | import random 13 | import os 14 | import cv2 15 | import yaml 16 | 17 | from datasets.utils.pipeline import makedir 18 | 19 | class KittiOdometry(Dataset): 20 | def __init__(self, data_root, id, dis_thr=15, angle_thr=1.0, interval=100): 21 | image_dir = os.path.join(data_root, "images", id, "image_0") 22 | label_file = os.path.join(data_root, "poses", 
(id+".txt")) 23 | image_names = os.listdir(image_dir) 24 | image_names.sort() 25 | 26 | 27 | self.data_root = data_root 28 | self.id = id 29 | self.dis_thr = dis_thr 30 | self.angle_thr = angle_thr 31 | self.interval = interval 32 | self.image_dir = image_dir 33 | self.image_names = image_names 34 | self.length = len(image_names) 35 | self.poses_gt = self.read_label_file(label_file) 36 | loop_gt, num_loop = self.find_loops(dis_thr, angle_thr, interval) 37 | # loop_gt, num_loop = None, None 38 | self.loop_gt = loop_gt 39 | self.num_loop = num_loop 40 | self.transform = transforms.ToTensor() 41 | 42 | def __len__(self): 43 | return self.length 44 | 45 | def __getitem__(self, idx): 46 | image_path = os.path.join(self.image_dir, self.image_names[idx]) 47 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 48 | 49 | if len(image.shape) == 2: 50 | image = cv2.merge([image, image, image]) 51 | 52 | return {'image': image, 'image_name': self.image_names[idx]} 53 | 54 | 55 | def read_label_file(self, file_path): 56 | poses_gt = {} 57 | 58 | index = 0 59 | fo = open(file_path, "r") 60 | for line in fo.readlines(): 61 | line = line.strip('\n') 62 | line = line.split(' ') 63 | line = [float(l) for l in line] 64 | line = np.array(line) 65 | line = line.reshape(3, 4) 66 | position = line[:, -1] 67 | rotation = line[:, :3] 68 | 69 | gt = {'index':index, 'position':position, 'rotation':rotation} 70 | 71 | image_name = self.image_names[index] 72 | poses_gt[image_name] = gt 73 | index = index + 1 74 | 75 | fo.close() 76 | 77 | return poses_gt 78 | 79 | 80 | def get_label(self, r): 81 | if type(r) == type(0): 82 | image_name = self.image_names[r] 83 | else: 84 | image_name = r 85 | 86 | if image_name in self.poses_gt: 87 | pose_gt = self.poses_gt[image_name] 88 | else: 89 | pose_gt = None 90 | 91 | return pose_gt 92 | 93 | 94 | def find_loops(self, dis_thr, angle_thr, interval): 95 | loop_gt = {} 96 | num_loop = 0 97 | for i in range(len(self.image_names)): 98 | image_name = self.image_names[i] 99 | if i < interval: 100 | loop_gt[image_name] = 0 101 | continue 102 | 103 | gt_i = self.get_label(i) 104 | position_i = gt_i['position'] 105 | rotation_i = gt_i['rotation'] 106 | 107 | for j in range(i): 108 | if i - j < interval: 109 | loop_gt[image_name] = 0 110 | break 111 | 112 | gt_j = self.get_label(j) 113 | position_j = gt_j['position'] 114 | rotation_j = gt_j['rotation'] 115 | 116 | delta_dis = np.linalg.norm((position_i-position_j)) 117 | delta_R = rotation_i.dot(rotation_j.T) 118 | delta_r, _ = cv2.Rodrigues(delta_R) 119 | deleta_angle = np.linalg.norm(delta_r) 120 | 121 | if delta_dis < dis_thr and deleta_angle < angle_thr: 122 | loop_gt[image_name] = 1 123 | num_loop += 1 124 | break 125 | 126 | return loop_gt, num_loop 127 | 128 | 129 | def get_loop_gt(self): 130 | return self.loop_gt, self.num_loop 131 | 132 | def image_size(self): 133 | ''' 134 | H, W 135 | ''' 136 | image_path = os.path.join(self.image_dir, self.image_names[0]) 137 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 138 | return image.shape[-2:] 139 | -------------------------------------------------------------------------------- /datasets/kitti/kitti_tracking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import torch 8 | from torch.utils.data import Dataset 9 | from torchvision import transforms 10 | import numpy as np 11 | import numbers 12 | 
import random 13 | import os 14 | import cv2 15 | 16 | class KittiTracking(Dataset): 17 | def __init__(self, data_root, id): 18 | image_dir = os.path.join(data_root, "images", id) 19 | label_file = os.path.join(data_root, "labels", (id+".txt")) 20 | image_names = os.listdir(image_dir) 21 | image_names.sort() 22 | 23 | self.image_dir = image_dir 24 | self.image_names = image_names 25 | self.length = len(image_names) 26 | self.track_gt = self.read_label_file(label_file) 27 | self.transform = transforms.ToTensor() 28 | 29 | def __len__(self): 30 | return self.length 31 | 32 | def __getitem__(self, idx): 33 | image_path = os.path.join(self.image_dir, self.image_names[idx]) 34 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 35 | 36 | if len(image.shape) == 2: 37 | image = cv2.merge([image, image, image]) 38 | 39 | # image = self.transform(image) 40 | 41 | return {'image': image, 'image_name': self.image_names[idx]} 42 | 43 | def read_label_file(self, file_path): 44 | track_gt = {} 45 | 46 | fo = open(file_path, "r") 47 | for line in fo.readlines(): 48 | line = line.strip('\n') 49 | line = line.split(' ') 50 | track_id = int(line[1]) 51 | if track_id < 0: 52 | continue 53 | frame_id = int(line[0]) 54 | object_type = line[2] 55 | truncated = int(line[3]) 56 | occulded = int(line[4]) 57 | # x1, y1, x2, y2 58 | box = [float(line[6]), float(line[7]), float(line[8]), float(line[9])] 59 | object_info = {'frame_id':frame_id, 'track_id':track_id, 'object_type':object_type, 60 | 'truncated':truncated, 'occulded':occulded, 'box':box, } 61 | 62 | image_name = self.image_names[frame_id] 63 | if image_name in track_gt: 64 | track_gt[image_name].append(object_info) 65 | else: 66 | track_gt[image_name] = [object_info] 67 | 68 | fo.close() 69 | 70 | # re-organioze groundtruth, Dict[List[Dict]] -> Dict[Dict[List]] 71 | new_track_gt = {} 72 | for image_name in track_gt.keys(): 73 | if len(track_gt[image_name]) > 0: 74 | new_track_gt[image_name] = {} 75 | for k in track_gt[image_name][0].keys(): 76 | data_list = [data[k] for data in track_gt[image_name]] 77 | new_track_gt[image_name][k] = data_list 78 | 79 | return new_track_gt 80 | 81 | def get_label(self, r): 82 | if type(r) == type(0): 83 | image_name = self.image_names[r] 84 | else: 85 | image_name = r 86 | 87 | if image_name in self.track_gt: 88 | image_info = self.track_gt[image_name] 89 | else: 90 | image_info = None 91 | 92 | return image_info 93 | 94 | def image_size(self): 95 | ''' 96 | H, W 97 | ''' 98 | image_path = os.path.join(self.image_dir, self.image_names[0]) 99 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 100 | return image.shape[-2:] 101 | -------------------------------------------------------------------------------- /datasets/otb/otb_tracking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import torch 8 | from torch.utils.data import Dataset 9 | from torchvision import transforms 10 | import numpy as np 11 | import numbers 12 | import random 13 | import os 14 | import cv2 15 | 16 | class OtbTracking(Dataset): 17 | def __init__(self, data_root, id): 18 | image_dir = os.path.join(data_root, id, "img") 19 | label_file = os.path.join(data_root, id, "groundtruth_rect.txt") 20 | image_names = os.listdir(image_dir) 21 | image_names.sort() 22 | 23 | self.image_dir = image_dir 24 | self.image_names = image_names 25 | self.length = len(image_names) 26 | 
self.track_gt = self.read_label_file(label_file) 27 | self.transform = transforms.ToTensor() 28 | 29 | def __len__(self): 30 | return self.length 31 | 32 | def __getitem__(self, idx): 33 | image_path = os.path.join(self.image_dir, self.image_names[idx]) 34 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 35 | 36 | if len(image.shape) == 2: 37 | image = cv2.merge([image, image, image]) 38 | 39 | # image = self.transform(image) 40 | 41 | return {'image': image, 'image_name': self.image_names[idx]} 42 | 43 | def read_label_file(self, file_path): 44 | track_gt = {} 45 | 46 | fo = open(file_path, "r") 47 | i = 0 48 | for line in fo.readlines(): 49 | line = line.strip('\n') 50 | if ',' in line: 51 | line = line.split(',') 52 | else: 53 | line = line.split('\t') 54 | 55 | track_id = 0 56 | frame_id = i 57 | # x1, y1, w, h 58 | x1, y1, w, h = float(line[0]), float(line[1]), float(line[2]), float(line[3]) 59 | x2, y2 = (x1 + w - 1), (y1 + h - 1) 60 | box = [x1, y1, x2, y2] 61 | object_info = {'frame_id':frame_id, 'track_id':track_id, 'box':box, } 62 | i = i + 1 63 | image_name = self.image_names[frame_id] 64 | track_gt[image_name] = object_info 65 | 66 | fo.close() 67 | 68 | return track_gt 69 | 70 | def get_label(self, r): 71 | if type(r) == type(0): 72 | image_name = self.image_names[r] 73 | else: 74 | image_name = r 75 | 76 | if image_name in self.track_gt: 77 | image_info = self.track_gt[image_name] 78 | else: 79 | image_info = None 80 | 81 | return image_info 82 | 83 | def image_size(self): 84 | ''' 85 | H, W 86 | ''' 87 | image_path = os.path.join(self.image_dir, self.image_names[0]) 88 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 89 | return image.shape[-2:] 90 | -------------------------------------------------------------------------------- /datasets/synthetic/synthetic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import torch 8 | from torch.utils.data import Dataset, DataLoader 9 | from torchvision import transforms 10 | import numpy as np 11 | import numbers 12 | import random 13 | import os 14 | import cv2 15 | 16 | from datasets.utils.pipeline import draw_umich_gaussian 17 | from datasets.utils import pipeline as pp 18 | 19 | class SyntheticDataset(Dataset): 20 | def __init__(self, data_root, use_for = None): 21 | image_dir = os.path.join(data_root, use_for, "images") 22 | point_dir = os.path.join(data_root, use_for, "points") 23 | image_names = os.listdir(image_dir) 24 | 25 | self.image_dir = image_dir 26 | self.point_dir = point_dir 27 | self.image_names = image_names 28 | self.length = len(image_names) 29 | 30 | self.transform = transforms.ToTensor() 31 | 32 | def __len__(self): 33 | return self.length 34 | 35 | def __getitem__(self, idx): 36 | image_path = os.path.join(self.image_dir, self.image_names[idx]) 37 | point_name = self.image_names[idx].split('.')[0] + ".txt" 38 | point_path = os.path.join(self.point_dir, point_name) 39 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 40 | points = np.loadtxt(point_path, dtype=np.float32, ndmin=2) 41 | if np.sum(points) < 0: 42 | points = np.empty((0, 2), dtype=np.float32) 43 | 44 | image_name = self.image_names[idx].split('.') 45 | image_name = image_name[0] 46 | 47 | if len(image.shape) == 2: 48 | image = cv2.merge([image, image, image]) 49 | 50 | image = self.transform(image) 51 | 52 | points = torch.tensor(points) 53 | 54 | return 
{'image': image, 'image_name': image_name, 'points': points} -------------------------------------------------------------------------------- /datasets/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/datasets/utils/__init__.py -------------------------------------------------------------------------------- /datasets/utils/batch_collator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import torch 6 | 7 | class BatchCollator(object): 8 | ''' 9 | pack dict batch 10 | ''' 11 | def __init__(self): 12 | super(BatchCollator,self).__init__() 13 | 14 | def __call__(self, batch): 15 | data= {} 16 | size = len(batch) 17 | for key in batch[0]: 18 | l = [] 19 | for i in range(size): 20 | l = l + [batch[i][key]] 21 | data[key] = l 22 | return data 23 | -------------------------------------------------------------------------------- /datasets/utils/build_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import torch 6 | from torchvision import transforms 7 | from torch.utils.data import DataLoader 8 | 9 | from datasets.coco.coco import COCODataset 10 | from datasets.utils.batch_collator import BatchCollator 11 | from datasets.coco.paths_catalog import DatasetCatalog 12 | 13 | def coco_loader( 14 | data_root, name, config, batch_size=2, transforms=transforms.ToTensor(), drop_last=True, 15 | remove_images_without_annotations=False, oints_root="", num_workers=8): 16 | data_info = DatasetCatalog.get(name) 17 | 18 | data_dir = os.path.join(data_root, data_info['args']['root']) 19 | ann_file = os.path.join(data_root, data_info['args']['ann_file']) 20 | 21 | dataset = COCODataset( 22 | image_root=data_dir, ann_file=ann_file, config=config, transforms=transforms, 23 | remove_images_without_annotations=remove_images_without_annotations) 24 | 25 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 26 | batch_sampler = torch.utils.data.sampler.BatchSampler(sampler=sampler, batch_size=batch_size, drop_last=drop_last) 27 | 28 | collator = BatchCollator() 29 | loader = DataLoader(dataset, batch_sampler=batch_sampler, collate_fn=collator, num_workers=num_workers) 30 | 31 | return loader -------------------------------------------------------------------------------- /datasets/utils/gcn_mask_augmentation.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | sys.path.append('.') 4 | import cv2 5 | import numpy as np 6 | import math 7 | import random 8 | 9 | """ Data augmentation for gcn masks """ 10 | 11 | augmentations = [ 12 | 'additive_gaussian_noise', 13 | 'additive_speckle_noise', 14 | 'random_brightness', 15 | 'random_contrast', 16 | 'affine_transform', 17 | 'perspective_transform', 18 | 'random_crop', 19 | 'add_shade', 20 | 'motion_blur' 21 | ] 22 | 23 | def erode(image, kernel_size): 24 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(kernel_size, kernel_size)) 25 | image = cv2.erode(image, kernel) 26 | return image 27 | 28 | def dilate(image, kernel_size): 29 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(kernel_size, kernel_size)) 30 | image = cv2.dilate(image, kernel) 31 | return image 32 | 33 | def random_region_zero(image, 
scale_x=0.3, scale_y=0.3): 34 | ys, xs = np.where(image > 0) 35 | x0, x1, y0, y1 = xs.min(), xs.max(), ys.min(), ys.max() 36 | 37 | region_width = (x1 - x0) * scale_x 38 | region_height = (y1 - y0) * scale_y 39 | 40 | x0 = random.uniform(x0, (x1 - region_width)) 41 | y0 = random.uniform(y0, (y1 - region_height)) 42 | 43 | x1 = x0 + region_width 44 | y1 = y0 + region_height 45 | 46 | x0, x1, y0, y1 = int(x0), int(x1), int(y0), int(y1) 47 | 48 | image[y0:y1, x0:x1] = 0 49 | 50 | return image 51 | 52 | def random_block_zero(image, num=5, size=10): 53 | ys, xs = np.where(image > 0) 54 | x0, x1, y0, y1 = xs.min(), xs.max(), ys.min(), ys.max() 55 | 56 | block_xs = random.sample(range(x0, x1), num) 57 | block_ys = random.sample(range(y0, y1), num) 58 | mask = np.ones_like(image) 59 | 60 | for y, x in zip(block_ys, block_xs): 61 | mask[y, x] = 0 62 | 63 | kernel_size = size 64 | mask = erode(mask, kernel_size) 65 | 66 | image = (image * mask).astype(np.uint8) 67 | 68 | return image 69 | 70 | def random_block_one(image, num=10, size=10): 71 | H, W = image.shape[-2:] 72 | 73 | block_xs = random.sample(range(0, (W-1)), num) 74 | block_ys = random.sample(range(0, (H-1)), num) 75 | 76 | mask = np.zeros_like(image) 77 | 78 | for y, x in zip(block_ys, block_xs): 79 | mask[y, x] = 1 80 | 81 | kernel_size = size 82 | mask = dilate(mask, kernel_size) 83 | 84 | img = (image > 0).astype(float) 85 | value = np.sum(image) / np.sum(img) 86 | 87 | img = img + mask 88 | 89 | img = (img > 0).astype(float) 90 | 91 | img = img * value 92 | img = img.astype(np.uint8) 93 | 94 | return img 95 | 96 | 97 | if __name__ == "__main__": 98 | 99 | from debug_tools.show_batch import show_numpy 100 | 101 | img1 = np.ones([640, 640]) 102 | img2 = np.zeros([640, 640]) 103 | img = np.concatenate([img1, img2], 1) 104 | img = (img * 150.0 + 0.5).astype(np.uint8) 105 | img = np.clip(img, 0, 255) 106 | 107 | img = dilate(img, kernel_size=10) 108 | 109 | img = cv2.merge([img, img, img]) 110 | show_numpy(img) -------------------------------------------------------------------------------- /datasets/utils/pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from random import sample 5 | 6 | from datasets.utils import augmentation_legacy as photaug 7 | from datasets.utils import gcn_mask_augmentation as maskaug 8 | from datasets.utils.homographies import sample_homography, warp_points, filter_points 9 | 10 | def makedir(d): 11 | if not os.path.exists(d): 12 | os.makedirs(d) 13 | 14 | def parse_primitives(names, all_primitives): 15 | p = all_primitives if (names == 'all') \ 16 | else (names if isinstance(names, list) else [names]) 17 | assert set(p) <= set(all_primitives) 18 | return p 19 | 20 | def space_to_depth(data, cell_size, add_dustbin=False): 21 | H, W = data.shape[0], data.shape[1] 22 | Hc, Wc = H // cell_size, W // cell_size 23 | result = data[:, np.newaxis, :, np.newaxis] 24 | result = result.reshape(Hc, cell_size, Wc, cell_size) 25 | result = np.transpose(result, [1, 3, 0, 2]) 26 | result = result.reshape(1, cell_size ** 2, Hc, Wc) 27 | result = result.squeeze() 28 | if add_dustbin: 29 | dustbin = np.ones([Hc, Wc]) 30 | depth_sum = result.sum(axis=0) 31 | dustbin[depth_sum>0] = 0 32 | result = np.concatenate((result, dustbin[np.newaxis, :, :]), axis=0) 33 | return result 34 | 35 | ''' 36 | draw gaussian function 37 | ''' 38 | def gaussian2D(shape, sigma=1): 39 | # generate a gaussion in a box 40 | m, n = [(ss - 1.) / 2. 
for ss in shape] 41 | y, x = np.ogrid[-m:m+1,-n:n+1] 42 | 43 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 44 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 45 | return h 46 | 47 | def draw_umich_gaussian(heatmap, center, radius, k=1): 48 | diameter = 2 * radius + 1 49 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 50 | 51 | x, y = int(center[0]), int(center[1]) 52 | 53 | height, width = heatmap.shape[0:2] 54 | 55 | left, right = min(x, radius), min(width - x, radius + 1) 56 | top, bottom = min(y, radius), min(height - y, radius + 1) 57 | 58 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 59 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 60 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 61 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 62 | 63 | return heatmap 64 | 65 | def convert_to_guassian(label, radius): 66 | label_gaussian = np.zeros(label.shape) 67 | ys, xs = np.where(label > 0) 68 | if len(xs) != 0: 69 | for i in range(len(xs)): 70 | draw_umich_gaussian(label_gaussian, (xs[i], ys[i]), radius) 71 | 72 | return label_gaussian 73 | 74 | def generate_shape_gaussian(matrix, radius): 75 | ''' 76 | Generate 3D or 4D shape like gaussian 77 | ''' 78 | origin_shape = matrix.shape 79 | if len(origin_shape) == 2: 80 | return convert_to_guassian(matrix, radius) 81 | elif len(origin_shape) == 3: 82 | origin_matrix = matrix[np.newaxis, :, :, :] 83 | elif len(origin_shape) == 4: 84 | origin_matrix = matrix 85 | else: 86 | return None 87 | 88 | matrix_gaussian = np.zeros(origin_matrix.shape) 89 | for i in range(matrix_gaussian.shape[0]): 90 | for j in range(matrix_gaussian.shape[1]): 91 | matrix_gaussian[i, j, :, :] = convert_to_guassian(origin_matrix[i, j, :, :], radius) 92 | 93 | if len(origin_shape) == 3: 94 | matrix_gaussian = np.squeeze(matrix_gaussian, 0) 95 | 96 | return matrix_gaussian 97 | 98 | ''' 99 | generate valid mask, heatmap and keypoint map 100 | ''' 101 | def generate_valid_mask(img_shape, border_remove=2): 102 | ''' 103 | inputs : 104 | img_shape: [H, W] 105 | ''' 106 | H, W = img_shape[0:2] 107 | valid_mask = np.zeros((H, W), dtype=np.int) 108 | valid_mask[border_remove:(H-border_remove), border_remove:(W-border_remove)] = 1 109 | return valid_mask 110 | 111 | def generate_keypoint_map(img_shape, points): 112 | ''' 113 | inputs : 114 | img_shape: [H, W] 115 | points: N * 2, [hy, wx] 116 | ''' 117 | height, width = img_shape[:2] 118 | points = (points + 0.5).astype(int) 119 | points[:, 0] = np.clip(points[:, 0], 0, height - 1) 120 | points[:, 1] = np.clip(points[:, 1], 0, width -1) 121 | keypoint_map = np.zeros(img_shape[:2], dtype=np.float32) 122 | for h, w in points: 123 | keypoint_map[h, w] = 1.0 124 | return keypoint_map 125 | 126 | def generate_heatmap(img_shape, points, gaussian_radius): 127 | ''' 128 | inputs: 129 | img_shape: [H, W] 130 | points: N * 2, [hy, wx] 131 | gaussian_radius: int 132 | ''' 133 | if gaussian_radius < 2: 134 | heatmap = generate_keypoint_map(img_shape, points) 135 | else: 136 | heatmap = np.zeros(img_shape[:2]) 137 | for i in range(points.shape[0]): 138 | heatmap = draw_umich_gaussian(heatmap, (points[i][1], points[i][0]), gaussian_radius) 139 | return heatmap 140 | 141 | def generate_idx_map(points, shape): 142 | ''' 143 | inputs: 144 | image: numpy array, [H, W] 145 | points: N * 2, [hy, wx] 146 | ''' 147 | idx_map = np.zeros(shape) 148 | for i in range(len(points)): 149 | hy, wx = int(points[i][0]), 
int(points[i][1]) 150 | idx_map[hy, wx] = i 151 | 152 | return idx_map 153 | 154 | 155 | ''' 156 | homographic augmentation 157 | ''' 158 | def homographic_augmentation(image, points, config): 159 | ''' 160 | inputs: 161 | image: numpy array 162 | points: N * 2, [hy, wx] 163 | config 164 | ''' 165 | H = sample_homography(image.shape[:2], **config['params']) 166 | 167 | warped_image = cv2.warpPerspective(image, H, (image.shape[1], image.shape[0])) 168 | 169 | if points.shape[0] > 0: 170 | warped_points = warp_points(H, points) 171 | warped_points = filter_points(image.shape[:2], warped_points) 172 | else: 173 | warped_points = points 174 | 175 | valid_mask = np.ones(image.shape[:2], dtype=np.uint8) * 255 176 | warped_valid_mask = cv2.warpPerspective(valid_mask, H, (valid_mask.shape[1], valid_mask.shape[0])) 177 | k = np.ones((config['valid_border_margin'], config['valid_border_margin']), np.uint8) 178 | warped_valid_mask = cv2.erode(warped_valid_mask, k, iterations=1) 179 | warped_valid_mask = (warped_valid_mask > 0).astype(int) 180 | 181 | return warped_image, warped_points, warped_valid_mask, H 182 | 183 | ''' 184 | photometric augmentation 185 | ''' 186 | def photometric_augmentation(image, points, config): 187 | ''' 188 | inputs: 189 | image: numpy array 190 | points: N * 2, [hy, wx] 191 | config 192 | ''' 193 | primitives = config['primitives'] 194 | fun_name = sample(primitives, 1)[0] 195 | fun_config = config['params'][fun_name] 196 | 197 | if len(image.shape) == 3: 198 | img = image[:, :, 0] 199 | else: 200 | img = image 201 | 202 | aug = getattr(photaug, fun_name) 203 | img, points = aug(img, np.flip(points, 1), **fun_config) 204 | 205 | img = cv2.merge([img, img, img]) 206 | points = np.flip(points, 1) 207 | 208 | return img, points 209 | 210 | 211 | ''' 212 | mask augmentation 213 | ''' 214 | def mask_augmentation(masks, config): 215 | ''' 216 | inputs: 217 | image: numpy array or List[numpy array] 218 | config 219 | ''' 220 | primitives = config['primitives'] 221 | 222 | new_masks = [] 223 | for mask in masks: 224 | fun_name = sample(primitives, 1)[0] 225 | fun_config = config['params'][fun_name] 226 | aug = getattr(maskaug, fun_name) 227 | mask = aug(mask, **fun_config) 228 | new_masks.append(mask) 229 | 230 | return np.stack(new_masks) -------------------------------------------------------------------------------- /datasets/utils/transforms.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch import nn, Tensor 4 | from torch.nn import functional as F 5 | import torchvision 6 | from typing import List, Tuple, Dict, Optional 7 | 8 | @torch.jit.unused 9 | def _resize_image_and_masks_onnx(image, self_min_size, self_max_size, target): 10 | # type: (Tensor, float, float, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]] 11 | from torch.onnx import operators 12 | im_shape = operators.shape_as_tensor(image)[-2:] 13 | min_size = torch.min(im_shape).to(dtype=torch.float32) 14 | max_size = torch.max(im_shape).to(dtype=torch.float32) 15 | scale_factor = torch.min(self_min_size / min_size, self_max_size / max_size) 16 | 17 | image = torch.nn.functional.interpolate( 18 | image[None], scale_factor=scale_factor, mode='bilinear', recompute_scale_factor=True, 19 | align_corners=False)[0] 20 | 21 | if target is None: 22 | return image, target 23 | 24 | if "masks" in target: 25 | mask = target["masks"] 26 | mask = F.interpolate(mask[:, None].float(), scale_factor=scale_factor)[:, 0].byte() 27 
| target["masks"] = mask 28 | return image, target 29 | 30 | 31 | def _resize_image_and_masks(image, self_min_size, self_max_size, target): 32 | # type: (Tensor, float, float, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]] 33 | im_shape = torch.tensor(image.shape[-2:]) 34 | min_size = float(torch.min(im_shape)) 35 | max_size = float(torch.max(im_shape)) 36 | scale_factor = self_min_size / min_size 37 | if max_size * scale_factor > self_max_size: 38 | scale_factor = self_max_size / max_size 39 | image = torch.nn.functional.interpolate( 40 | image[None], scale_factor=scale_factor, mode='bilinear', recompute_scale_factor=True, 41 | align_corners=False)[0] 42 | 43 | if target is None: 44 | return image, target 45 | 46 | if "masks" in target: 47 | mask = target["masks"] 48 | mask = F.interpolate(mask[:, None].float(), scale_factor=scale_factor)[:, 0].byte() 49 | target["masks"] = mask 50 | return image, target 51 | 52 | def normalize(image, image_mean, image_std): 53 | dtype, device = image.dtype, image.device 54 | mean = torch.as_tensor(image_mean, dtype=dtype, device=device) 55 | std = torch.as_tensor(image_std, dtype=dtype, device=device) 56 | return (image - mean[:, None, None]) / std[:, None, None] 57 | 58 | def resize(image, target, min_size, max_size): 59 | # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]] 60 | h, w = image.shape[-2:] 61 | if torchvision._is_tracing(): 62 | image, target = _resize_image_and_masks_onnx(image, min_size, float(max_size), target) 63 | else: 64 | image, target = _resize_image_and_masks(image, min_size, float(max_size), target) 65 | 66 | if target is None: 67 | return image, target 68 | 69 | bbox = target["boxes"] 70 | bbox = resize_boxes(bbox, (h, w), image.shape[-2:]) 71 | target["boxes"] = bbox 72 | 73 | if "keypoints" in target: 74 | keypoints = target["keypoints"] 75 | keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:]) 76 | target["keypoints"] = keypoints 77 | return image, target 78 | 79 | # _onnx_batch_images() is an implementation of 80 | # batch_images() that is supported by ONNX tracing. 
81 | @torch.jit.unused 82 | def _onnx_batch_images(images, size_divisible=32): 83 | # type: (List[Tensor], int) -> Tensor 84 | max_size = [] 85 | for i in range(images[0].dim()): 86 | max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64) 87 | max_size.append(max_size_i) 88 | stride = size_divisible 89 | max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64) 90 | max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64) 91 | max_size = tuple(max_size) 92 | 93 | # work around for 94 | # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 95 | # which is not yet supported in onnx 96 | padded_imgs = [] 97 | for img in images: 98 | padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] 99 | padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) 100 | padded_imgs.append(padded_img) 101 | 102 | return torch.stack(padded_imgs) 103 | 104 | def resize_keypoints(keypoints, original_size, new_size): 105 | # type: (Tensor, List[int], List[int]) -> Tensor 106 | ratios = [ 107 | torch.tensor(s, dtype=torch.float32, device=keypoints.device) / 108 | torch.tensor(s_orig, dtype=torch.float32, device=keypoints.device) 109 | for s, s_orig in zip(new_size, original_size) 110 | ] 111 | ratio_h, ratio_w = ratios 112 | resized_data = keypoints.clone() 113 | if torch._C._get_tracing_state(): 114 | resized_data_0 = resized_data[:, :, 0] * ratio_w 115 | resized_data_1 = resized_data[:, :, 1] * ratio_h 116 | resized_data = torch.stack((resized_data_0, resized_data_1, resized_data[:, :, 2]), dim=2) 117 | else: 118 | resized_data[..., 0] *= ratio_w 119 | resized_data[..., 1] *= ratio_h 120 | return resized_data 121 | 122 | 123 | def resize_boxes(boxes, original_size, new_size): 124 | # type: (Tensor, List[int], List[int]) -> Tensor 125 | ratios = [ 126 | torch.tensor(s, dtype=torch.float32, device=boxes.device) / 127 | torch.tensor(s_orig, dtype=torch.float32, device=boxes.device) 128 | for s, s_orig in zip(new_size, original_size) 129 | ] 130 | ratio_height, ratio_width = ratios 131 | xmin, ymin, xmax, ymax = boxes.unbind(1) 132 | 133 | xmin = xmin * ratio_width 134 | xmax = xmax * ratio_width 135 | ymin = ymin * ratio_height 136 | ymax = ymax * ratio_height 137 | return torch.stack((xmin, ymin, xmax, ymax), dim=1) 138 | -------------------------------------------------------------------------------- /datasets/vot/vot_tracking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import torch 8 | from torch.utils.data import Dataset 9 | from torchvision import transforms 10 | import numpy as np 11 | import numbers 12 | import random 13 | import os 14 | import cv2 15 | 16 | class VotTracking(Dataset): 17 | def __init__(self, data_root, id): 18 | image_dir = os.path.join(data_root, id, "images") 19 | label_file = os.path.join(data_root, id, "configs/groundtruth.txt") 20 | image_names = os.listdir(image_dir) 21 | image_names.sort() 22 | 23 | self.image_dir = image_dir 24 | self.image_names = image_names 25 | self.length = len(image_names) 26 | self.track_gt = self.read_label_file(label_file) 27 | self.transform = transforms.ToTensor() 28 | 29 | def __len__(self): 30 | return self.length 31 | 32 | def __getitem__(self, idx): 33 | image_path = 
os.path.join(self.image_dir, self.image_names[idx]) 34 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 35 | 36 | if len(image.shape) == 2: 37 | image = cv2.merge([image, image, image]) 38 | 39 | # image = self.transform(image) 40 | 41 | return {'image': image, 'image_name': self.image_names[idx]} 42 | 43 | def read_label_file(self, file_path): 44 | track_gt = {} 45 | 46 | fo = open(file_path, "r") 47 | i = 0 48 | for line in fo.readlines(): 49 | line = line.strip('\n') 50 | line = line.split(',') 51 | track_id = 0 52 | frame_id = i 53 | # x1, y1, w, h 54 | if len(line) == 4: 55 | x1, y1, w, h = float(line[0]), float(line[1]), float(line[2]), float(line[3]) 56 | x2, y2 = (x1 + w - 1), (y1 + h - 1) 57 | else: 58 | assert len(line) == 8 59 | line = [float(l) for l in line] 60 | line = np.array(line) 61 | x1, x2 = min(line[0::2]), max(line[0::2]) 62 | y1, y2 = min(line[1::2]), max(line[1::2]) 63 | 64 | box = [x1, y1, x2, y2] 65 | object_info = {'frame_id':frame_id, 'track_id':track_id, 'box':box, } 66 | i = i + 1 67 | image_name = self.image_names[frame_id] 68 | track_gt[image_name] = object_info 69 | 70 | fo.close() 71 | 72 | return track_gt 73 | 74 | def get_label(self, r): 75 | if type(r) == type(0): 76 | image_name = self.image_names[r] 77 | else: 78 | image_name = r 79 | 80 | if image_name in self.track_gt: 81 | image_info = self.track_gt[image_name] 82 | else: 83 | image_info = None 84 | 85 | return image_info 86 | 87 | def image_size(self): 88 | ''' 89 | H, W 90 | ''' 91 | image_path = os.path.join(self.image_dir, self.image_names[0]) 92 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 93 | return image.shape[-2:] 94 | -------------------------------------------------------------------------------- /debug_tools/command.txt: -------------------------------------------------------------------------------- 1 | python validate_maskrcnn.py -c config/maskrcnn_coco.yaml -g 1 -s saving/debug/ -d /home/haoyuefan/xk_data/superpoint/coco/full/ -m saving/models/object_rcnn3/object_rcnn_iter124000.pth 2 | 3 | python train_maskrcnn.py -c config/maskrcnn_coco.yaml -d /home/haoyuefan/xk_data/superpoint/coco/full/ -s saving/models/ -g 1 -m saving/models/object_rcnn3/object_rcnn_iter124000.pth 4 | 5 | python debug_tools/show_match.py -d /home/haoyuefan/xk_data/superpoint/match_debug/pairs/ -s /home/haoyuefan/xk_data/superpoint/match_debug/result/ -g 1 -c config/maskrcnn_coco.yaml -m saving/models/object_rcnn_iter205000.pth 6 | 7 | python train_synthetic.py -c config/superpoint_synthetic.yaml -d /home/haoyuefan/xk_data/superpoint/synthetic_dataset -g 1 -s saving/models/synthetic -p 1 -m saving/models/pretrained/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth -------------------------------------------------------------------------------- /debug_tools/draw_points.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | # sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages/') 8 | import numpy as np 9 | import time 10 | import sys 11 | import os 12 | import argparse 13 | from datetime import datetime 14 | import yaml 15 | import cv2 16 | 17 | from datasets.utils.pipeline import makedir 18 | 19 | data_root = "/home/haoyuefan/xk_data/superpoint/coco/full/coco" 20 | images_dir = os.path.join(data_root, "train2014") 21 | points_dir = os.path.join(data_root, "train2014_points") 22 | debug_dir = os.path.join(data_root, 
"show/points") 23 | makedir(debug_dir) 24 | 25 | image_names = os.listdir(images_dir) 26 | for image_name in image_names: 27 | image_path = os.path.join(images_dir, image_name) 28 | vis_image = os.path.join(debug_dir, image_name) 29 | file_name = image_name.split('.')[0] 30 | point_path = os.path.join(points_dir, '{}.txt'.format(file_name)) 31 | 32 | img = cv2.imread(image_path) 33 | points = np.loadtxt(point_path, ndmin=2) 34 | for j in range(points.shape[0]): 35 | x = points[j][0].astype(int) 36 | y = points[j][1].astype(int) 37 | if(x < 0) : 38 | break 39 | cv2.circle(img, (y,x), 1, (0,0,255), thickness=-1) 40 | 41 | cv2.imwrite(vis_image, img) 42 | -------------------------------------------------------------------------------- /debug_tools/show_batch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | from matplotlib import pyplot as plt 8 | import torchvision 9 | 10 | def show_batch(batch): 11 | grid = torchvision.utils.make_grid(batch) 12 | plt.imshow(grid.numpy()[::-1].transpose((1, 2, 0))) 13 | plt.title('Batch') 14 | plt.show() 15 | 16 | def show_batch_opencv(batch): 17 | T = torchvision.transforms.ToTensor() 18 | batch = [T(img) for img in batch] 19 | batch = torch.stack(batch) 20 | show_batch(batch) 21 | 22 | def show_numpy(img): 23 | plt.imshow(img) 24 | plt.title('Image') 25 | plt.show() -------------------------------------------------------------------------------- /debug_tools/show_detections.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import datetime 8 | import logging 9 | import os 10 | import time 11 | import argparse 12 | import yaml 13 | import cv2 14 | import torch 15 | import torch.distributed as dist 16 | from torchvision import transforms 17 | import torch.optim as optim 18 | from torch.autograd import Variable 19 | from torch.optim import lr_scheduler 20 | 21 | from model.build_model import build_maskrcnn, build_superpoint_model 22 | from model.inference import detection_inference 23 | 24 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 25 | 26 | def read_image(img_path): 27 | image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 28 | if len(image.shape) == 2: 29 | image = cv2.merge([image, image, image]) 30 | return image 31 | 32 | def show_detections(configs): 33 | # read configs 34 | ## command line config 35 | use_gpu = configs['use_gpu'] 36 | save_dir = configs['save_dir'] 37 | data_root = configs['data_root'] 38 | ## data cofig 39 | data_config = configs['data'] 40 | ## superpoint model config 41 | superpoint_model_config = configs['model']['superpoint'] 42 | detection_threshold = superpoint_model_config['detection_threshold'] 43 | gaussian_radius = 2 44 | ## others 45 | configs['num_gpu'] = [0] 46 | configs['public_model'] = 0 47 | 48 | # model 49 | maskrcnn_model = build_maskrcnn(configs) 50 | superpoint_model = build_superpoint_model(configs) 51 | 52 | # data 53 | image_names = os.listdir(data_root) 54 | 55 | transform = transforms.ToTensor() 56 | with torch.no_grad(): 57 | maskrcnn_model.eval() 58 | for image_name in image_names: 59 | print(image_name) 60 | image_path = os.path.join(data_root, image_name) 61 | image = read_image(image_path) 62 | image = transform(image) 63 | image = image.unsqueeze(0) 64 | 
batch = {'image': image, 'image_name': [image_name]} 65 | 66 | detection_inference(maskrcnn_model, superpoint_model, batch, use_gpu, 1, 67 | detection_threshold, data_config, save_dir=save_dir) 68 | 69 | 70 | def main(): 71 | parser = argparse.ArgumentParser(description="Training") 72 | parser.add_argument( 73 | "-c", "--config_file", 74 | dest = "config_file", 75 | type = str, 76 | default = "" 77 | ) 78 | parser.add_argument( 79 | "-g", "--gpu", 80 | dest = "gpu", 81 | type = int, 82 | default = 0 83 | ) 84 | parser.add_argument( 85 | "-s", "--save_dir", 86 | dest = "save_dir", 87 | type = str, 88 | default = "" 89 | ) 90 | parser.add_argument( 91 | "-d", "--data_root", 92 | dest = "data_root", 93 | type = str, 94 | default = "" 95 | ) 96 | parser.add_argument( 97 | "-mm", "--maskrcnn_model_path", 98 | dest = "maskrcnn_model_path", 99 | type = str, 100 | default = "" 101 | ) 102 | parser.add_argument( 103 | "-sm", "--superpoint_model_path", 104 | dest = "superpoint_model_path", 105 | type = str, 106 | default = "" 107 | ) 108 | args = parser.parse_args() 109 | 110 | config_file = args.config_file 111 | f = open(config_file, 'r', encoding='utf-8') 112 | configs = f.read() 113 | configs = yaml.load(configs) 114 | configs['use_gpu'] = args.gpu 115 | configs['save_dir'] = args.save_dir 116 | configs['data_root'] = args.data_root 117 | configs['maskrcnn_model_path'] = args.maskrcnn_model_path 118 | configs['superpoint_model_path'] = args.superpoint_model_path 119 | 120 | show_detections(configs) 121 | 122 | if __name__ == "__main__": 123 | main() 124 | 125 | 126 | -------------------------------------------------------------------------------- /debug_tools/show_match.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | 16 | from model.build_model import build_superpoint_model 17 | from model.inference import maskrcnn_inference 18 | from datasets.utils.pipeline import makedir 19 | from debug_tools.show_batch import show_batch, show_numpy 20 | from utils.tools import tensor_to_numpy 21 | from datasets.utils.postprocess import nms_fast 22 | from kornia.feature import match_nn 23 | 24 | os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2, 3" 25 | 26 | def process(prob, desc, border_remove, nms_dist): 27 | # Convert pytorch -> numpy. 28 | heatmap = prob.squeeze() # H * W 29 | desc_data = desc.squeeze() # 256 * H * W 30 | ys, xs = np.where(heatmap > 0) # Confidence threshold. 31 | if len(xs) == 0: 32 | return None, None 33 | pts = np.zeros((3, len(xs))) # Populate point data sized 3xN. 34 | pts[0, :] = xs 35 | pts[1, :] = ys 36 | pts[2, :] = heatmap[ys, xs] 37 | H, W = heatmap.shape[-2:] 38 | pts, _ = nms_fast(pts, H, W, dist_thresh=nms_dist) 39 | inds = np.argsort(pts[2,:]) 40 | pts = pts[:,inds[::-1]] # Sort by confidence. 41 | # Remove points along border. 
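  # At this point pts is a 3xN array: row 0 holds x coordinates, row 1 holds y
  # coordinates, and row 2 holds the heatmap confidence of each keypoint, with
  # columns already NMS-filtered and sorted by descending confidence. The lines
  # below drop any keypoint lying within border_remove pixels of the image edge
  # before descriptors are sampled for the surviving points.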
42 | bord = border_remove 43 | toremoveW = np.logical_or(pts[0, :] < bord, pts[0, :] >= (W-bord)) 44 | toremoveH = np.logical_or(pts[1, :] < bord, pts[1, :] >= (H-bord)) 45 | toremove = np.logical_or(toremoveW, toremoveH) 46 | pts = pts[:, ~toremove] 47 | 48 | desc_point = [] 49 | for i in range(pts.shape[1]): 50 | xs = int(pts[0][i]) 51 | ys = int(pts[1][i]) 52 | desc_point = desc_point + [desc_data[:, ys, xs]] 53 | 54 | desc_point = np.stack(desc_point) 55 | return pts, desc_point 56 | 57 | def extract_desc(img_path, model, detection_thr, img_new_size): 58 | 59 | transform = transforms.ToTensor() 60 | 61 | image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 62 | image = cv2.resize(image, tuple(img_new_size[::-1]), interpolation=cv2.INTER_LINEAR) 63 | if len(image.shape) == 2: 64 | image = cv2.merge([image, image, image]) 65 | 66 | img = transform(image) 67 | img = img.unsqueeze(0) 68 | points_output = model(img) 69 | 70 | # process point 71 | prob = points_output['prob'].cpu().detach().gt(detection_thr).float().numpy() 72 | desc = points_output['desc'].cpu().detach().float().numpy() 73 | points, descs = process(prob, desc, 4, 4) 74 | 75 | return image, points, descs 76 | 77 | def draw_points(points, img, color=(255,0,0)): 78 | for j in range(points.shape[1]): 79 | x = points[0][j].astype(int) 80 | y = points[1][j].astype(int) 81 | if x < 0: 82 | break 83 | cv2.circle(img, (x,y), 1, color, thickness=-1) 84 | return img 85 | 86 | def generate_pair_result(dataroot, name, save_dir, model, detection_thr, img_new_size): 87 | with torch.no_grad(): 88 | model.eval() 89 | 90 | pair_path = os.path.join(dataroot, name) 91 | image_names = os.listdir(pair_path) 92 | 93 | img1_path = os.path.join(pair_path, image_names[0]) 94 | img2_path = os.path.join(pair_path, image_names[1]) 95 | 96 | img1, points1, desc1 = extract_desc(img1_path, model, detection_thr, img_new_size) 97 | img2, points2, desc2 = extract_desc(img2_path, model, detection_thr, img_new_size) 98 | 99 | h, w = img1.shape[:2] 100 | 101 | desc1 = torch.tensor(desc1) 102 | desc2 = torch.tensor(desc2) 103 | 104 | dis, match = match_nn(desc1, desc2) 105 | dis, match = dis.squeeze(1).numpy(), match.numpy() 106 | img1 = draw_points(points1, img1) 107 | img2 = draw_points(points2, img2, (0, 255, 0)) 108 | 109 | img = np.concatenate([img1, img2], 1) 110 | for i in range(match.shape[0]): 111 | if dis[i] > 0.7 : 112 | continue 113 | 114 | idx1 = int(match[i, 0]) 115 | idx2 = int(match[i, 1]) 116 | 117 | px1, py1 = int(points1[0][idx1]), int(points1[1][idx1]) 118 | px2, py2 = int(points2[0][idx2] + w), int(points2[1][idx2]) 119 | 120 | p1 = (px1, py1) 121 | p2 = (px2, py2) 122 | 123 | a = np.random.randint(0,255) 124 | b = np.random.randint(0,255) 125 | c = np.random.randint(0,255) 126 | 127 | cv2.line(img, (px1, py1), (px2, py2), (a, b, c), 1) 128 | 129 | save_name = name + ".png" 130 | save_path = os.path.join(save_dir, save_name) 131 | cv2.imwrite(save_path, img) 132 | 133 | def show_match(configs): 134 | 135 | # read configs 136 | save_dir = configs['save_dir'] 137 | data_root = configs['data_root'] 138 | superpoint_model_config = configs['model']['superpoint'] 139 | detection_thr = superpoint_model_config['detection_threshold'] 140 | img_new_size = configs['img_new_size'] 141 | configs['num_gpu'] = [0] 142 | configs['public_model'] = 0 143 | 144 | # model 145 | superpoint_model = build_superpoint_model(configs, requires_grad=False) 146 | superpoint_model.eval() 147 | 148 | 149 | pair_names = os.listdir(data_root) 150 | 151 | for pair_name in 
pair_names: 152 | generate_pair_result(data_root, pair_name, save_dir, superpoint_model, detection_thr, img_new_size) 153 | 154 | 155 | def main(): 156 | parser = argparse.ArgumentParser(description="show match") 157 | parser.add_argument( 158 | "-c", "--config_file", 159 | dest = "config_file", 160 | type = str, 161 | default = "" 162 | ) 163 | parser.add_argument( 164 | "-g", "--gpu", 165 | dest = "gpu", 166 | type = int, 167 | default = 0 168 | ) 169 | parser.add_argument( 170 | "-s", "--save_dir", 171 | dest = "save_dir", 172 | type = str, 173 | default = "" 174 | ) 175 | parser.add_argument( 176 | "-d", "--data_root", 177 | dest = "data_root", 178 | type = str, 179 | default = "" 180 | ) 181 | parser.add_argument( 182 | "-m", "--model_path", 183 | dest = "superpoint_model_path", 184 | type = str, 185 | default = "" 186 | ) 187 | args = parser.parse_args() 188 | config_file = args.config_file 189 | f = open(config_file, 'r', encoding='utf-8') 190 | configs = f.read() 191 | configs = yaml.load(configs) 192 | configs['use_gpu'] = args.gpu 193 | configs['data_root'] = args.data_root 194 | configs['superpoint_model_path'] = args.superpoint_model_path 195 | configs['save_dir'] = args.save_dir 196 | configs['img_new_size'] = [480, 640] 197 | 198 | show_match(configs) 199 | 200 | if __name__ == "__main__": 201 | main() 202 | -------------------------------------------------------------------------------- /debug_tools/show_points_detection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | 16 | from model.build_model import build_maskrcnn 17 | from model.inference import maskrcnn_inference 18 | from datasets.utils.pipeline import makedir 19 | from debug_tools.show_batch import show_batch, show_numpy 20 | from utils.tools import tensor_to_numpy 21 | from datasets.utils.postprocess import nms_fast 22 | from kornia.feature import match_nn 23 | 24 | os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2, 3" 25 | 26 | def process(prob, border_remove, nms_dist): 27 | # Convert pytorch -> numpy. 28 | heatmap = prob.squeeze() # H * W 29 | ys, xs = np.where(heatmap > 0) # Confidence threshold. 30 | if len(xs) == 0: 31 | return None, None 32 | pts = np.zeros((3, len(xs))) # Populate point data sized 3xN. 33 | pts[0, :] = xs 34 | pts[1, :] = ys 35 | pts[2, :] = heatmap[ys, xs] 36 | H, W = heatmap.shape[-2:] 37 | pts, _ = nms_fast(pts, H, W, dist_thresh=nms_dist) 38 | inds = np.argsort(pts[2,:]) 39 | pts = pts[:,inds[::-1]] # Sort by confidence. 40 | # Remove points along border. 
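  # Same post-processing as in show_match.py, except that this variant only
  # returns the filtered 3xN keypoint array (x, y, confidence) and does not
  # sample descriptors. The border filter below removes keypoints closer than
  # border_remove pixels to the image boundary.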
41 | bord = border_remove 42 | toremoveW = np.logical_or(pts[0, :] < bord, pts[0, :] >= (W-bord)) 43 | toremoveH = np.logical_or(pts[1, :] < bord, pts[1, :] >= (H-bord)) 44 | toremove = np.logical_or(toremoveW, toremoveH) 45 | pts = pts[:, ~toremove] 46 | 47 | return pts 48 | 49 | def extract_points(img_path, model, detection_thr): 50 | 51 | transform = transforms.ToTensor() 52 | 53 | image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 54 | if len(image.shape) == 2: 55 | image = cv2.merge([image, image, image]) 56 | 57 | sizes = {} 58 | original_image_sizes = [image.shape[-2:]] 59 | sizes['original_sizes'] = torch.tensor(original_image_sizes) 60 | sizes['new_sizes'] = torch.tensor(original_image_sizes) 61 | 62 | img = transform(image) 63 | img = img.unsqueeze(0) 64 | _, _, points_output = model(img, sizes) 65 | 66 | # process point 67 | prob = points_output['prob'].cpu().detach().gt(detection_thr).float().numpy() 68 | points = process(prob, 4, 4) 69 | 70 | return image, points 71 | 72 | def draw_points(points, img, color=(255,0,0)): 73 | for j in range(points.shape[1]): 74 | x = points[0][j].astype(int) 75 | y = points[1][j].astype(int) 76 | if x < 0: 77 | break 78 | cv2.circle(img, (x,y), 1, color, thickness=-1) 79 | return img 80 | 81 | def show_image_points(dataroot, image_name, save_dir, model, detection_thr): 82 | with torch.no_grad(): 83 | model.eval() 84 | 85 | img_path = os.path.join(dataroot, image_name) 86 | img, points = extract_points(img_path, model, detection_thr) 87 | h, w = img.shape[:2] 88 | img = draw_points(points, img) 89 | 90 | save_path = os.path.join(save_dir, image_name) 91 | cv2.imwrite(save_path, img) 92 | 93 | def show_points(configs): 94 | 95 | # read configs 96 | save_dir = configs['save_dir'] 97 | data_root = configs['data_root'] 98 | superpoint_model_config = configs['model']['superpoint'] 99 | detection_thr = superpoint_model_config['eval']['detection_threshold'] 100 | configs['num_gpu'] = [0] 101 | configs['public_model'] = 0 102 | 103 | # model 104 | maskrcnn_model = build_maskrcnn(configs) 105 | 106 | image_names = os.listdir(data_root) 107 | 108 | for image_name in image_names: 109 | show_image_points(data_root, image_name, save_dir, maskrcnn_model, detection_thr) 110 | 111 | 112 | def main(): 113 | parser = argparse.ArgumentParser(description="show match") 114 | parser.add_argument( 115 | "-c", "--config_file", 116 | dest = "config_file", 117 | type = str, 118 | default = "" 119 | ) 120 | parser.add_argument( 121 | "-g", "--gpu", 122 | dest = "gpu", 123 | type = int, 124 | default = 0 125 | ) 126 | parser.add_argument( 127 | "-s", "--save_dir", 128 | dest = "save_dir", 129 | type = str, 130 | default = "" 131 | ) 132 | parser.add_argument( 133 | "-d", "--data_root", 134 | dest = "data_root", 135 | type = str, 136 | default = "" 137 | ) 138 | parser.add_argument( 139 | "-m", "--model_path", 140 | dest = "pretrained_model_path", 141 | type = str, 142 | default = "" 143 | ) 144 | args = parser.parse_args() 145 | config_file = args.config_file 146 | f = open(config_file, 'r', encoding='utf-8') 147 | configs = f.read() 148 | configs = yaml.load(configs) 149 | configs['use_gpu'] = args.gpu 150 | configs['data_root'] = args.data_root 151 | configs['pretrained_model_path'] = args.pretrained_model_path 152 | configs['save_dir'] = args.save_dir 153 | 154 | show_points(configs) 155 | 156 | if __name__ == "__main__": 157 | main() 158 | -------------------------------------------------------------------------------- /debug_tools/test_batch_H.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | # sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages/') 8 | import numpy as np 9 | import time 10 | import sys 11 | import os 12 | import argparse 13 | from datetime import datetime 14 | 15 | import torch 16 | import torch.nn as nn 17 | from torch.utils.data import DataLoader 18 | from torchvision import transforms 19 | import yaml 20 | import cv2 21 | 22 | from models.superpoint import SuperPoint 23 | from models.backbone.fcn import VGGNet 24 | from models.vgg_like import VggLike 25 | from models.superobject import SuperObject 26 | from datasets.utils.pipeline import makedir 27 | from debug_tools.show_batch import show_batch, show_numpy 28 | from datasets.testdata.dataloader import TestDataset 29 | from utils.tools import process_point, tensor_to_numpy 30 | from datasets.utils.homographies import sample_homography, warp_batch_images 31 | 32 | def export_points(configs): 33 | # read configs 34 | val_batch_size = configs['model']['batch_size'] 35 | data_root = configs['data_root'] 36 | cell = configs['model']['cell'] 37 | img_new_size = configs['img_new_size'] 38 | 39 | # dataset 40 | val_data = TestDataset(dataroot=data_root, img_new_size=img_new_size) 41 | val_loader = DataLoader(val_data, batch_size=val_batch_size, num_workers=8) 42 | 43 | for iter, batch in enumerate(val_loader): 44 | inputs = batch['image'] 45 | img_shape = inputs.shape[-2:] 46 | H = sample_homography(img_shape, **configs['model']['homography_adaptation']['homographies']) 47 | show_batch(inputs) 48 | warped_img = warp_batch_images(inputs, H) 49 | show_batch(warped_img) 50 | 51 | img = tensor_to_numpy(inputs[0]) 52 | img = cv2.warpPerspective(img, H, (img_shape[1], img_shape[0])) 53 | show_numpy(img) 54 | 55 | def main(): 56 | parser = argparse.ArgumentParser(description="export points") 57 | parser.add_argument( 58 | "-c", "--config_file", 59 | dest = "config_file", 60 | type = str, 61 | default = "" 62 | ) 63 | parser.add_argument( 64 | "-d", "--data_root", 65 | dest = "data_root", 66 | type = str, 67 | default = "" 68 | ) 69 | args = parser.parse_args() 70 | 71 | config_file = args.config_file 72 | f = open(config_file, 'r', encoding='utf-8') 73 | configs = f.read() 74 | configs = yaml.load(configs) 75 | configs['data_root'] = args.data_root 76 | configs['img_new_size'] = [240, 320] 77 | 78 | export_points(configs) 79 | 80 | if __name__ == "__main__": 81 | main() 82 | -------------------------------------------------------------------------------- /debug_tools/test_data_process.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | import yaml 9 | import argparse 10 | import copy 11 | import torch 12 | import torchvision 13 | import torchvision.transforms as transforms 14 | from torch.utils.data import Dataset, DataLoader 15 | 16 | from datasets.utils.preprocess import preprocess_train_data 17 | from datasets.utils import postprocess as post 18 | from datasets.utils.batch_collator import BatchCollator 19 | from debug_tools.show_batch import show_batch, show_numpy, show_batch_opencv 20 | from utils.tools import tensor_to_numpy 21 | from datasets.utils.build_data import coco_loader 22 | from torch.nn 
import functional as F 23 | 24 | from torchvision.models.detection.transform import resize_boxes 25 | from torchvision.models.detection.roi_heads import paste_masks_in_image 26 | 27 | def postT(result, # type: List[Dict[str, Tensor]] 28 | image_shapes, # type: List[Tuple[int, int]] 29 | original_image_sizes # type: List[Tuple[int, int]] 30 | ): 31 | for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)): 32 | boxes = pred["boxes"] 33 | boxes = resize_boxes(boxes, im_s, o_im_s) 34 | result[i]["boxes"] = boxes 35 | if "masks" in pred: 36 | masks = pred["masks"].unsqueeze(1) 37 | scale = min(float(o_im_s[0])/im_s[0], float(o_im_s[1])/im_s[1]) 38 | masks = F.interpolate(masks.float(), scale_factor=scale).squeeze(1).byte() 39 | result[i]["masks"] = masks 40 | return result 41 | 42 | 43 | def test(configs): 44 | # read configs 45 | model_dir = configs['model_dir'] 46 | data_root = configs['data_root'] 47 | data_config = configs['data'] 48 | train_data_name = data_config['TRAIN'] 49 | 50 | debug_dir = "/home/haoyuefan/xk_data/superpoint/coco/debug_results/data_processing" 51 | 52 | # data 53 | loader = coco_loader( 54 | data_root=data_root, name=train_data_name, config=data_config, batch_size=2, remove_images_without_annotations=True) 55 | 56 | for iter, batch in enumerate(loader): 57 | print("iter = {}".format(iter)) 58 | gt = copy.deepcopy(batch) 59 | original_images = batch['image'] 60 | image_names = batch['image_name'] 61 | 62 | images, sizes, maskrcnn_targets, warped_images, superpoint_targets = preprocess_train_data(batch, False, 1, data_config) 63 | 64 | # original_images 65 | original_images = [tensor_to_numpy(img) for img in original_images] 66 | 67 | # sizes 68 | original_sizes = sizes['original_sizes'] 69 | new_sizes = sizes['new_sizes'] 70 | 71 | # maskrcnn 72 | num_images = len(images) 73 | new_targets = [] 74 | for i in range(num_images): 75 | target = {} 76 | num_objs = int(torch.sum(maskrcnn_targets['labels'][i] >= 0).item()) 77 | for k in maskrcnn_targets.keys(): 78 | target[k] = maskrcnn_targets[k][i][:num_objs] 79 | target['scores'] = torch.ones(num_objs) 80 | target['masks'] = target['masks'].float() 81 | new_targets += [target] 82 | maskrcnn_targets = new_targets 83 | maskrcnn_targets = postT(maskrcnn_targets, new_sizes.numpy().tolist(), original_sizes.numpy().tolist()) 84 | 85 | # superpoint 86 | points_probs = superpoint_targets['kpt_map'] 87 | points_desc = torch.ones(len(points_probs), 256, points_probs.shape[-2], points_probs.shape[-1]) 88 | points_output = {'prob':points_probs, 'desc': points_desc} 89 | 90 | detections, points_output = post.postprocess(new_sizes, original_sizes, 0.3, maskrcnn_targets, points_output) 91 | 92 | results = post.save_detection_results(original_images, image_names, debug_dir, detections, None, points_output, True, True) 93 | 94 | # save gt 95 | new_gts = [] 96 | for i in range(len(images)): 97 | new_gt = {} 98 | for k in gt: 99 | new_gt[k] = gt[k][i] 100 | new_gt['scores'] = new_gt['labels'] 101 | new_gts.append(new_gt) 102 | 103 | save_dir_list = [os.path.join(debug_dir, image_name) for image_name in image_names] 104 | images = copy.deepcopy(original_images) 105 | images = post.overlay_objects(images, new_gts, None) 106 | images = post.overlay_points(images, new_gts) 107 | post.save_images(images, save_dir_list, "groundtruth") 108 | 109 | 110 | def main(): 111 | parser = argparse.ArgumentParser(description="Test Process") 112 | parser.add_argument( 113 | "-c", "--config_file", 114 | dest = "config_file", 
115 | type = str, 116 | default = "" 117 | ) 118 | parser.add_argument( 119 | "-s", "--save_dir", 120 | dest = "save_dir", 121 | type = str, 122 | default = "" 123 | ) 124 | parser.add_argument( 125 | "-d", "--data_root", 126 | dest = "data_root", 127 | type = str, 128 | default = "" 129 | ) 130 | args = parser.parse_args() 131 | 132 | config_file = args.config_file 133 | f = open(config_file, 'r', encoding='utf-8') 134 | configs = f.read() 135 | configs = yaml.load(configs) 136 | configs['model_dir'] = args.save_dir 137 | configs['data_root'] = args.data_root 138 | 139 | test(configs) 140 | 141 | if __name__ == "__main__": 142 | main() -------------------------------------------------------------------------------- /experiments/compare_tracking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | import yaml 9 | import numpy as np 10 | import argparse 11 | 12 | from experiments.utils.utils import read_tracking_results, plot_tracking_details 13 | 14 | 15 | def filter_pr_curves(results, plot_keys): 16 | pr_curves = results['pr_curves'] 17 | new_pr_curves = {} 18 | for k in pr_curves.keys(): 19 | if k in plot_keys: 20 | new_pr_curves[k] = pr_curves[k] 21 | 22 | results['pr_curves'] = new_pr_curves 23 | return results 24 | 25 | 26 | def compare_tracking(configs): 27 | f1 = configs['file1'] 28 | f2 = configs['file2'] 29 | save_dir = configs['save_dir'] 30 | plot_keys = configs['interval'] 31 | 32 | results1 = read_tracking_results(f1) 33 | results2 = read_tracking_results(f2) 34 | 35 | results1 = filter_pr_curves(results1, plot_keys) 36 | results2 = filter_pr_curves(results2, plot_keys) 37 | 38 | results_list = [results1, results2] 39 | plot_tracking_details(results_list, save_dir, configs=configs) 40 | 41 | 42 | def main(): 43 | parser = argparse.ArgumentParser(description="compare tracking results") 44 | parser.add_argument( 45 | "-f1", "--file1", 46 | dest = "file1", 47 | type = str, 48 | default = "" 49 | ) 50 | parser.add_argument( 51 | "-f2", "--file2", 52 | dest = "file2", 53 | type = str, 54 | default = "" 55 | ) 56 | parser.add_argument( 57 | "-s", "--save_dir", 58 | dest = "save_dir", 59 | type = str, 60 | default = "" 61 | ) 62 | parser.add_argument( 63 | "-c", "--config_file", 64 | dest = "config_file", 65 | type = str, 66 | default = "" 67 | ) 68 | args = parser.parse_args() 69 | config_file = args.config_file 70 | f = open(config_file, 'r', encoding='utf-8') 71 | configs = f.read() 72 | configs = yaml.load(configs) 73 | configs['file1'] = args.file1 74 | configs['file2'] = args.file2 75 | configs['save_dir'] = args.save_dir 76 | 77 | compare_tracking(configs) 78 | 79 | if __name__ == "__main__": 80 | main() -------------------------------------------------------------------------------- /experiments/demo/kitti-relocalization.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/experiments/demo/kitti-relocalization.gif -------------------------------------------------------------------------------- /experiments/demo/object-matching1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/experiments/demo/object-matching1.gif 
-------------------------------------------------------------------------------- /experiments/demo/object-matching2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/experiments/demo/object-matching2.gif -------------------------------------------------------------------------------- /experiments/object_tracking/single_object_tracking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | 16 | from model.build_model import build_maskrcnn, build_gcn, build_superpoint_model 17 | from datasets.utils.pipeline import makedir 18 | from datasets.kitti.kitti_tracking import KittiTracking 19 | from datasets.otb.otb_tracking import OtbTracking 20 | from datasets.vot.vot_tracking import VotTracking 21 | 22 | from experiments.object_tracking.object_tracking import update_normal_size, network_output, calculate_pr_curve, box_iou 23 | from experiments.utils.utils import save_tracking_results, plot_pr_curves, plot_tracking_details, get_pr_curve_area 24 | 25 | 26 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 27 | 28 | 29 | def reorder_descs(net_output, tracking_gt): 30 | ''' 31 | reorder the descs, the first is the desc of tracked object 32 | ''' 33 | net_output_boxes = net_output['objects']['boxes'] # N * 4 34 | if len(net_output_boxes) < 1 : 35 | return None 36 | 37 | boxes = torch.tensor(tracking_gt['box']) # 4 38 | boxes = boxes.unsqueeze(0) # 1 * 4 39 | ious = box_iou(boxes, net_output_boxes) # 1 * N 40 | ious = ious.squeeze(0) 41 | 42 | value, index = ious.max(0) 43 | value, index = value.item(), index.item() 44 | 45 | if value > 0.5: 46 | descs = net_output['descs'] 47 | order = [i for i in range(len(descs))] 48 | order[0] = index 49 | order[index] = 0 50 | 51 | descs = descs[order] 52 | else: 53 | descs = None 54 | 55 | return descs 56 | 57 | 58 | def match_objects(object_descs, last_object_descs): 59 | ''' 60 | calculate gt_matrix, match_matrix 61 | ''' 62 | if object_descs is None or last_object_descs is None: 63 | return None, None 64 | 65 | # generate groundtruth pair matrix 66 | def get_gt_and_match(descs1, descs2): 67 | N = len(descs2) 68 | gt_matrix = torch.zeros(N) 69 | gt_matrix[0] = 1.0 70 | 71 | tracked_desc = descs1[0].unsqueeze(0) # 1 * D 72 | match_matrix = torch.einsum('nd,dm->nm', tracked_desc, descs2.t()) # 1 * M 73 | return gt_matrix, match_matrix 74 | 75 | gt1, match1 = get_gt_and_match(last_object_descs, object_descs) 76 | gt2, match2 = get_gt_and_match(object_descs, last_object_descs) 77 | 78 | return [gt1, gt2], [match1, match2] 79 | 80 | 81 | def calculate_pr_curves(object_descs_list, intervals): 82 | pr_curves = {} 83 | for interval in intervals: 84 | gts, matches = [], [] 85 | last_object_descs = None 86 | for i in range(len(object_descs_list)): 87 | if (i % interval) != 0: 88 | continue 89 | 90 | object_descs = object_descs_list[i] 91 | if last_object_descs is not None and object_descs is not None: 92 | gt_matrix, match_matrix = match_objects(object_descs, last_object_descs) 93 | if gt_matrix is not None and match_matrix is not None: 94 | gts += gt_matrix 95 | matches += match_matrix 96 | 97 | last_object_descs = object_descs 98 
| 99 | pr_curve = calculate_pr_curve(gts, matches) 100 | pr_curves[interval] = pr_curve 101 | return pr_curves 102 | 103 | 104 | def single_object_tracking(configs): 105 | 106 | # read configs 107 | save_dir = configs['save_dir'] 108 | data_root = configs['data_root'] 109 | model_dir = configs['model_dir'] 110 | dataset_name = configs['data']['name'] 111 | configs['num_gpu'] = [0] 112 | configs['public_model'] = 0 113 | 114 | superpoint_model_path = os.path.join(model_dir, "points_model.pth") 115 | maskrcnn_model_path = os.path.join(model_dir, "maskrcnn_model.pth") 116 | gcn_model_path = os.path.join(model_dir, "gcn_model.pth") 117 | configs["maskrcnn_model_path"] = maskrcnn_model_path 118 | configs["superpoint_model_path"] = superpoint_model_path 119 | configs["graph_model_path"] = gcn_model_path 120 | 121 | # model 122 | superpoint_model = build_superpoint_model(configs, requires_grad=False) 123 | superpoint_model.eval() 124 | 125 | maskrcnn_model = build_maskrcnn(configs) 126 | maskrcnn_model.eval() 127 | 128 | gcn_model = build_gcn(configs) 129 | gcn_model.eval() 130 | 131 | intervals = [1, 2, 3, 5, 10] 132 | seqs = {'kitti':['0002', '0003', '0006', '0010'], 133 | 'otb': ['BlurBody', 'BlurCar2', 'Human2', 'Human7', 'Liquor'], 134 | 'vot': ['bluecar', 'bus6', 'humans_corridor_occ_2_A']} 135 | datasets = {'kitti':KittiTracking, 'otb':OtbTracking, 'vot':VotTracking} 136 | DATASET = datasets[dataset_name] 137 | SEQNAMES = seqs[dataset_name] 138 | # SEQNAMES = [seqs[dataset_name][0]] 139 | 140 | pr_curves_list = [] 141 | for seq in SEQNAMES: 142 | dataset = DATASET(data_root, seq) 143 | 144 | image_size = dataset.image_size() 145 | configs['data']['normal_size'] = update_normal_size(image_size) 146 | 147 | object_descs_list = [] 148 | for data in dataset: 149 | image = data['image'] 150 | image_name = data['image_name'] 151 | print(image_name) 152 | 153 | net_output = network_output(image, superpoint_model, maskrcnn_model, gcn_model, configs) 154 | net_output = {'points': net_output[0], 'objects': net_output[1], 'descs': net_output[2]} 155 | 156 | tracking_gt = dataset.get_label(image_name) 157 | if tracking_gt is None or net_output['points'] is None: 158 | object_descs_list.append(None) 159 | continue 160 | 161 | object_descs = reorder_descs(net_output, tracking_gt) 162 | object_descs_list.append(object_descs) 163 | 164 | pr_curves = calculate_pr_curves(object_descs_list, intervals) 165 | pr_curves_list.append(pr_curves) 166 | print(pr_curves) 167 | 168 | # plot 169 | new_pr_curves, areas = {}, {} 170 | for k in pr_curves_list[0].keys(): 171 | pr_curve_list = [torch.tensor(pr_curves[k]) for pr_curves in pr_curves_list] 172 | pr_curve_list = torch.stack(pr_curve_list) # N * 10 * 2 173 | new_pr_curve = torch.mean(pr_curve_list, 0) 174 | new_pr_curves[k] = new_pr_curve.cpu().numpy().tolist() 175 | areas[k] = get_pr_curve_area(new_pr_curves[k]) 176 | 177 | plot_pr_curves(new_pr_curves, "otb_tracking", save_dir) 178 | 179 | # # save results to yaml 180 | results = {'dataset':dataset_name, 'model':"ours", 'areas': areas, 'pr_curves': new_pr_curves} 181 | save_tracking_results(results, save_dir) 182 | 183 | 184 | def main(): 185 | parser = argparse.ArgumentParser(description="show match") 186 | parser.add_argument( 187 | "-c", "--config_file", 188 | dest = "config_file", 189 | type = str, 190 | default = "" 191 | ) 192 | parser.add_argument( 193 | "-g", "--gpu", 194 | dest = "gpu", 195 | type = int, 196 | default = 0 197 | ) 198 | parser.add_argument( 199 | "-s", "--save_dir", 200 | dest = 
"save_dir", 201 | type = str, 202 | default = "" 203 | ) 204 | parser.add_argument( 205 | "-d", "--data_root", 206 | dest = "data_root", 207 | type = str, 208 | default = "" 209 | ) 210 | parser.add_argument( 211 | "-m", "--model_dir", 212 | dest = "model_dir", 213 | type = str, 214 | default = "" 215 | ) 216 | args = parser.parse_args() 217 | config_file = args.config_file 218 | f = open(config_file, 'r', encoding='utf-8') 219 | configs = f.read() 220 | configs = yaml.load(configs) 221 | configs['use_gpu'] = args.gpu 222 | configs['data_root'] = args.data_root 223 | configs['model_dir'] = args.model_dir 224 | configs['save_dir'] = args.save_dir 225 | 226 | single_object_tracking(configs) 227 | 228 | if __name__ == "__main__": 229 | main() 230 | -------------------------------------------------------------------------------- /experiments/place_recogination/offline_process.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | 16 | from datasets.utils.pipeline import makedir 17 | from datasets.kitti.kitti_odomery import KittiOdometry 18 | from datasets.utils import postprocess as post 19 | 20 | from experiments.object_tracking.object_tracking import update_normal_size, network_output, calculate_pr_curve, box_iou 21 | from experiments.utils.utils import save_tracking_results, plot_pr_curves, plot_tracking_details, get_pr_curve_area 22 | 23 | 24 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 25 | 26 | def filter_objects(descs, objects, target_labels): 27 | new_descs = [] 28 | new_objects = {} 29 | for k in objects.keys(): 30 | new_objects[k] = [] 31 | 32 | for i in range(len(descs)): 33 | label = objects['labels'][i] 34 | score = objects['scores'][i] 35 | if label in target_labels and score > 0.5: 36 | new_descs.append(descs[i]) 37 | for k in objects.keys(): 38 | new_objects[k].append(objects[k][i]) 39 | 40 | if len(new_descs) < 1: 41 | return None, None 42 | 43 | new_descs = np.vstack(new_descs) 44 | for k in objects.keys(): 45 | new_objects[k] = np.vstack(new_objects[k]) 46 | 47 | return new_descs, new_objects 48 | 49 | def relocalization_offline(configs): 50 | 51 | # read configs 52 | save_dir = configs['save_dir'] 53 | data_root = configs['data_root'] 54 | net_output_dir = configs['net_output_dir'] 55 | dataset_name = configs['data']['name'] 56 | 57 | # data 58 | seqs = ['00', '05', '06'] 59 | similarity_thr = 0.95 60 | results = {} 61 | for seq in seqs: 62 | # dataset 63 | dataset = KittiOdometry(data_root, seq) 64 | dis_thr = dataset.dis_thr 65 | angle_thr = dataset.angle_thr 66 | interval = dataset.interval 67 | loop_gt, num_loop_gt = dataset.get_loop_gt() 68 | 69 | seq_net_output_dir = os.path.join(net_output_dir, seq) 70 | descs_list = [] 71 | image_name_list = [] 72 | image_indexes = [] 73 | predict_loops = {} 74 | for data in dataset: 75 | image = data['image'] 76 | image_name = data['image_name'] 77 | gt = dataset.get_label(image_name) 78 | print(image_name) 79 | 80 | image_net_output_dir = os.path.join(seq_net_output_dir, image_name) 81 | # load points 82 | points = [] 83 | points_dir = os.path.join(image_net_output_dir, "points") 84 | if not os.path.exists(points_dir): 85 | continue 86 | points_file_names = os.listdir(points_dir) 87 | for points_file_name in 
points_file_names: 88 | p_path = os.path.join(points_dir, points_file_name) 89 | p = np.load(p_path, allow_pickle=True) 90 | points.append(p) 91 | 92 | # load descs 93 | descs_path = os.path.join(image_net_output_dir, "descs.npy") 94 | descs = np.load(descs_path) 95 | 96 | # load objects 97 | objects = {} 98 | objects_dir = os.path.join(image_net_output_dir, "objects") 99 | objects_file_names = os.listdir(objects_dir) 100 | for objects_file_name in objects_file_names: 101 | key = objects_file_name.split('.')[0] 102 | value_path = os.path.join(objects_dir, objects_file_name) 103 | objects[key] = np.load(value_path) 104 | # objects[key] = torch.tensor(objects[key]) 105 | 106 | # original_images = [image] 107 | # image_names = [image_name] 108 | # detections = [objects] 109 | # results = post.save_detection_results(original_images, image_names, save_dir, detections, 110 | # None, None, True, False) 111 | 112 | 113 | target_labels = [3, 8] 114 | descs, objects = filter_objects(descs, objects, target_labels) 115 | if descs is None: 116 | continue 117 | 118 | if gt['index'] > interval and len(descs_list) > 0: 119 | # find loop 120 | max_score = 0 121 | match_image = "" 122 | for descs_i, image_name_i, image_index_i in zip(descs_list, image_name_list, image_indexes): 123 | if gt['index'] - image_index_i < interval: 124 | break 125 | 126 | descs_similarity = descs.dot(descs_i.T) # m * n 127 | matches = descs_similarity > similarity_thr 128 | matches = matches * descs_similarity 129 | matches = np.max(matches, 1) 130 | # decide to match 131 | good_match = 0 132 | score = np.sum(matches) 133 | num_nonzero = np.sum((matches > 0)) 134 | mean_match = np.mean(matches) 135 | if score > 0 and mean_match > 0.3: 136 | good_match = 1 137 | 138 | m, n = descs_similarity.shape 139 | if m - n > 2 or n - m > 2: 140 | good_match = 0 141 | 142 | if good_match and score > max_score: 143 | max_score = score 144 | match_image = image_name_i 145 | predict_loops[image_name] = {'image_name': match_image, 'score': max_score} 146 | 147 | descs_list.append(descs) 148 | image_name_list.append(image_name) 149 | image_indexes.append(gt['index']) 150 | 151 | # calculate pr 152 | # calculate prediction 153 | num_loop_prediction, num_correct_prediction = 0, 0 154 | for image_name in predict_loops.keys(): 155 | predict_loop = predict_loops[image_name] 156 | if predict_loop['score'] > 0: 157 | num_loop_prediction += 1 158 | loop_image_name = predict_loop['image_name'] 159 | gt1 = dataset.get_label(image_name) 160 | gt2 = dataset.get_label(loop_image_name) 161 | 162 | idx1, p1, R1 = gt1['index'], gt1['position'], gt1['rotation'] 163 | idx2, p2, R2 = gt2['index'], gt2['position'], gt2['rotation'] 164 | 165 | dp = np.linalg.norm((p1-p2)) 166 | dR = R1.dot(R2.T) 167 | dr, _ = cv2.Rodrigues(dR) 168 | d_angle = np.linalg.norm(dr) 169 | d_idx = idx2 - idx1 170 | d_idx = d_idx if d_idx > 0 else (-d_idx) 171 | if (d_idx > interval) and (dp < dis_thr) and (d_angle < angle_thr): 172 | num_correct_prediction += 1 173 | # else: 174 | # print("img1 = {}, img2 = {}".format(image_name, loop_image_name)) 175 | 176 | precision = float(num_correct_prediction) / num_loop_prediction if num_loop_prediction > 0 else 1 177 | recall = float(num_correct_prediction) / num_loop_gt if num_loop_gt > 0 else 1 178 | 179 | results[seq] = {'precision': precision, 'recall': recall} 180 | 181 | print(results) 182 | file_name = "kitti_odometry_pr.yaml" 183 | file_path = os.path.join(save_dir, file_name) 184 | fp = open(file_path, 'w') 185 | 
fp.write(yaml.dump(results)) 186 | 187 | 188 | def main(): 189 | parser = argparse.ArgumentParser(description="show match") 190 | parser.add_argument( 191 | "-c", "--config_file", 192 | dest = "config_file", 193 | type = str, 194 | default = "" 195 | ) 196 | parser.add_argument( 197 | "-s", "--save_dir", 198 | dest = "save_dir", 199 | type = str, 200 | default = "" 201 | ) 202 | parser.add_argument( 203 | "-n", "--net_output_dir", 204 | dest = "net_output_dir", 205 | type = str, 206 | default = "" 207 | ) 208 | parser.add_argument( 209 | "-d", "--data_root", 210 | dest = "data_root", 211 | type = str, 212 | default = "" 213 | ) 214 | args = parser.parse_args() 215 | config_file = args.config_file 216 | f = open(config_file, 'r', encoding='utf-8') 217 | configs = f.read() 218 | configs = yaml.load(configs) 219 | configs['data_root'] = args.data_root 220 | configs['net_output_dir'] = args.net_output_dir 221 | configs['save_dir'] = args.save_dir 222 | 223 | relocalization_offline(configs) 224 | 225 | if __name__ == "__main__": 226 | main() 227 | -------------------------------------------------------------------------------- /experiments/place_recogination/offline_topK.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | 16 | from datasets.utils.pipeline import makedir 17 | from datasets.kitti.kitti_odomery import KittiOdometry 18 | from datasets.utils import postprocess as post 19 | 20 | from experiments.object_tracking.object_tracking import update_normal_size, network_output, calculate_pr_curve, box_iou 21 | from experiments.utils.utils import save_tracking_results, plot_pr_curves, plot_tracking_details, get_pr_curve_area 22 | 23 | 24 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 25 | 26 | def filter_objects(descs, objects, target_labels): 27 | new_descs = [] 28 | new_objects = {} 29 | for k in objects.keys(): 30 | new_objects[k] = [] 31 | 32 | for i in range(len(descs)): 33 | label = objects['labels'][i] 34 | score = objects['scores'][i] 35 | if label in target_labels and score > 0.5: 36 | new_descs.append(descs[i]) 37 | for k in objects.keys(): 38 | new_objects[k].append(objects[k][i]) 39 | 40 | if len(new_descs) < 1: 41 | return None, None 42 | 43 | new_descs = np.vstack(new_descs) 44 | for k in objects.keys(): 45 | new_objects[k] = np.vstack(new_objects[k]) 46 | 47 | return new_descs, new_objects 48 | 49 | def relocalization_offline(configs): 50 | 51 | # read configs 52 | save_dir = configs['save_dir'] 53 | data_root = configs['data_root'] 54 | net_output_dir = configs['net_output_dir'] 55 | dataset_name = configs['data']['name'] 56 | 57 | # data 58 | seqs = ['00', '05', '06'] 59 | similarity_thr = 0.88 60 | interval = 100 61 | pr_curves_list = [] 62 | datasets_results = {} 63 | for seq in seqs: 64 | # dataset 65 | dataset = KittiOdometry(data_root, seq) 66 | dis_thr = dataset.dis_thr 67 | angle_thr = dataset.angle_thr 68 | interval = dataset.interval 69 | 70 | seq_net_output_dir = os.path.join(net_output_dir, seq) 71 | descs_list = [] 72 | image_name_list = [] 73 | image_indexes = [] 74 | predict_loops = {} 75 | for data in dataset: 76 | image = data['image'] 77 | image_name = data['image_name'] 78 | gt = dataset.get_label(image_name) 79 | print(image_name) 80 | 
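# the block below loads, for each frame, the outputs that online_relocalization.py saved under net_output_dir/<seq>/<image_name>:
# per-object keypoints ("points/"), the GCN object descriptors ("descs.npy"), and the Mask R-CNN detections ("objects/")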
81 | image_net_output_dir = os.path.join(seq_net_output_dir, image_name) 82 | # load points 83 | points = [] 84 | points_dir = os.path.join(image_net_output_dir, "points") 85 | if not os.path.exists(points_dir): 86 | continue 87 | points_file_names = os.listdir(points_dir) 88 | for points_file_name in points_file_names: 89 | p_path = os.path.join(points_dir, points_file_name) 90 | p = np.load(p_path, allow_pickle=True) 91 | points.append(p) 92 | 93 | # load descs 94 | descs_path = os.path.join(image_net_output_dir, "descs.npy") 95 | descs = np.load(descs_path) 96 | 97 | # load objects 98 | objects = {} 99 | objects_dir = os.path.join(image_net_output_dir, "objects") 100 | objects_file_names = os.listdir(objects_dir) 101 | for objects_file_name in objects_file_names: 102 | key = objects_file_name.split('.')[0] 103 | value_path = os.path.join(objects_dir, objects_file_name) 104 | objects[key] = np.load(value_path) 105 | 106 | 107 | target_labels = [3, 8] 108 | descs, objects = filter_objects(descs, objects, target_labels) 109 | if descs is None: 110 | continue 111 | 112 | if gt['index'] > interval and len(descs_list) > 0: 113 | # find loop 114 | scores = [] 115 | match_images = [] 116 | for descs_i, image_name_i, image_index_i in zip(descs_list, image_name_list, image_indexes): 117 | if gt['index'] - image_index_i < interval: 118 | break 119 | 120 | descs_similarity = descs.dot(descs_i.T) # m * n 121 | matches = descs_similarity > similarity_thr 122 | matches = matches * descs_similarity 123 | matches = np.max(matches, 1) 124 | # decide to match 125 | good_match = 0 126 | score = np.sum(matches) 127 | 128 | m, n = descs_similarity.shape 129 | num_diff = m - n 130 | num_diff = num_diff if num_diff > 0 else (-num_diff) 131 | score = score - num_diff * 0 132 | 133 | scores.append(score) 134 | match_images.append(image_name_i) 135 | 136 | predict_loops[image_name] = {'match_images': match_images, 'scores': scores} 137 | 138 | descs_list.append(descs) 139 | image_name_list.append(image_name) 140 | image_indexes.append(gt['index']) 141 | 142 | # find groundtruth 143 | loop_gt, num_loop_gt = dataset.get_loop_gt() 144 | 145 | # recall 146 | topk_k = [i for i in range(1, 21)] 147 | recalls = {} 148 | for k in topk_k: 149 | pred_loop = 0 150 | for image_name in loop_gt.keys(): 151 | if loop_gt[image_name]: 152 | if image_name not in predict_loops.keys(): 153 | continue 154 | 155 | predict_loop = predict_loops[image_name] 156 | scores, match_images = predict_loop['scores'], predict_loop['match_images'] 157 | scores = torch.tensor(scores) 158 | k_loop_images = [] 159 | k_loop_scores = [] 160 | if k > len(scores): 161 | k_loop_images = match_images 162 | else: 163 | value, _ = scores.topk(k) 164 | min_v = value[-1].item() 165 | indices = [] 166 | for i_score in range(len(scores)): 167 | if(scores[i_score] >= min_v): 168 | indices.append(i_score) 169 | # _, indices = scores.topk(k) 170 | # indices = indices.numpy().tolist() 171 | for idx in indices: 172 | k_loop_images.append(match_images[idx]) 173 | 174 | # if correct image in k_loop_images 175 | for k_image_name in k_loop_images: 176 | gt1 = dataset.get_label(image_name) 177 | gt2 = dataset.get_label(k_image_name) 178 | 179 | idx1, p1, R1 = gt1['index'], gt1['position'], gt1['rotation'] 180 | idx2, p2, R2 = gt2['index'], gt2['position'], gt2['rotation'] 181 | 182 | dp = np.linalg.norm((p1-p2)) 183 | dR = R1.dot(R2.T) 184 | dr, _ = cv2.Rodrigues(dR) 185 | d_angle = np.linalg.norm(dr) 186 | d_idx = idx2 - idx1 187 | d_idx = d_idx if d_idx > 0 else 
(-d_idx) 188 | if (d_idx > interval) and (dp < dis_thr) and (d_angle < angle_thr): 189 | pred_loop += 1 190 | break 191 | print("k = {}, pred_loop = {}, num_loop_gt = {}".format(k, pred_loop, num_loop_gt)) 192 | recall = float(pred_loop) / num_loop_gt if num_loop_gt > 0 else 1 193 | recalls[k] = recall 194 | datasets_results[seq] = recalls 195 | 196 | 197 | file_name = "kitti_odometry.yaml" 198 | file_path = os.path.join(save_dir, file_name) 199 | fp = open(file_path, 'w') 200 | fp.write(yaml.dump(datasets_results)) 201 | 202 | 203 | 204 | def main(): 205 | parser = argparse.ArgumentParser(description="show match") 206 | parser.add_argument( 207 | "-c", "--config_file", 208 | dest = "config_file", 209 | type = str, 210 | default = "" 211 | ) 212 | parser.add_argument( 213 | "-s", "--save_dir", 214 | dest = "save_dir", 215 | type = str, 216 | default = "" 217 | ) 218 | parser.add_argument( 219 | "-n", "--net_output_dir", 220 | dest = "net_output_dir", 221 | type = str, 222 | default = "" 223 | ) 224 | parser.add_argument( 225 | "-d", "--data_root", 226 | dest = "data_root", 227 | type = str, 228 | default = "" 229 | ) 230 | args = parser.parse_args() 231 | config_file = args.config_file 232 | f = open(config_file, 'r', encoding='utf-8') 233 | configs = f.read() 234 | configs = yaml.load(configs) 235 | configs['data_root'] = args.data_root 236 | configs['net_output_dir'] = args.net_output_dir 237 | configs['save_dir'] = args.save_dir 238 | 239 | relocalization_offline(configs) 240 | 241 | if __name__ == "__main__": 242 | main() 243 | -------------------------------------------------------------------------------- /experiments/place_recogination/online_relocalization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | 16 | from model.build_model import build_maskrcnn, build_gcn, build_superpoint_model 17 | from datasets.utils.pipeline import makedir 18 | from datasets.kitti.kitti_odomery import KittiOdometry 19 | 20 | from experiments.object_tracking.object_tracking import update_normal_size, network_output, calculate_pr_curve, box_iou 21 | 22 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 23 | 24 | 25 | def relocalization(configs): 26 | 27 | # read configs 28 | save_dir = configs['save_dir'] 29 | data_root = configs['data_root'] 30 | model_dir = configs['model_dir'] 31 | dataset_name = configs['data']['name'] 32 | configs['num_gpu'] = [0] 33 | configs['public_model'] = 0 34 | 35 | superpoint_model_path = os.path.join(model_dir, "points_model.pth") 36 | maskrcnn_model_path = os.path.join(model_dir, "maskrcnn_model.pth") 37 | gcn_model_path = os.path.join(model_dir, "gcn_model.pth") 38 | configs["maskrcnn_model_path"] = maskrcnn_model_path 39 | configs["superpoint_model_path"] = superpoint_model_path 40 | configs["graph_model_path"] = gcn_model_path 41 | 42 | # model 43 | superpoint_model = build_superpoint_model(configs, requires_grad=False) 44 | superpoint_model.eval() 45 | 46 | maskrcnn_model = build_maskrcnn(configs) 47 | maskrcnn_model.eval() 48 | 49 | gcn_model = build_gcn(configs) 50 | gcn_model.eval() 51 | 52 | # data 53 | seqs = ['00', '05', '06'] 54 | 55 | pr_curves_list = [] 56 | for seq in seqs: 57 | dataset = KittiOdometry(data_root, seq) 58 | dis_thr = 
dataset.dis_thr 59 | angle_thr = dataset.angle_thr 60 | interval = dataset.interval 61 | 62 | image_size = dataset.image_size() 63 | configs['data']['normal_size'] = update_normal_size(image_size) 64 | 65 | seq_save_dir = os.path.join(save_dir, seq) 66 | makedir(seq_save_dir) 67 | for data in dataset: 68 | image = data['image'] 69 | image_name = data['image_name'] 70 | print(image_name) 71 | 72 | net_output = network_output(image, superpoint_model, maskrcnn_model, gcn_model, configs) 73 | net_output = {'points': net_output[0], 'objects': net_output[1], 'descs': net_output[2]} 74 | if net_output['points'] is None: 75 | continue 76 | 77 | image_save_dir = os.path.join(seq_save_dir, image_name) 78 | makedir(image_save_dir) 79 | 80 | # save points 81 | points_dir = os.path.join(image_save_dir, "points") 82 | makedir(points_dir) 83 | points = net_output['points'] 84 | for i in range(len(points)): 85 | p = points[i].cpu().numpy 86 | p_path = os.path.join(points_dir, (str(i) + ".npy")) 87 | np.save(p_path, p) 88 | 89 | # save descs 90 | descs_path = os.path.join(image_save_dir, "descs.npy") 91 | descs = net_output['descs'].cpu().numpy() 92 | np.save(descs_path, descs) 93 | 94 | # save objects 95 | objects = net_output['objects'] 96 | objects_dir = os.path.join(image_save_dir, "objects") 97 | makedir(objects_dir) 98 | for k in objects.keys(): 99 | value = objects[k].cpu().numpy() 100 | value_path = os.path.join(objects_dir, (k+".npy")) 101 | np.save(value_path, value) 102 | 103 | 104 | 105 | def main(): 106 | parser = argparse.ArgumentParser(description="show match") 107 | parser.add_argument( 108 | "-c", "--config_file", 109 | dest = "config_file", 110 | type = str, 111 | default = "" 112 | ) 113 | parser.add_argument( 114 | "-g", "--gpu", 115 | dest = "gpu", 116 | type = int, 117 | default = 0 118 | ) 119 | parser.add_argument( 120 | "-s", "--save_dir", 121 | dest = "save_dir", 122 | type = str, 123 | default = "" 124 | ) 125 | parser.add_argument( 126 | "-d", "--data_root", 127 | dest = "data_root", 128 | type = str, 129 | default = "" 130 | ) 131 | parser.add_argument( 132 | "-m", "--model_dir", 133 | dest = "model_dir", 134 | type = str, 135 | default = "" 136 | ) 137 | args = parser.parse_args() 138 | config_file = args.config_file 139 | f = open(config_file, 'r', encoding='utf-8') 140 | configs = f.read() 141 | configs = yaml.load(configs) 142 | configs['use_gpu'] = args.gpu 143 | configs['data_root'] = args.data_root 144 | configs['model_dir'] = args.model_dir 145 | configs['save_dir'] = args.save_dir 146 | 147 | relocalization(configs) 148 | 149 | if __name__ == "__main__": 150 | main() 151 | -------------------------------------------------------------------------------- /experiments/show_object_matching/draw_object.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | import os 7 | 8 | import torch 9 | from torchvision import transforms 10 | import yaml 11 | import cv2 12 | import numpy as np 13 | import argparse 14 | import copy 15 | 16 | from utils.tools import tensor_to_numpy 17 | from utils import cv2_util 18 | 19 | 20 | def compute_colors_for_labels(labels): 21 | """ 22 | Simple function that adds fixed colors depending on the class 23 | """ 24 | palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1]) 25 | colors = labels[:, None] * palette 26 | colors = (colors % 255).numpy().astype("uint8") 27 | return colors 28 | 29 | 30 | 
def overlay_boxes(image, boxes, colors): 31 | """ 32 | Adds the predicted boxes on top of the image 33 | 34 | Arguments: 35 | image (np.ndarray): an image as returned by OpenCV 36 | """ 37 | 38 | for box, color in zip(boxes, colors): 39 | box = box.to(torch.int64) 40 | top_left, bottom_right = box[:2].tolist(), box[2:].tolist() 41 | image = cv2.rectangle( 42 | image, tuple(top_left), tuple(bottom_right), tuple(color), 1 43 | ) 44 | 45 | return image 46 | 47 | 48 | def overlay_class_names(image, boxes, textes, colors): 49 | """ 50 | Adds detected class names and scores in the positions defined by the 51 | top-left corner of the predicted bounding box 52 | 53 | Arguments: 54 | image (np.ndarray): an image as returned by OpenCV 55 | """ 56 | 57 | for box, text, color in zip(boxes, textes, colors): 58 | x, y = (box[0] + box[2]) / 2 - 100, (box[1] + box[3]) / 2 59 | cv2.putText( 60 | image, text, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 0, 255), 2 61 | ) 62 | 63 | return image 64 | 65 | 66 | def overlay_mask(image, masks, colors): 67 | """ 68 | Adds the instances contours for each predicted object. 69 | Each label has a different color. 70 | 71 | Arguments: 72 | image (np.ndarray): an image as returned by OpenCV 73 | predictions (BoxList): the result of the computation by the model. 74 | It should contain the field `mask` and `labels`. 75 | """ 76 | for mask, color in zip(masks, colors): 77 | if len(mask.shape) == 3: 78 | mask = mask.squeeze(0) 79 | thresh = tensor_to_numpy(mask[None, :, :]) 80 | thresh = thresh[:, :, 0] 81 | contours, hierarchy = cv2_util.findContours( 82 | thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE 83 | ) 84 | image = cv2.drawContours(image, contours, -1, color, 4) 85 | 86 | composite = image 87 | 88 | return composite 89 | 90 | 91 | def draw_object(data, colors=None, match_idx_list=None): 92 | image = data['image'] 93 | points = data['points'] 94 | objects = data['objects'] 95 | labels = objects['labels'] 96 | boxes = objects['boxes'] 97 | masks = objects['masks'] 98 | if colors is None: 99 | colors = compute_colors_for_labels(labels).tolist() 100 | 101 | # image = overlay_boxes(image, boxes, colors) 102 | image = overlay_mask(image, masks, colors) 103 | 104 | textes = [] 105 | for idx in range(len(boxes)): 106 | if idx in match_idx_list: 107 | i = match_idx_list.index(idx) 108 | text = "object" + str(i+1) 109 | else: 110 | text = "no matching object" 111 | textes.append(text) 112 | image = overlay_class_names(image, boxes, textes, colors) 113 | 114 | H, W = image.shape[:2] 115 | 116 | for p, c in zip(points, colors): 117 | p = p.cpu().numpy() 118 | if len(p) == 0: 119 | continue 120 | for i in range(len(p)): 121 | x = round(p[i][1] * W + W/2) 122 | y = round(p[i][0] * H + H/2) 123 | if x < 0: 124 | continue 125 | cv2.circle(image, (x,y), 7, tuple(c), thickness=-1) 126 | 127 | return image, colors -------------------------------------------------------------------------------- /experiments/show_object_matching/show_object_matching.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | import copy 16 | 17 | from model.inference import maskrcnn_inference 18 | from model.build_model import build_maskrcnn, build_gcn, build_superpoint_model 19 | from 
model.inference import detection_inference 20 | from datasets.utils.pipeline import makedir 21 | from kornia.feature import match_nn 22 | from datasets.utils import preprocess 23 | from experiments.show_object_matching.draw_object import draw_object, compute_colors_for_labels 24 | 25 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 26 | 27 | def filter_objects(data, target_labels=None): 28 | ''' 29 | data = {'image':image, 'points': output[0], 'objects': output[1], 30 | 'descs': output[2], 'keeps': output[3]} 31 | ''' 32 | points = data['points'] 33 | descs = data['descs'] 34 | 35 | keeps = data['keeps'] 36 | objects = data['objects'] 37 | labels = objects['labels'][keeps] 38 | boxes = objects['boxes'][keeps] 39 | masks = objects['masks'][keeps] 40 | 41 | if target_labels is not None: 42 | new_labels, new_boxes, new_masks = [], [], [] 43 | new_points, new_descs = [], [] 44 | for i in range(len(points)): 45 | if labels[i].item() in target_labels: 46 | new_labels.append(labels[i]) 47 | new_boxes.append(boxes[i]) 48 | new_masks.append(masks[i]) 49 | new_points.append(points[i]) 50 | new_descs.append(descs[i]) 51 | 52 | labels = torch.stack(new_labels) 53 | boxes = torch.stack(new_boxes) 54 | masks = torch.stack(new_masks) 55 | points = new_points 56 | descs = torch.stack(new_descs) 57 | 58 | objects['labels'] = labels 59 | objects['boxes'] = boxes 60 | objects['masks'] = masks 61 | data['objects'] = objects 62 | 63 | data['points'] = points 64 | data['descs'] = descs 65 | return data 66 | 67 | 68 | def draw_results(tpl_data, data, save_dir, image_name, match_thr=0.95): 69 | 70 | # get colors 71 | tpl_obj_num = len(tpl_data['objects']['boxes']) 72 | obj_num = len(data['objects']['boxes']) 73 | sum_num = tpl_obj_num + obj_num 74 | index = [(i+1)*30 for i in range(sum_num)] 75 | index = torch.tensor(index) 76 | sum_colors = compute_colors_for_labels(index).tolist() 77 | tpl_colors = sum_colors[:tpl_obj_num] 78 | colors = sum_colors[tpl_obj_num:sum_num] 79 | print(tpl_colors) 80 | 81 | # match 82 | tpl_descs = tpl_data['descs'] 83 | descs = data['descs'] 84 | dis, match = match_nn(tpl_descs, descs) 85 | dis, match = dis.cpu().squeeze(1).numpy(), match.cpu().numpy() 86 | print(dis) 87 | 88 | # update match colors 89 | match_idx_list1, match_idx_list2 = [], [] 90 | for i in range(match.shape[0]): 91 | if dis[i] > match_thr : 92 | continue 93 | idx1 = int(match[i, 0]) 94 | idx2 = int(match[i, 1]) 95 | colors[idx2] = tpl_colors[idx1] 96 | match_idx_list1.append(idx1) 97 | match_idx_list2.append(idx2) 98 | 99 | # draw object 100 | tpl_image, _ = draw_object(tpl_data, tpl_colors, match_idx_list1) 101 | image, _ = draw_object(data, colors, match_idx_list2) 102 | img = np.concatenate([tpl_image, image], 1) 103 | # img = np.concatenate([tpl_image, image], 0) 104 | 105 | # draw match 106 | tpl_boxes = tpl_data['objects']['boxes'] 107 | boxes = data['objects']['boxes'] 108 | for i in range(match.shape[0]): 109 | if dis[i] > match_thr : 110 | continue 111 | 112 | idx1 = int(match[i, 0]) 113 | idx2 = int(match[i, 1]) 114 | 115 | c = tpl_colors[idx1] 116 | 117 | tpl_box = tpl_boxes[idx1] 118 | x1 = (int)((tpl_box[0] + tpl_box[2]) / 2) 119 | y1 = (int)((tpl_box[1] + tpl_box[3]) / 2) 120 | cv2.circle(img, (x1, y1), 10, tuple(c), 2) 121 | 122 | box = boxes[idx2] 123 | x2 = (int)((box[0] + box[2]) / 2 + tpl_image.shape[-2]) 124 | y2 = (int)((box[1] + box[3]) / 2) 125 | # x2 = (int)((box[0] + box[2]) / 2) 126 | # y2 = (int)((box[1] + box[3]) / 2 + tpl_image.shape[0]) 127 | cv2.circle(img, (x2, y2), 10, tuple(c), 2) 128 | 
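# connect the matched pair with a line across the side-by-side canvas; x2 was already shifted by the
# template image width (tpl_image.shape[-2]) because the two frames were concatenated along axis 1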
129 | cv2.line(img, (x1, y1), (x2, y2), tuple(c), 2) 130 | 131 | save_path = os.path.join(save_dir, image_name) 132 | cv2.imwrite(save_path, img) 133 | 134 | 135 | def network_output(image, points_model, maskrcnn_model, gcn_model, configs, filter_labes=None): 136 | with torch.no_grad(): 137 | data_config = configs['data'] 138 | superpoint_model_config = configs['model']['superpoint'] 139 | detection_threshold = superpoint_model_config['detection_threshold'] 140 | use_gpu = configs['use_gpu'] 141 | 142 | transform = transforms.ToTensor() 143 | image = transform(image) 144 | image = image.unsqueeze(0) 145 | batch = {'image': image} 146 | 147 | points_output, detections, _ = detection_inference(maskrcnn_model, points_model, batch, use_gpu, 1, 148 | detection_threshold, data_config, save_dir=None) 149 | 150 | batch_points, batch_descs = preprocess.extract_points_clusters(points_output, list([detections[0]['masks']])) 151 | 152 | original_sizes = [list(img.shape[-2:]) for img in image] 153 | 154 | batch_points = preprocess.normalize_points(batch_points, original_sizes) 155 | 156 | batch_points = preprocess.batch_merge(batch_points) 157 | batch_descs = preprocess.batch_merge(batch_descs) 158 | 159 | keeps = preprocess.select_good_clusters(batch_points) 160 | 161 | good_points, good_descs = [], [] 162 | for i in range(len(keeps)): 163 | if keeps[i].item(): 164 | good_points.append(batch_points[i]) 165 | good_descs.append(batch_descs[i]) 166 | 167 | batch_object_descs, _ = gcn_model(good_points, good_descs) 168 | 169 | return good_points, detections[0], batch_object_descs, keeps 170 | 171 | 172 | def read_image(img_path): 173 | image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 174 | if len(image.shape) == 2: 175 | image = cv2.merge([image, image, image]) 176 | return image 177 | 178 | def show_object_matching(configs): 179 | 180 | # read configs 181 | save_dir = configs['save_dir'] 182 | data_root = configs['data_root'] 183 | model_dir = configs['model_dir'] 184 | use_gpu = configs['use_gpu'] 185 | superpoint_model_config = configs['model']['superpoint'] 186 | detection_thr = superpoint_model_config['detection_threshold'] 187 | configs['num_gpu'] = [0] 188 | configs['public_model'] = 0 189 | 190 | superpoint_model_path = os.path.join(model_dir, "points_model.pth") 191 | maskrcnn_model_path = os.path.join(model_dir, "maskrcnn_model.pth") 192 | gcn_model_path = os.path.join(model_dir, "gcn_model.pth") 193 | configs["maskrcnn_model_path"] = maskrcnn_model_path 194 | configs["superpoint_model_path"] = superpoint_model_path 195 | configs["graph_model_path"] = gcn_model_path 196 | 197 | # model 198 | superpoint_model = build_superpoint_model(configs, requires_grad=False) 199 | superpoint_model.eval() 200 | 201 | maskrcnn_model = build_maskrcnn(configs) 202 | maskrcnn_model.eval() 203 | 204 | gcn_model = build_gcn(configs) 205 | gcn_model.eval() 206 | 207 | # template image 208 | tpl_path = os.path.join(data_root, "template.jpg") 209 | tpl_image = read_image(tpl_path) 210 | tpl_output = network_output(tpl_image, superpoint_model, maskrcnn_model, gcn_model, configs) 211 | tpl_data = {'image':tpl_image, 'points': tpl_output[0], 'objects': tpl_output[1], 212 | 'descs': tpl_output[2], 'keeps': tpl_output[3]} 213 | 214 | 215 | # filter data 216 | target_labels = [40] 217 | # tpl_data = filter_objects(tpl_data, target_labels) 218 | tpl_labels = tpl_data['objects']['labels'] 219 | print(tpl_labels) 220 | 221 | seq_path = os.path.join(data_root, "seq") 222 | image_names = os.listdir(seq_path) 223 | 
image_names.sort() 224 | with torch.no_grad(): 225 | for image_name in image_names: 226 | image_path = tpl_path = os.path.join(seq_path, image_name) 227 | image = read_image(image_path) 228 | output = network_output(image, superpoint_model, maskrcnn_model, gcn_model, configs) 229 | data = {'image':image, 'points': output[0], 'objects': output[1], 230 | 'descs': output[2], 'keeps': output[3]} 231 | 232 | # data = filter_objects(data, target_labels) 233 | labels = data['objects']['labels'] 234 | print(labels) 235 | draw_results(tpl_data, data, save_dir, image_name) 236 | 237 | 238 | def main(): 239 | parser = argparse.ArgumentParser(description="show match") 240 | parser.add_argument( 241 | "-c", "--config_file", 242 | dest = "config_file", 243 | type = str, 244 | default = "" 245 | ) 246 | parser.add_argument( 247 | "-g", "--gpu", 248 | dest = "gpu", 249 | type = int, 250 | default = 0 251 | ) 252 | parser.add_argument( 253 | "-s", "--save_dir", 254 | dest = "save_dir", 255 | type = str, 256 | default = "" 257 | ) 258 | parser.add_argument( 259 | "-d", "--data_root", 260 | dest = "data_root", 261 | type = str, 262 | default = "" 263 | ) 264 | parser.add_argument( 265 | "-m", "--model_dir", 266 | dest = "model_dir", 267 | type = str, 268 | default = "" 269 | ) 270 | args = parser.parse_args() 271 | config_file = args.config_file 272 | f = open(config_file, 'r', encoding='utf-8') 273 | configs = f.read() 274 | configs = yaml.load(configs) 275 | configs['use_gpu'] = args.gpu 276 | configs['data_root'] = args.data_root 277 | configs['model_dir'] = args.model_dir 278 | configs['save_dir'] = args.save_dir 279 | 280 | show_object_matching(configs) 281 | 282 | if __name__ == "__main__": 283 | main() 284 | -------------------------------------------------------------------------------- /experiments/utils/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | import os 7 | import yaml 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | 12 | def plot_pr_curves(pr_curves, dataset_name, save_dir): 13 | ''' 14 | plot pr curves 15 | input: 16 | pr_curves: Dict[interval: pr_curve] 17 | dataset_name: "dataset" + "seq" 18 | save_dir: save directory 19 | ''' 20 | 21 | plt.title(dataset_name) 22 | colors = ['green', 'red', 'blue', 'yellow', 'darkviolet', 'sandybrown'] 23 | for k, c in zip(pr_curves.keys(), colors): 24 | pr_curve = pr_curves[k] 25 | xs, ys = [], [] 26 | for pr in pr_curve: 27 | xs.append(pr[0]) 28 | ys.append(pr[1]) 29 | 30 | plt.plot(xs, ys, color=c, label=str(k)) 31 | 32 | plt.legend() 33 | plt.xlabel('precision') 34 | plt.ylabel('recall') 35 | 36 | image_name = dataset_name + ".jpg" 37 | save_path = os.path.join(save_dir, image_name) 38 | plt.savefig(save_path) 39 | 40 | 41 | def get_pr_curve_area(pr_curve): 42 | ''' 43 | pr_curve: [[p0, r0], [p1, r1]... 
[pn, rn]], thr: small->big, precision: small->big, recall: big->small 44 | ''' 45 | area = 0.0 46 | for i in range(1, len(pr_curve)): 47 | p0, r0 = pr_curve[i-1] 48 | p1, r1 = pr_curve[i] 49 | 50 | area = area + (r0 - r1) * (p1 + p0) / 2 51 | 52 | return area 53 | 54 | 55 | def plot_tracking_details(results_list, save_dir, name=None, configs=None): 56 | if configs is not None: 57 | title = configs['title'] 58 | colors = configs['colors'] 59 | linewidth = configs['linewidth'] 60 | xlabel = configs['xlabel'] 61 | ylabel = configs['ylabel'] 62 | fontsize = configs['fontsize'] 63 | figsize = configs['figsize'] 64 | dpi = configs['dpi'] 65 | else: 66 | title = results_list[0]['dataset'] if name is None else name 67 | colors = ['green', 'red', 'blue', 'yellow', 'darkviolet', 'sandybrown'] 68 | linewidth = 3 69 | xlabel = "recall" 70 | ylabel = "precision" 71 | fontsize = 20 72 | figsize = (10, 10) 73 | dpi = 100 74 | 75 | plt.title(title) 76 | plt.xticks(fontsize=fontsize) 77 | plt.yticks(fontsize=fontsize) 78 | 79 | for i in range(len(results_list)): 80 | pr_curves = results_list[i]['pr_curves'] 81 | areas = results_list[i]['areas'] 82 | for k in pr_curves.keys(): 83 | pr_curve = pr_curves[k] 84 | xs, ys = [], [] 85 | for pr in pr_curve: 86 | xs.append(pr[1]) # recall 87 | ys.append(pr[0]) # precision 88 | 89 | area = round(areas[k], 4) 90 | 91 | label = "[{}] k = {}, {}".format(area, k, results_list[i]['model']) 92 | linestyle = '-' if i==0 else '--' 93 | plt.plot(xs, ys, color=colors[k], label=label, linewidth=linewidth, linestyle=linestyle) 94 | 95 | plt.legend(fontsize=fontsize) 96 | plt.grid() 97 | plt.xlabel(xlabel, fontsize=fontsize) 98 | plt.ylabel(ylabel, fontsize=fontsize) 99 | 100 | image_name = title + ".jpg" 101 | save_path = os.path.join(save_dir, image_name) 102 | plt.savefig(save_path) 103 | 104 | 105 | def save_tracking_results(results, save_dir): 106 | ''' 107 | saving tracking experiment results 108 | 109 | results: 110 | dataset: * 111 | model: * 112 | pr_curves: 113 | interval_0: [[p00, r00], [p01, r01]... [p0n, r0n]] 114 | interval_1: [[p10, r10], [p11, r11]... [p1n, r1n]] 115 | ... 116 | interval_m: [[pm0, rm0], [pm1, rm1]... 
[pmn, rmn]] 117 | ''' 118 | file_name = results['dataset'] + "_" + results['model'] + ".yaml" 119 | file_path = os.path.join(save_dir, file_name) 120 | fp = open(file_path, 'w') 121 | fp.write(yaml.dump(results)) 122 | 123 | 124 | def read_tracking_results(file_path): 125 | f = open(config_file, 'r', encoding='utf-8') 126 | results = f.read() 127 | f.close() 128 | return results -------------------------------------------------------------------------------- /model/backbone/fcn.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | from torchvision import models 7 | from torchvision.models.vgg import VGG 8 | 9 | class FCNs(nn.Module): 10 | 11 | def __init__(self, pretrained_net): 12 | super(FCNs,self).__init__() 13 | self.pretrained_net = pretrained_net 14 | self.relu = nn.ReLU(inplace=True) 15 | self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 16 | self.bn1 = nn.BatchNorm2d(512) 17 | self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 18 | self.bn2 = nn.BatchNorm2d(256) 19 | self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 20 | self.bn3 = nn.BatchNorm2d(128) 21 | self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 22 | self.bn4 = nn.BatchNorm2d(64) 23 | self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 24 | self.bn5 = nn.BatchNorm2d(32) 25 | self.output_dim = 32 26 | # self.classifier = nn.Conv2d(32, n_class, kernel_size=1) 27 | 28 | def forward(self, x): 29 | output = self.pretrained_net(x) 30 | x5 = output['x5'] # size=(N, 512, x.H/32, x.W/32) 31 | x4 = output['x4'] # size=(N, 512, x.H/16, x.W/16) 32 | x3 = output['x3'] # size=(N, 256, x.H/8, x.W/8) 33 | x2 = output['x2'] # size=(N, 128, x.H/4, x.W/4) 34 | x1 = output['x1'] # size=(N, 64, x.H/2, x.W/2) 35 | 36 | score = self.bn1(self.relu(self.deconv1(x5))) # size=(N, 512, x.H/16, x.W/16) 37 | score = score + x4 # element-wise add, size=(N, 512, x.H/16, x.W/16) 38 | score = self.bn2(self.relu(self.deconv2(score))) # size=(N, 256, x.H/8, x.W/8) 39 | score = score + x3 # element-wise add, size=(N, 256, x.H/8, x.W/8) 40 | score = self.bn3(self.relu(self.deconv3(score))) # size=(N, 128, x.H/4, x.W/4) 41 | score = score + x2 # element-wise add, size=(N, 128, x.H/4, x.W/4) 42 | score = self.bn4(self.relu(self.deconv4(score))) # size=(N, 64, x.H/2, x.W/2) 43 | score = score + x1 # element-wise add, size=(N, 64, x.H/2, x.W/2) 44 | score = self.bn5(self.relu(self.deconv5(score))) # size=(N, 32, x.H, x.W) 45 | # score = self.classifier(score) # size=(N, n_class, x.H/1, x.W/1) 46 | 47 | return score # size=(N, n_class, x.H/1, x.W/1) 48 | 49 | 50 | class VGGNet(VGG): 51 | def __init__(self, pretrained=True, model='vgg16', requires_grad=True, remove_fc=True, show_params=False, input_channel=3): 52 | super(VGGNet,self).__init__(make_layers(cfg[model], input_channel=input_channel)) 53 | self.ranges = ranges[model] 54 | 55 | if pretrained: 56 | exec("self.load_state_dict(models.%s(pretrained=True).state_dict())" % model) 57 | 58 | if not requires_grad: 59 | for param in super().parameters(): 60 | param.requires_grad = False 61 | 62 | if remove_fc: # delete redundant fully-connected layer params, can save memory 
63 | del self.classifier 64 | 65 | if show_params: 66 | for name, param in self.named_parameters(): 67 | print(name, param.size()) 68 | 69 | def forward(self, x): 70 | output = {} 71 | 72 | # get the output of each maxpooling layer (5 maxpool in VGG net) 73 | for idx in range(len(self.ranges)): 74 | for layer in range(self.ranges[idx][0], self.ranges[idx][1]): 75 | x = self.features[layer](x) 76 | output["x%d"%(idx+1)] = x 77 | 78 | return output 79 | 80 | 81 | ranges = { 82 | 'vgg11': ((0, 3), (3, 6), (6, 11), (11, 16), (16, 21)), 83 | 'vgg13': ((0, 5), (5, 10), (10, 15), (15, 20), (20, 25)), 84 | 'vgg16': ((0, 5), (5, 10), (10, 17), (17, 24), (24, 31)), 85 | 'vgg19': ((0, 5), (5, 10), (10, 19), (19, 28), (28, 37)) 86 | } 87 | 88 | # cropped version from https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py 89 | cfg = { 90 | 'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 91 | 'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 92 | 'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 93 | 'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 94 | } 95 | 96 | def make_layers(cfg, batch_norm=False, input_channel=3): 97 | layers = [] 98 | in_channels = input_channel 99 | for v in cfg: 100 | if v == 'M': 101 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 102 | else: 103 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 104 | if batch_norm: 105 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 106 | else: 107 | layers += [conv2d, nn.ReLU(inplace=True)] 108 | in_channels = v 109 | return nn.Sequential(*layers) 110 | 111 | 112 | if __name__ == "__main__": 113 | batch_size, n_class, h, w = 10, 20, 160, 160 114 | 115 | # test output size 116 | vgg_model = VGGNet(requires_grad=True) 117 | input = torch.autograd.Variable(torch.randn(batch_size, 3, 224, 224)) 118 | output = vgg_model(input) 119 | assert output['x5'].size() == torch.Size([batch_size, 512, 7, 7]) 120 | 121 | fcn_model = FCNs(pretrained_net=vgg_model, n_class=n_class) 122 | input = torch.autograd.Variable(torch.randn(batch_size, 3, h, w)) 123 | output = fcn_model(input) 124 | assert output.size() == torch.Size([batch_size, n_class, h, w]) 125 | 126 | print("Pass size check") 127 | 128 | # test a random batch, loss should decrease 129 | fcn_model = FCNs(pretrained_net=vgg_model, n_class=n_class) 130 | criterion = nn.BCELoss() 131 | optimizer = optim.SGD(fcn_model.parameters(), lr=1e-3, momentum=0.9) 132 | input = torch.autograd.Variable(torch.randn(batch_size, 3, h, w)) 133 | y = torch.autograd.Variable(torch.randn(batch_size, n_class, h, w), requires_grad=False) 134 | for iter in range(10): 135 | optimizer.zero_grad() 136 | output = fcn_model(input) 137 | output = nn.functional.sigmoid(output) 138 | loss = criterion(output, y) 139 | loss.backward() 140 | print("iter{}, loss {}".format(iter, loss.data[0])) 141 | optimizer.step() -------------------------------------------------------------------------------- /model/backbone/resnet_fpn.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from collections import OrderedDict 3 | from torch import nn 4 | from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool 5 | from torchvision.ops import misc as misc_nn_ops 6 | from torchvision.models._utils import IntermediateLayerGetter 7 | from 
torchvision.models import resnet 8 | 9 | 10 | class BackboneWithFPN(nn.Module): 11 | """ 12 | Adds a FPN on top of a model. 13 | Internally, it uses torchvision.models._utils.IntermediateLayerGetter to 14 | extract a submodel that returns the feature maps specified in return_layers. 15 | The same limitations of IntermediatLayerGetter apply here. 16 | Args: 17 | backbone (nn.Module) 18 | return_layers (Dict[name, new_name]): a dict containing the names 19 | of the modules for which the activations will be returned as 20 | the key of the dict, and the value of the dict is the name 21 | of the returned activation (which the user can specify). 22 | in_channels_list (List[int]): number of channels for each feature map 23 | that is returned, in the order they are present in the OrderedDict 24 | out_channels (int): number of channels in the FPN. 25 | Attributes: 26 | out_channels (int): the number of channels in the FPN 27 | """ 28 | def __init__(self, backbone, return_layers, in_channels_list, out_channels, extra_blocks=None): 29 | super(BackboneWithFPN, self).__init__() 30 | 31 | if extra_blocks is None: 32 | extra_blocks = LastLevelMaxPool() 33 | 34 | self.body = IntermediateLayerGetter(backbone, return_layers=return_layers) 35 | self.fpn = FeaturePyramidNetwork( 36 | in_channels_list=in_channels_list, 37 | out_channels=out_channels, 38 | extra_blocks=extra_blocks, 39 | ) 40 | self.in_channels_list = in_channels_list 41 | self.out_channels = out_channels 42 | 43 | def forward(self, x): 44 | resnet_x = self.body(x) 45 | fpn_x = self.fpn(resnet_x) 46 | return fpn_x, resnet_x 47 | 48 | 49 | def resnet_fpn_backbone( 50 | backbone_name, 51 | pretrained, 52 | norm_layer=misc_nn_ops.FrozenBatchNorm2d, 53 | trainable_layers=3, 54 | returned_layers=None, 55 | extra_blocks=None 56 | ): 57 | """ 58 | Constructs a specified ResNet backbone with FPN on top. Freezes the specified number of layers in the backbone. 59 | 60 | Examples:: 61 | 62 | >>> from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 63 | >>> backbone = resnet_fpn_backbone('resnet50', pretrained=True, trainable_layers=3) 64 | >>> # get some dummy image 65 | >>> x = torch.rand(1,3,64,64) 66 | >>> # compute the output 67 | >>> output = backbone(x) 68 | >>> print([(k, v.shape) for k, v in output.items()]) 69 | >>> # returns 70 | >>> [('0', torch.Size([1, 256, 16, 16])), 71 | >>> ('1', torch.Size([1, 256, 8, 8])), 72 | >>> ('2', torch.Size([1, 256, 4, 4])), 73 | >>> ('3', torch.Size([1, 256, 2, 2])), 74 | >>> ('pool', torch.Size([1, 256, 1, 1]))] 75 | 76 | Args: 77 | backbone_name (string): resnet architecture. Possible values are 'ResNet', 'resnet18', 'resnet34', 'resnet50', 78 | 'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2' 79 | norm_layer (torchvision.ops): it is recommended to use the default value. For details visit: 80 | (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267) 81 | pretrained (bool): If True, returns a model with backbone pre-trained on Imagenet 82 | trainable_layers (int): number of trainable (not frozen) resnet layers starting from final block. 83 | Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. 
84 | """ 85 | backbone = resnet.__dict__[backbone_name]( 86 | pretrained=pretrained, 87 | norm_layer=norm_layer) 88 | 89 | # select layers that wont be frozen 90 | assert trainable_layers <= 5 and trainable_layers >= 0 91 | layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers] 92 | # freeze layers only if pretrained backbone is used 93 | for name, parameter in backbone.named_parameters(): 94 | if all([not name.startswith(layer) for layer in layers_to_train]): 95 | parameter.requires_grad_(False) 96 | 97 | if extra_blocks is None: 98 | extra_blocks = LastLevelMaxPool() 99 | 100 | if returned_layers is None: 101 | returned_layers = [1, 2, 3, 4] 102 | assert min(returned_layers) > 0 and max(returned_layers) < 5 103 | return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)} 104 | 105 | in_channels_stage2 = backbone.inplanes // 8 106 | in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers] 107 | out_channels = 256 108 | return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks) 109 | 110 | 111 | def _validate_resnet_trainable_layers(pretrained, trainable_backbone_layers): 112 | # dont freeze any layers if pretrained model or backbone is not used 113 | if not pretrained: 114 | if trainable_backbone_layers is not None: 115 | warnings.warn( 116 | "Changing trainable_backbone_layers has not effect if " 117 | "neither pretrained nor pretrained_backbone have been set to True, " 118 | "falling back to trainable_backbone_layers=5 so that all layers are trainable") 119 | trainable_backbone_layers = 5 120 | # by default, freeze first 2 blocks following Faster R-CNN 121 | if trainable_backbone_layers is None: 122 | trainable_backbone_layers = 3 123 | assert trainable_backbone_layers <= 5 and trainable_backbone_layers >= 0 124 | return trainable_backbone_layers 125 | -------------------------------------------------------------------------------- /model/build_model.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import os 4 | import time 5 | import argparse 6 | import yaml 7 | import copy 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | import torch.optim as optim 13 | from torch.autograd import Variable 14 | from torch.optim import lr_scheduler 15 | # from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 16 | 17 | from model.backbone.resnet_fpn import resnet_fpn_backbone 18 | from model.backbone.fcn import VGGNet 19 | from model.mask_rcnn.mask_rcnn import MaskRCNN 20 | from model.superpoint.vgg_like import VggLike 21 | from model.superpoint.superpoint_public_model import SuperPointNet 22 | from model.graph_models.object_descriptor import ObjectDescriptor 23 | 24 | def build_maskrcnn(configs): 25 | ## command line config 26 | num_gpu = configs['num_gpu'] 27 | use_gpu = (len(num_gpu) > 0) and configs['use_gpu'] 28 | pretrained_model_path = configs['maskrcnn_model_path'] 29 | public_model = configs['public_model'] 30 | ## data cofig 31 | nclass = configs['data']['nclass'] 32 | ## mask_rcnn config 33 | maskrcnn_model_config = configs['model']['maskrcnn'] 34 | backbone_type = maskrcnn_model_config['backbone_type'] 35 | image_mean = maskrcnn_model_config['image_mean'] 36 | image_std = maskrcnn_model_config['image_std'] 37 | trainable_layers = maskrcnn_model_config['trainable_layers'] 38 | 39 | # model 40 | # backbone = ResNetFPN(pretrained_type) 41 | backbone = 
resnet_fpn_backbone(backbone_type, False, trainable_layers=trainable_layers) 42 | model = MaskRCNN(backbone, nclass, image_mean=image_mean, image_std=image_std) 43 | 44 | if pretrained_model_path != "" and public_model: 45 | model_dict = model.state_dict() 46 | pretrained_dict = torch.load(pretrained_model_path) 47 | remove_dict = ['roi_heads.box_predictor.cls_score.weight', 48 | 'roi_heads.box_predictor.cls_score.bias', 49 | 'roi_heads.box_predictor.bbox_pred.weight', 50 | 'roi_heads.box_predictor.bbox_pred.bias', 51 | 'roi_heads.mask_predictor.mask_fcn_logits.weight', 52 | 'roi_heads.mask_predictor.mask_fcn_logits.bias'] 53 | pretrained_dict = {k:v for k, v in pretrained_dict.items() if ((k in model_dict) and (k not in remove_dict))} 54 | model_dict.update(pretrained_dict) 55 | model.load_state_dict(model_dict) 56 | print("load model from {}".format(pretrained_model_path)) 57 | print("load parameters : {}".format(pretrained_dict.keys())) 58 | 59 | 60 | if use_gpu: 61 | model = model.cuda() 62 | model = torch.nn.DataParallel(model, device_ids=num_gpu) 63 | print("Finish cuda loading") 64 | 65 | if pretrained_model_path != "" and (not public_model): 66 | model_dict = model.state_dict() 67 | pretrained_dict = torch.load(pretrained_model_path) 68 | model_dict.update(pretrained_dict) 69 | model.load_state_dict(model_dict) 70 | print("load model from {}".format(pretrained_model_path)) 71 | 72 | return model 73 | 74 | def build_superpoint_model(configs, requires_grad=True): 75 | ## command line config 76 | num_gpu = configs['num_gpu'] 77 | use_gpu = (len(num_gpu) > 0) and configs['use_gpu'] 78 | pretrained_model_path = configs['superpoint_model_path'] 79 | 80 | vgg_model = VGGNet(requires_grad=requires_grad) 81 | model = VggLike(vgg_model) 82 | 83 | # model = SuperPointNet() 84 | # if pretrained_model_path != "": 85 | # model_dict = model.state_dict() 86 | # pretrained_dict = torch.load(pretrained_model_path) 87 | # model_dict.update(pretrained_dict) 88 | # model.load_state_dict(model_dict) 89 | # print("load model from {}".format(pretrained_model_path)) 90 | 91 | 92 | if use_gpu: 93 | model = model.cuda() 94 | model = torch.nn.DataParallel(model, device_ids=num_gpu) 95 | print("Finish cuda loading") 96 | 97 | if pretrained_model_path != "": 98 | model_dict = model.state_dict() 99 | pretrained_dict = torch.load(pretrained_model_path) 100 | model_dict.update(pretrained_dict) 101 | model.load_state_dict(model_dict) 102 | print("load model from {}".format(pretrained_model_path)) 103 | 104 | return model 105 | 106 | def build_gcn(configs): 107 | num_gpu = configs['num_gpu'] 108 | use_gpu = (len(num_gpu) > 0) 109 | gcn_config = configs['model']['gcn'] 110 | pretrained_model_path = configs['graph_model_path'] 111 | 112 | model = ObjectDescriptor(gcn_config) 113 | 114 | if use_gpu: 115 | model = model.cuda() 116 | model = torch.nn.DataParallel(model, device_ids=num_gpu) 117 | print("Finish cuda loading") 118 | 119 | if pretrained_model_path != "": 120 | if use_gpu: 121 | model.load_state_dict(torch.load(pretrained_model_path)) 122 | else: 123 | model.load_state_dict(torch.load(pretrained_model_path, map_location=torch.device('cpu'))) 124 | print("load model from {}".format(pretrained_model_path)) 125 | 126 | return model -------------------------------------------------------------------------------- /model/graph_models/attention.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import math 5 | import 
torch 6 | import torch.nn as nn 7 | 8 | class GraphAtten(nn.Module): 9 | def __init__(self, nfeat, nhid, nout, alpha=0.2, nheads=8): 10 | super(GraphAtten, self).__init__() 11 | self.attns = [Attention(nfeat, nhid, alpha) for _ in range(nheads)] 12 | for i, attention in enumerate(self.attns): 13 | self.add_module('attention_{}'.format(i), attention) 14 | 15 | self.relu = nn.ReLU() 16 | 17 | self.merge = nn.Linear(nheads*nhid, nhid) 18 | 19 | self.mlp1 = nn.Linear((nfeat+nhid), (nfeat+nhid)) 20 | self.bn1 = nn.BatchNorm1d((nfeat+nhid)) 21 | self.mlp2 = nn.Linear((nfeat+nhid), nout) 22 | self.bn2 = nn.BatchNorm1d(nout) 23 | 24 | def print_para(self, layer): 25 | model_dict = self.state_dict() 26 | para = model_dict[layer] 27 | print("layer = {}".format(para)) 28 | 29 | def forward(self, x): 30 | m = torch.cat([attn(x) for attn in self.attns], dim=1) 31 | m = self.relu(self.merge(m)) 32 | x = torch.cat([x, m], 1) 33 | x = self.relu(self.bn1(self.mlp1(x))) 34 | x = self.relu(self.bn2(self.mlp2(x))) 35 | return x 36 | 37 | 38 | class Attention(nn.Module): 39 | def __init__(self, in_features, out_features, alpha): 40 | super(Attention, self).__init__() 41 | self.tranq = nn.Linear(in_features, out_features) 42 | self.trank = nn.Linear(in_features, out_features) 43 | self.tranv = nn.Linear(in_features, out_features) 44 | self.norm = nn.Sequential(nn.Softmax(dim=1)) 45 | self.leakyrelu = nn.LeakyReLU(alpha) 46 | self.relu = nn.ReLU() 47 | 48 | def forward(self, x): 49 | q = self.relu(self.tranq(x)) # n * dim 50 | k = self.relu(self.trank(x)) # n * dim 51 | v = self.relu(self.tranv(x)) 52 | 53 | adj = torch.einsum('nd,dm->nm', q, k.t()) # n * n 54 | adj = self.leakyrelu(adj) 55 | adj = self.norm(adj) 56 | 57 | m = adj @ v 58 | return m -------------------------------------------------------------------------------- /model/graph_models/descriptor_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | class DescriptorLoss(nn.Module): 13 | ''' 14 | loss for object descriptor 15 | ''' 16 | def __init__(self, config): 17 | super().__init__() 18 | self.config = config 19 | 20 | def forward(self, descs, conns): 21 | ''' 22 | descs: N * D 23 | conns: N * N 24 | ''' 25 | similarity = torch.einsum('nd,dm->nm', descs, descs.t()) # N * N 26 | 27 | print(similarity) 28 | 29 | pos_idx = conns 30 | pos_similarity = similarity * pos_idx 31 | 32 | neg_idx0 = torch.ones_like(conns) - conns 33 | neg_idx0 = neg_idx0 - torch.eye(len(conns), device=conns.device, dtype=conns.dtype) 34 | neg_similarity0 = similarity * neg_idx0 35 | value, index = neg_similarity0.topk(1, largest=True) 36 | value = value.repeat(1, similarity.shape[1]) 37 | neg_idx1 = (neg_similarity0 == value).float() 38 | 39 | zero = torch.tensor(0.0, dtype=similarity.dtype, device=similarity.device) 40 | positive_dist = torch.max(zero, self.config['train']['positive_margin'] - similarity) 41 | negative_dist = torch.max(zero, similarity - self.config['train']['negative_margin']) 42 | 43 | ploss = torch.sum(pos_idx * positive_dist) / torch.sum(pos_idx) 44 | nloss = torch.sum(neg_idx1 * negative_dist) / torch.sum(neg_idx1) 45 | 46 | return ploss, nloss -------------------------------------------------------------------------------- /model/graph_models/object_descriptor.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import math 5 | import torch 6 | import torch.nn as nn 7 | 8 | from model.graph_models.attention import GraphAtten 9 | 10 | class ObjectDescriptor(nn.Module): 11 | def __init__(self, config): 12 | super(ObjectDescriptor, self).__init__() 13 | points_encoder_dims = config['points_encoder_dims'] 14 | descriptor_dim = config['descriptor_dim'] 15 | nhid = config['hidden_dim'] 16 | alpha = config['alpha'] 17 | nheads = config['nheads'] 18 | nout = config['nout'] 19 | nfeat = descriptor_dim + points_encoder_dims[-1] 20 | self.points_encoder = PointsEncoder(points_encoder_dims) 21 | self.gcn = GCN(nfeat, nhid, nout, alpha, nheads) 22 | 23 | def forward(self, batch_points, batch_descs): 24 | ''' 25 | inputs: 26 | batch_points: List[Tensor], normalized points, each tensor belong to a object 27 | batch_descs: List[Tensor] 28 | ''' 29 | batch_features, locations = [], [] 30 | for points, descs in zip(batch_points, batch_descs): 31 | encoded_points = self.points_encoder(points) 32 | features = torch.cat((descs, encoded_points), dim=1) 33 | features, w = self.gcn(features) 34 | batch_features.append(features) 35 | locations.append(w) 36 | batch_features = torch.stack(batch_features) 37 | batch_features = nn.functional.normalize(batch_features, p=2, dim=-1) 38 | locations = torch.cat(locations, 0) 39 | return batch_features, locations 40 | 41 | 42 | class PointsEncoder(nn.Module): 43 | def __init__(self, dims): 44 | super(PointsEncoder, self).__init__() 45 | layers = [] 46 | for i in range(len(dims)-1): 47 | layers.append(nn.Linear(dims[i], dims[i+1])) 48 | if i != len(dims)-2: 49 | layers.append(nn.BatchNorm1d((dims[i+1]))) 50 | layers.append(nn.ReLU()) 51 | 52 | self.layers = layers 53 | for i, layer in enumerate(self.layers): 54 | self.add_module('point_encoder{}'.format(i), layer) 55 | 56 | def forward(self, x): 57 | for layer in self.layers: 58 | x = layer(x) 59 | x = nn.functional.normalize(x, p=2, dim=-1) 60 | return x 61 | 62 | 63 | class GCN(nn.Module): 64 | def __init__(self, nfeat, nhid, nout, alpha=0.2, nheads=8): 65 | super(GCN, self).__init__() 66 | 67 | self.atten1 = GraphAtten(nfeat, nhid, nfeat, alpha, nheads) 68 | self.atten2 = GraphAtten(nfeat, nhid, nfeat, alpha, nheads) 69 | self.tran1 = nn.Linear(nfeat, nfeat) 70 | self.relu = nn.ReLU() 71 | self.sparsification = Sparsification(nfeat, nout) 72 | 73 | def forward(self, x): 74 | x = self.atten1(x) 75 | x = self.atten2(x) 76 | x = self.relu(self.tran1(x)) 77 | x, w = self.sparsification(x) 78 | 79 | return x, w 80 | 81 | 82 | class Sparsification(nn.Module): 83 | def __init__(self, input_dim, output_dim): 84 | super(Sparsification, self).__init__() 85 | 86 | self.relu = nn.ReLU() 87 | self.softmax = nn.Softmax(dim=-1) 88 | self.location_encoder1 = nn.Linear(input_dim, input_dim) 89 | self.location_encoder2 = nn.Linear(input_dim, output_dim) 90 | 91 | self.feature_encoder1 = nn.Linear(input_dim, input_dim) 92 | self.feature_encoder2 = nn.Linear(input_dim, output_dim) 93 | self.feature_encoder3 = nn.Linear(output_dim, output_dim) 94 | 95 | 96 | def forward(self, x): 97 | 98 | 99 | descriptor = self.relu(self.feature_encoder1(x)) 100 | descriptor = self.relu(self.feature_encoder2(descriptor)) 101 | 102 | locations = self.relu(self.location_encoder1(x)) 103 | locations = self.relu(self.location_encoder2(locations)) 104 | norm_locations = nn.functional.normalize(locations, p=2, dim=-1) 105 | 106 | 
descriptor = locations * descriptor 107 | descriptor = torch.sum(descriptor, 0) 108 | descriptor = self.feature_encoder3(descriptor) 109 | 110 | return descriptor, norm_locations -------------------------------------------------------------------------------- /model/inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | import copy 9 | import numpy as np 10 | import cv2 11 | import torch 12 | import torch.distributed as dist 13 | from torchvision import transforms 14 | 15 | from utils.tools import tensor_to_numpy 16 | from datasets.utils.preprocess import preprocess_validation_data 17 | from datasets.utils import postprocess as post 18 | from utils.tools import tensor_to_numpy 19 | from datasets.utils.pipeline import makedir 20 | 21 | def detection_inference(maskrcnn_model, superpoint_model, batch, use_gpu, gaussian_radius, detection_threshold, 22 | data_config, save_dir=None): 23 | with torch.no_grad(): 24 | original_images = batch['image'] 25 | original_images = [tensor_to_numpy(img.clone()) for img in original_images] 26 | 27 | # preprocess 28 | images, sizes, maskrcnn_targets, superpoint_targets = preprocess_validation_data(batch, 29 | use_gpu, gaussian_radius, data_config) 30 | original_sizes = sizes['original_sizes'] 31 | new_sizes = sizes['new_sizes'] 32 | 33 | # model inference 34 | _, detections = maskrcnn_model(images, sizes) 35 | points_output = superpoint_model(images) 36 | 37 | # postprocess 38 | detections, points_output = post.postprocess(new_sizes, original_sizes, detection_threshold, 39 | detections, points_output) 40 | 41 | # save results 42 | if save_dir is not None: 43 | image_names = batch['image_name'] 44 | results = post.save_detection_results(original_images, image_names, save_dir, detections, 45 | None, points_output, True, True) 46 | 47 | return points_output, detections, maskrcnn_targets 48 | 49 | 50 | def maskrcnn_inference(model, batch, use_gpu, gaussian_radius, data_config, save_dir=None): 51 | with torch.no_grad(): 52 | original_images = batch['image'] 53 | original_images = [tensor_to_numpy(img.clone()) for img in original_images] 54 | 55 | # preprocess 56 | images, sizes, maskrcnn_targets, _ = preprocess_validation_data(batch, use_gpu, gaussian_radius, data_config) 57 | original_sizes = sizes['original_sizes'] 58 | new_sizes = sizes['new_sizes'] 59 | 60 | # model inference 61 | _, detections = model(images, sizes) 62 | 63 | # postprocess 64 | detections, _ = post.postprocess(new_sizes, original_sizes, detections=detections) 65 | 66 | # save results 67 | if save_dir is not None: 68 | image_names = batch['image_name'] 69 | results = post.save_detection_results(original_images, image_names, save_dir, detections, None, None, True, False) 70 | 71 | return detections, maskrcnn_targets 72 | 73 | 74 | def superpoint_inference(model, batch, use_gpu, gaussian_radius, data_config, detection_threshold, save_dir=None): 75 | with torch.no_grad(): 76 | original_images = batch['image'] 77 | original_images = [tensor_to_numpy(img.clone()) for img in original_images] 78 | 79 | # preprocess 80 | images, sizes, maskrcnn_targets, superpoint_targets = preprocess_validation_data(batch, use_gpu, gaussian_radius, data_config) 81 | original_sizes = sizes['original_sizes'] 82 | new_sizes = sizes['new_sizes'] 83 | 84 | # model inference 85 | points_output = model(images) 86 | 87 | # 
postprocess 88 | _, points_output = post.postprocess(new_sizes, original_sizes, detection_threshold, None, points_output) 89 | 90 | # save gt 91 | if save_dir is not None: 92 | print("save_dir = {}".format(save_dir)) 93 | save_dir_list = [os.path.join(save_dir, image_name) for image_name in batch['image_name']] 94 | for d in save_dir_list: 95 | makedir(d) 96 | images = copy.deepcopy(original_images) 97 | images = post.overlay_points(images, points_output) 98 | post.save_images(images, save_dir_list, "points") 99 | 100 | return points_output, maskrcnn_targets, superpoint_targets -------------------------------------------------------------------------------- /model/superpoint/superpoint_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import numpy as np 8 | import torch 9 | import torch.nn.functional as F 10 | 11 | from datasets.utils.homographies import warp_points_bacth 12 | 13 | 14 | def detector_loss(pred, heatmap, valid_mask): 15 | ''' 16 | Modified focal loss. Exactly the same as CornerNet. 17 | Runs faster and costs a little bit more memory 18 | inputs: 19 | pred: batch * c * h * w 20 | heatmap: batch * c * h * w 21 | valid_mask: batch * c * h * w 22 | ''' 23 | pos_inds = heatmap.eq(1).float() 24 | neg_inds = heatmap.lt(1).float() 25 | 26 | neg_weights = torch.pow(1 - heatmap, 4) 27 | 28 | loss = 0 29 | eps = 1e-7 30 | 31 | num_pos = pos_inds.float().sum() 32 | 33 | pos_loss = torch.log(pred + eps) * torch.pow(1 - pred, 2) * pos_inds * valid_mask 34 | neg_loss = torch.log(1 - pred + eps) * torch.pow(pred, 2) * neg_weights * neg_inds * valid_mask 35 | 36 | pos_loss = pos_loss.sum() 37 | neg_loss = neg_loss.sum() 38 | 39 | if num_pos == 0: 40 | loss = loss - neg_loss 41 | else: 42 | loss = loss - (pos_loss + neg_loss) / num_pos 43 | 44 | return loss 45 | 46 | 47 | def descriptor_loss(descriptors, warped_descriptors, homographies, 48 | valid_mask, warped_valid_mask, **config): 49 | # Compute the position of the center pixel of every cell in the image 50 | batch_size, Dc, Hc, Wc = descriptors.shape 51 | coord_cells = np.stack(np.meshgrid(range(Hc), range(Wc), indexing='ij'), axis=-1) 52 | coord_cells = coord_cells * config['cell'] + config['cell'] // 2 # (Hc, Wc, 2) 53 | coord_cells = coord_cells.astype(float) 54 | # coord_cells is now a grid containing the coordinates of the Hc * Wc 55 | # center pixels of the 8x8 cells of the image 56 | 57 | # Compute the position of the warped center pixels 58 | H_list = np.squeeze(homographies.cpu().numpy(), axis = 1) 59 | warped_coord_cells = warp_points_bacth(H_list, np.reshape(coord_cells, [-1, 2])) 60 | # warped_coord_cells is now a list of the warped coordinates of all the center 61 | # pixels of the 8x8 cells of the image, shape (N, Hc * Wc, 2) 62 | 63 | # Compute the pairwise distances and filter the ones less than a threshold 64 | # The distance is just the pairwise norm of the difference of the two grids 65 | # Using shape broadcasting, cell_distances has shape (N, Hc, Wc, Hc, Wc) 66 | coord_cells = np.reshape(coord_cells, [1, 1, 1, Hc, Wc, 2]) # represent warped_image coord_cells 67 | warped_coord_cells = np.reshape(warped_coord_cells, [batch_size, Hc, Wc, 1, 1, 2]) # represent oridin image coord_cells 68 | 69 | cell_distances = coord_cells - warped_coord_cells 70 | 71 | cell_distances = np.linalg.norm(cell_distances, axis=-1) 72 | 73 | # s = 
np.less_equal(cell_distances, config['cell'] - 0.5).astype(float) 74 | # s = torch.tensor(s, dtype=descriptors.dtype, device=descriptors.device) 75 | s = (cell_distances <= config['cell'] - 0.5) 76 | s = torch.tensor(s, device=descriptors.device) 77 | # s[id_batch, h, w, h', w'] == 1 if the point of coordinates (h, w) warped by the 78 | # homography is at a distance from (h', w') less than config['cell'] 79 | # and 0 otherwise 80 | 81 | # valid_mask 82 | normalization = torch.sum(warped_valid_mask).float() 83 | valid_mask = torch.nn.functional.interpolate(valid_mask.unsqueeze(1).float(), scale_factor=1.0/config['cell'], mode='bilinear') 84 | warped_valid_mask = torch.nn.functional.interpolate(warped_valid_mask.unsqueeze(1).float(), scale_factor=1.0/config['cell'], mode='bilinear') 85 | 86 | valid_mask = valid_mask.squeeze(1) > 0.5 87 | warped_valid_mask = warped_valid_mask.squeeze(1) > 0.5 88 | 89 | valid_mask = torch.reshape(valid_mask, [batch_size, Hc, Wc, 1, 1]) 90 | warped_valid_mask = torch.reshape(warped_valid_mask, [batch_size, 1, 1, Hc, Wc]) 91 | valid_mask = valid_mask * warped_valid_mask 92 | 93 | # Normalize the descriptors and 94 | # compute the pairwise dot product between descriptors: d^t * d' 95 | descriptors = descriptors.permute(0, 2, 3, 1) # B * C * H * W -> B * H * W *C 96 | descriptors = torch.reshape(descriptors, [batch_size, Hc, Wc, 1, 1, -1]) # B * Hc * Wc * 1 * 1 * 256 97 | descriptors = F.normalize(descriptors, dim=-1) 98 | 99 | warped_descriptors = warped_descriptors.permute(0, 2, 3, 1) # B * C * H * W -> B * H * W *C 100 | warped_descriptors = torch.reshape(warped_descriptors, [batch_size, 1, 1, Hc, Wc, -1]) # B * 1 * 1 * Hc * Wc * 256 101 | warped_descriptors = F.normalize(warped_descriptors, dim=-1) 102 | 103 | dot_product_desc = (warped_descriptors * descriptors).sum(dim=-1) # B * Hc * Wc * Hc * Wc 104 | dot_product_desc = F.relu(dot_product_desc) # B * Hc * Wc * Hc * Wc 105 | 106 | zero = torch.tensor(0.0, dtype=descriptors.dtype, device=descriptors.device) 107 | 108 | positive_dist = torch.max(zero, config['train']['positive_margin'] - dot_product_desc) 109 | negative_dist = torch.max(zero, dot_product_desc - config['train']['negative_margin']) 110 | 111 | loss = (config['train']['lambda_d'] * s * positive_dist + (~s) * negative_dist) * valid_mask 112 | loss = torch.sum(loss)/normalization 113 | 114 | return loss 115 | 116 | 117 | class SuperPointLoss(torch.nn.Module): 118 | ''' 119 | loss for magicpoint: detector loss 120 | ''' 121 | def __init__(self, config): 122 | super(SuperPointLoss, self).__init__() 123 | self.detector_loss = detector_loss 124 | self.descriptor_loss = descriptor_loss 125 | self.config = config 126 | 127 | def forward(self, inputs, outputs): 128 | loss = self.detector_loss(outputs['outputs']['prob'], inputs['ht'], inputs['valid_mask']) 129 | loss_dict = {'points_loss': loss} 130 | if 'warped_image' in inputs: 131 | warped_loss = self.detector_loss(outputs['warped_outputs']['prob'], inputs['warped_ht'], inputs['warped_valid_mask']) 132 | 133 | loss_dict['warped_points_loss'] = warped_loss 134 | loss = loss + warped_loss 135 | if self.config['train']['add_descriptor']: 136 | descriptor_loss = self.descriptor_loss(outputs['outputs']['desc_raw'], outputs['warped_outputs']['desc_raw'], 137 | inputs['H'], inputs['valid_mask'], inputs['warped_valid_mask'], **self.config) 138 | loss_dict['descriptor_loss'] = descriptor_loss 139 | loss = loss + self.config['train']['lambda_loss'] * descriptor_loss 140 | 141 | return loss, loss_dict 142 | 
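# --- Illustrative sketch (not part of the original source) ---
# A minimal, self-contained check of the hinge terms used by descriptor_loss above,
# assuming an identity homography so that every cell corresponds only to itself.
# The tensor shapes and margin values below are assumptions chosen for illustration.
import torch
import torch.nn.functional as F

batch_size, Hc, Wc, D = 2, 4, 4, 16
positive_margin, negative_margin, lambda_d = 1.0, 0.2, 250.0

desc = F.normalize(torch.randn(batch_size, Hc, Wc, 1, 1, D), dim=-1)
warped_desc = F.normalize(torch.randn(batch_size, 1, 1, Hc, Wc, D), dim=-1)

# pairwise cell similarities, shape (B, Hc, Wc, Hc, Wc)
dot = F.relu((desc * warped_desc).sum(dim=-1))

# correspondence mask: with an identity homography, cell (h, w) matches only (h, w)
s = torch.eye(Hc * Wc).reshape(1, Hc, Wc, Hc, Wc).expand(batch_size, -1, -1, -1, -1)

positive_dist = torch.clamp(positive_margin - dot, min=0.0)   # pull matching cells together
negative_dist = torch.clamp(dot - negative_margin, min=0.0)   # push non-matching cells apart
loss = (lambda_d * s * positive_dist + (1 - s) * negative_dist).mean()
print(loss.item())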
-------------------------------------------------------------------------------- /model/superpoint/superpoint_public_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import numpy as np 5 | 6 | class SuperPointNet(torch.nn.Module): 7 | """ Pytorch definition of SuperPoint Network. """ 8 | def __init__(self): 9 | super(SuperPointNet, self).__init__() 10 | self.relu = torch.nn.ReLU(inplace=True) 11 | self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2) 12 | c1, c2, c3, c4, c5, d1 = 64, 64, 128, 128, 256, 256 13 | # Shared Encoder. 14 | self.conv1a = torch.nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1) 15 | self.conv1b = torch.nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1) 16 | self.conv2a = torch.nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) 17 | self.conv2b = torch.nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1) 18 | self.conv3a = torch.nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1) 19 | self.conv3b = torch.nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1) 20 | self.conv4a = torch.nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1) 21 | self.conv4b = torch.nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1) 22 | # Detector Head. 23 | self.convPa = torch.nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 24 | self.convPb = torch.nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0) 25 | # Descriptor Head. 26 | self.convDa = torch.nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 27 | self.convDb = torch.nn.Conv2d(c5, d1, kernel_size=1, stride=1, padding=0) 28 | 29 | def forward(self, x): 30 | """ Forward pass that jointly computes unprocessed point and descriptor 31 | tensors. 32 | Input 33 | x: Image pytorch tensor shaped N x 1 x H x W. 34 | Output 35 | semi: Output point pytorch tensor shaped N x 65 x H/8 x W/8. 36 | desc: Output descriptor pytorch tensor shaped N x 256 x H/8 x W/8. 37 | """ 38 | # Shared Encoder. 39 | if x.shape[1] > 1: 40 | x = x[:, 0:1, :, :] 41 | 42 | x = self.relu(self.conv1a(x)) 43 | x = self.relu(self.conv1b(x)) 44 | x = self.pool(x) 45 | x = self.relu(self.conv2a(x)) 46 | x = self.relu(self.conv2b(x)) 47 | x = self.pool(x) 48 | x = self.relu(self.conv3a(x)) 49 | x = self.relu(self.conv3b(x)) 50 | x = self.pool(x) 51 | x = self.relu(self.conv4a(x)) 52 | x = self.relu(self.conv4b(x)) 53 | # Detector Head. 54 | cPa = self.relu(self.convPa(x)) 55 | semi = self.convPb(cPa) 56 | 57 | prob = torch.nn.functional.softmax(semi, dim=1) 58 | prob = prob[:, :-1, :, :] 59 | prob = torch.nn.functional.pixel_shuffle(prob, 8) 60 | 61 | # Descriptor Head. 
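# The raw (coarse) descriptor map computed below is bilinearly upsampled by the cell size (8)
# and L2-normalized along the channel dimension to give dense per-pixel descriptors.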
62 | cDa = self.relu(self.convDa(x)) 63 | desc_raw = self.convDb(cDa) 64 | 65 | desc = torch.nn.functional.interpolate(desc_raw, scale_factor=8, mode='bilinear') 66 | desc = torch.nn.functional.normalize(desc, p=2, dim=1) 67 | 68 | return {'logits': semi, 'prob':prob, 'desc_raw': desc_raw, 'desc': desc} -------------------------------------------------------------------------------- /model/superpoint/vgg_like.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | from torchvision import models 7 | 8 | class VggLike(nn.Module): 9 | 10 | def __init__(self, pretrained_net): 11 | super(VggLike, self).__init__() 12 | self.pretrained_net = pretrained_net 13 | self.relu = nn.ReLU(inplace=True) 14 | 15 | c1, c2, h1, h2 = 256, 256, 65, 256 16 | self.convPa = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) 17 | self.bnPa = nn.BatchNorm2d(c2) 18 | self.convPb = nn.Conv2d(c2, h1, kernel_size=1, stride=1, padding=0) 19 | self.bnPb = nn.BatchNorm2d(h1) 20 | 21 | self.convDa = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) 22 | self.bnDa = nn.BatchNorm2d(c2) 23 | self.convDb = nn.Conv2d(c2, h2, kernel_size=1, stride=1, padding=0) 24 | self.bnDb = nn.BatchNorm2d(h2) 25 | 26 | def forward(self, x): 27 | 28 | output = self.pretrained_net(x) 29 | x3 = output['x3'] 30 | 31 | cPa = self.bnPa(self.relu(self.convPa(x3))) 32 | semi = self.bnPb(self.convPb(cPa)) 33 | 34 | prob = nn.functional.softmax(semi, dim=1) 35 | prob = prob[:, :-1, :, :] 36 | prob = nn.functional.pixel_shuffle(prob, 8) 37 | 38 | # descriptor extraction 39 | cDa = self.bnDa(self.relu(self.convDa(x3))) 40 | desc_raw = self.bnDb(self.convDb(cDa)) 41 | desc = nn.functional.interpolate(desc_raw, scale_factor=8, mode='bilinear') 42 | desc = nn.functional.normalize(desc, p=2, dim=1) 43 | 44 | return {'logits': semi, 'prob':prob, 'desc_raw': desc_raw, 'desc': desc} -------------------------------------------------------------------------------- /structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/structures/__init__.py -------------------------------------------------------------------------------- /train_gcn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import datetime 8 | import logging 9 | import os 10 | import time 11 | import argparse 12 | import yaml 13 | 14 | import torch 15 | import torch.distributed as dist 16 | 17 | import torch.optim as optim 18 | from torch.autograd import Variable 19 | from torch.optim import lr_scheduler 20 | from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 21 | 22 | from datasets.utils.build_data import coco_loader 23 | from datasets.utils import pipeline as pp 24 | from model.build_model import build_maskrcnn, build_gcn 25 | from datasets.utils.preprocess import warp_batch_data, match_points_clusters 26 | from model.graph_models.descriptor_loss import DescriptorLoss 27 | from model.build_model import build_superpoint_model 28 | from model.inference import superpoint_inference 29 | from model.backbone.fcn import VGGNet 30 | from model.superpoint.vgg_like import VggLike 31 | 32 | 
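# Example invocation (illustrative only; the config name and paths are assumptions):
#   python train_gcn.py -c config/train_gcn_coco.yaml -g 1 -s /path/to/save_dir \
#       -d /path/to/coco -sm /path/to/superpoint.pth -gm /path/to/gcn_init.pth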
os.environ["CUDA_VISIBLE_DEVICES"] = "1" 33 | 34 | def train(configs): 35 | # read configs 36 | ## command line config 37 | use_gpu = configs['use_gpu'] 38 | save_dir = configs['save_dir'] 39 | data_root = configs['data_root'] 40 | ## data cofig 41 | data_config = configs['data'] 42 | data_aug_config = data_config['augmentation'] 43 | # train_data_name = data_config['TRAIN'] 44 | train_data_name = data_config['VAL'] 45 | ## superpoint model config 46 | detection_threshold = configs['model']['superpoint']['detection_threshold'] 47 | ## graph model config 48 | gcn_config = configs['model']['gcn'] 49 | batch_szie = gcn_config['train']['batch_szie'] 50 | epochs = gcn_config['train']['epochs'] 51 | lr = gcn_config['train']['lr'] 52 | momentum = gcn_config['train']['momentum'] 53 | w_decay = gcn_config['train']['w_decay'] 54 | milestones = gcn_config['train']['milestones'] 55 | gamma = gcn_config['train']['gamma'] 56 | checkpoint = gcn_config['train']['checkpoint'] 57 | lambda_d = gcn_config['train']['lambda_d'] 58 | weight_lambda = gcn_config['train']['weight_lambda'] 59 | ## others 60 | configs['num_gpu'] = [0] 61 | configs['public_model'] = 0 62 | 63 | # data 64 | data_loader = coco_loader(data_root=data_root, name=train_data_name, config=data_config, 65 | batch_size=batch_szie, remove_images_without_annotations=True) 66 | 67 | # model 68 | superpoint_model = build_superpoint_model(configs, requires_grad=False) 69 | superpoint_model.eval() 70 | 71 | gcn_model = build_gcn(configs) 72 | gcn_model.train() 73 | 74 | # optimizer 75 | optimizer = optim.RMSprop(gcn_model.parameters(), lr=lr, momentum=momentum, weight_decay=w_decay) 76 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma) 77 | 78 | # loss 79 | criterion = DescriptorLoss(gcn_config) 80 | 81 | sum_iter = 0 82 | for _ in range(epochs): 83 | for _, batch in enumerate(data_loader): 84 | optimizer.zero_grad() 85 | original_images = batch['image'] 86 | original_sizes = [list(img.shape[-2:]) for img in original_images] 87 | points_output, maskrcnn_targets, _ = superpoint_inference( 88 | superpoint_model, batch, use_gpu, 1, data_config, detection_threshold, save_dir=None) 89 | 90 | warped_batch = warp_batch_data(batch, data_config) 91 | warped_points_output, warped_maskrcnn_targets, _ = superpoint_inference( 92 | superpoint_model, warped_batch, use_gpu, 1, data_config, detection_threshold, save_dir=None) 93 | 94 | masks = maskrcnn_targets['masks'] 95 | warped_masks = warped_maskrcnn_targets['masks'] 96 | if 'gcn_mask' in data_aug_config: 97 | gcn_aug = data_aug_config['gcn_mask'] 98 | if gcn_aug['enable']: 99 | masks = pp.mask_augmentation(masks, gcn_aug) 100 | masks = torch.tensor(masks) 101 | 102 | batch_points, batch_descs, connections = match_points_clusters(points_output, masks, 103 | warped_points_output, warped_masks) 104 | 105 | if len(connections) < 2: 106 | print("no object") 107 | continue 108 | 109 | batch_points = [points.cuda() for points in batch_points] 110 | batch_descs = [descs.cuda() for descs in batch_descs] 111 | batch_object_descs, locations = gcn_model(batch_points, batch_descs) 112 | connections = torch.stack(connections).cuda() 113 | 114 | # descriptor loss 115 | ploss, nloss = criterion(batch_object_descs, connections) 116 | 117 | # location loss 118 | locations_mean_loss = locations.mean() 119 | location_sum = torch.sum(locations, 0) 120 | norm_locations_sum = torch.nn.functional.normalize(location_sum, p=2, dim=-1) 121 | # locations_norm_loss = 1 - norm_locations_sum.mean() 122 | 
zero = torch.tensor(0.0, dtype=norm_locations_sum.dtype, device=norm_locations_sum.device) 123 | locations_norm_loss = torch.max(zero, 0.1 - norm_locations_sum.mean()) 124 | 125 | loss = ploss * lambda_d + nloss + locations_mean_loss * weight_lambda[0] + locations_norm_loss * weight_lambda[1] 126 | 127 | loss.backward() 128 | optimizer.step() 129 | scheduler.step() 130 | sum_iter = sum_iter + 1 131 | 132 | if sum_iter%1 == 0: 133 | print("sum_iter = {}, loss = {}".format(sum_iter, loss.item())) 134 | print("ploss = {}, nloss = {}, locations_mean_loss = {}, locations_norm_loss = {}".format( 135 | ploss.item(), nloss.item(), locations_mean_loss.item(), locations_norm_loss.item())) 136 | 137 | if sum_iter % checkpoint == 0: 138 | model_saving_path = os.path.join(save_dir, "gcn_model_{}.pth".format(sum_iter)) 139 | torch.save(gcn_model.state_dict(), model_saving_path) 140 | print("saving model to {}".format(model_saving_path)) 141 | 142 | 143 | def main(): 144 | parser = argparse.ArgumentParser(description="Training") 145 | parser.add_argument( 146 | "-c", "--config_file", 147 | dest = "config_file", 148 | type = str, 149 | default = "" 150 | ) 151 | parser.add_argument( 152 | "-g", "--gpu", 153 | dest = "gpu", 154 | type = int, 155 | default = 0 156 | ) 157 | parser.add_argument( 158 | "-s", "--save_dir", 159 | dest = "save_dir", 160 | type = str, 161 | default = "" 162 | ) 163 | parser.add_argument( 164 | "-d", "--data_root", 165 | dest = "data_root", 166 | type = str, 167 | default = "" 168 | ) 169 | parser.add_argument( 170 | "-sm", "--superpoint_model_path", 171 | dest = "superpoint_model_path", 172 | type = str, 173 | default = "" 174 | ) 175 | parser.add_argument( 176 | "-gm", "--graph_model_path", 177 | dest = "graph_model_path", 178 | type = str, 179 | default = "" 180 | ) 181 | args = parser.parse_args() 182 | 183 | config_file = args.config_file 184 | f = open(config_file, 'r', encoding='utf-8') 185 | configs = f.read() 186 | configs = yaml.load(configs) 187 | configs['use_gpu'] = args.gpu 188 | configs['save_dir'] = args.save_dir 189 | configs['data_root'] = args.data_root 190 | configs['superpoint_model_path'] = args.superpoint_model_path 191 | configs['graph_model_path'] = args.graph_model_path 192 | 193 | train(configs) 194 | 195 | if __name__ == "__main__": 196 | main() -------------------------------------------------------------------------------- /train_maskrcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
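# Example invocation (illustrative only; the config name and paths are assumptions):
#   python train_maskrcnn.py -c config/train_maskrcnn_coco.yaml -g 1 -s /path/to/save_dir \
#       -d /path/to/coco -m /path/to/pretrained_maskrcnn.pth -p 1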
2 | import datetime 3 | import logging 4 | import os 5 | import time 6 | import argparse 7 | import yaml 8 | import copy 9 | 10 | import torch 11 | import torch.distributed as dist 12 | 13 | import torch.optim as optim 14 | from torch.autograd import Variable 15 | from torch.optim import lr_scheduler 16 | 17 | from model.mask_rcnn.mask_rcnn import MaskRCNN 18 | from datasets.utils.build_data import coco_loader 19 | from datasets.utils.preprocess import preprocess_maskrcnn_train_data 20 | from model.build_model import build_maskrcnn 21 | 22 | os.environ["CUDA_VISIBLE_DEVICES"] = "2, 3" 23 | 24 | def train(configs): 25 | # read configs 26 | ## command line config 27 | use_gpu = configs['use_gpu'] 28 | model_dir = configs['model_dir'] 29 | data_root = configs['data_root'] 30 | ## data cofig 31 | data_config = configs['data'] 32 | train_data_name = data_config['TRAIN'] 33 | ## model config 34 | model_config = configs['model']['maskrcnn'] 35 | train_batch_size = model_config['batch_size'] 36 | epochs = model_config['epochs'] 37 | lr = model_config['lr'] 38 | momentum = model_config['momentum'] 39 | w_decay = model_config['w_decay'] 40 | milestones = model_config['milestones'] 41 | gamma = model_config['gamma'] 42 | checkpoint = model_config['checkpoint'] 43 | ## others 44 | configs['num_gpu'] = [0, 1] 45 | 46 | # data 47 | train_loader = coco_loader( 48 | data_root=data_root, name=train_data_name, config=data_config, batch_size=train_batch_size, 49 | remove_images_without_annotations=True) 50 | 51 | # model 52 | model = build_maskrcnn(configs) 53 | 54 | model.train() 55 | 56 | # optimizer 57 | optimizer = optim.RMSprop(model.parameters(), lr=lr, momentum=momentum, weight_decay=w_decay) 58 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma) 59 | 60 | sum_iter = 0 61 | for _ in range(epochs): 62 | for iter, batch in enumerate(train_loader): 63 | optimizer.zero_grad() 64 | images, sizes, maskrcnn_targets = preprocess_maskrcnn_train_data(batch, use_gpu, data_config) 65 | result = model(images, sizes, maskrcnn_targets) 66 | 67 | losses_dict = result[0] 68 | losses_dict_print = {} 69 | for k in losses_dict: 70 | losses_dict[k] = torch.sum(losses_dict[k]) 71 | losses_dict_print[k] = losses_dict[k].cpu().item() 72 | 73 | losses = [losses_dict[k] for k in losses_dict.keys()] 74 | losses = sum(losses) 75 | losses.backward() 76 | optimizer.step() 77 | 78 | if iter%10 == 0: 79 | print("sum_iter = {}, loss = {}".format(sum_iter, losses.item())) 80 | print("loss_dict = {}".format(losses_dict_print)) 81 | 82 | if sum_iter % checkpoint == 0: 83 | model_saving_path = os.path.join(model_dir, "maskrcnn_iter{}.pth".format(sum_iter)) 84 | torch.save(model.state_dict(), model_saving_path) 85 | print("saving model to {}".format(model_saving_path)) 86 | 87 | scheduler.step() 88 | sum_iter += 1 89 | 90 | def main(): 91 | parser = argparse.ArgumentParser(description="Training") 92 | parser.add_argument( 93 | "-c", "--config_file", 94 | dest = "config_file", 95 | type = str, 96 | default = "" 97 | ) 98 | parser.add_argument( 99 | "-g", "--gpu", 100 | dest = "gpu", 101 | type = int, 102 | default = 0 103 | ) 104 | parser.add_argument( 105 | "-s", "--save_dir", 106 | dest = "save_dir", 107 | type = str, 108 | default = "" 109 | ) 110 | parser.add_argument( 111 | "-d", "--data_root", 112 | dest = "data_root", 113 | type = str, 114 | default = "" 115 | ) 116 | parser.add_argument( 117 | "-m", "--model_path", 118 | dest = "pretrained_model_path", 119 | type = str, 120 | default = "" 121 | ) 
122 | parser.add_argument( 123 | "-p", "--public_model", 124 | dest = "public_model", 125 | type = int, 126 | default = 0 127 | ) 128 | args = parser.parse_args() 129 | 130 | config_file = args.config_file 131 | f = open(config_file, 'r', encoding='utf-8') 132 | configs = f.read() 133 | configs = yaml.load(configs) 134 | configs['use_gpu'] = args.gpu 135 | configs['model_dir'] = args.save_dir 136 | configs['data_root'] = args.data_root 137 | configs['maskrcnn_model_path'] = args.pretrained_model_path 138 | configs['public_model'] = args.public_model 139 | 140 | train(configs) 141 | 142 | if __name__ == "__main__": 143 | main() 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /train_superpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import datetime 3 | import logging 4 | import os 5 | import time 6 | import argparse 7 | import yaml 8 | import copy 9 | 10 | import torch 11 | import torch.distributed as dist 12 | 13 | import torch.optim as optim 14 | from torch.autograd import Variable 15 | from torch.optim import lr_scheduler 16 | from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 17 | from torch.utils.data import DataLoader 18 | 19 | from model.build_model import build_superpoint_model 20 | from model.superpoint.superpoint_loss import SuperPointLoss 21 | from datasets.utils.build_data import coco_loader 22 | from datasets.synthetic.synthetic import SyntheticDataset 23 | from datasets.utils.batch_collator import BatchCollator 24 | from datasets.utils.preprocess import preprocess_superpoint_train_data 25 | 26 | 27 | os.environ["CUDA_VISIBLE_DEVICES"] = "2, 3" 28 | 29 | def update_gaussian_radius(gaussian_radius, iter, gaussian_gamma, gaussian_milestones): 30 | r = gaussian_radius 31 | if r < 0: 32 | return 1, gaussian_radius 33 | 34 | for i in range(len(gaussian_milestones)): 35 | if iter > gaussian_milestones[i]: 36 | r = r * gaussian_gamma 37 | else: 38 | break 39 | 40 | r = int(r) 41 | if r < 2: 42 | gaussian_radius = -1 43 | return r, gaussian_radius 44 | 45 | def train(configs): 46 | # read configs 47 | ## command line config 48 | use_gpu = configs['use_gpu'] 49 | model_dir = configs['model_dir'] 50 | data_root = configs['data_root'] 51 | ## data cofig 52 | data_config = configs['data'] 53 | dataset_name = data_config['name'] 54 | ## superpoint model config 55 | superpoint_model_config = configs['model']['superpoint'] 56 | train_batch_size = superpoint_model_config['train']['batch_size'] 57 | epochs = superpoint_model_config['train']['epochs'] 58 | lr = superpoint_model_config['train']['lr'] 59 | momentum = superpoint_model_config['train']['momentum'] 60 | w_decay = superpoint_model_config['train']['w_decay'] 61 | milestones = superpoint_model_config['train']['milestones'] 62 | gamma = superpoint_model_config['train']['gamma'] 63 | gaussian_region = superpoint_model_config['train']['gaussian_region'] 64 | gaussian_radius = gaussian_region['radius'] 65 | gaussian_gamma = gaussian_region['gamma'] 66 | gaussian_milestones = gaussian_region['milestones'] 67 | train_batch_size = superpoint_model_config['train']['batch_size'] 68 | checkpoint = superpoint_model_config['train']['checkpoint'] 69 | ## others 70 | configs['num_gpu'] = [0, 1] 71 | 72 | # data 73 | if 'coco' in dataset_name: 74 | train_data_name = data_config['TRAIN'] 75 | train_loader = coco_loader( 76 | data_root=data_root, 
name=train_data_name, config=data_config, batch_size=train_batch_size, 77 | remove_images_without_annotations=True) 78 | elif 'synthetic' in dataset_name: 79 | train_dataset = SyntheticDataset(data_root=data_root, use_for='training') 80 | sampler = torch.utils.data.sampler.RandomSampler(train_dataset) 81 | batch_sampler = torch.utils.data.sampler.BatchSampler(sampler=sampler, batch_size=train_batch_size, drop_last=True) 82 | collator = BatchCollator() 83 | train_loader = DataLoader(train_dataset, batch_sampler=batch_sampler, collate_fn=collator, num_workers=8) 84 | 85 | # model 86 | model = build_superpoint_model(configs) 87 | model.train() 88 | 89 | # optimizer 90 | optimizer = optim.RMSprop(model.parameters(), lr=lr, momentum=momentum, weight_decay=w_decay) 91 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma) 92 | 93 | # loss 94 | criterion = SuperPointLoss(config=superpoint_model_config) 95 | 96 | sum_iter = 0 97 | r = gaussian_radius 98 | for _ in range(epochs): 99 | for iter, batch in enumerate(train_loader): 100 | optimizer.zero_grad() 101 | batch = preprocess_superpoint_train_data(batch, use_gpu, r, data_config) 102 | 103 | if use_gpu: 104 | for key in batch: 105 | if key == 'image_name': 106 | continue 107 | batch[key] = batch[key].cuda() 108 | 109 | outputs = model(batch['image']) 110 | batch_outputs = {'outputs': outputs} 111 | if 'warped_image' in batch: 112 | warped_outputs = model(batch['warped_image']) 113 | batch_outputs['warped_outputs'] = warped_outputs 114 | 115 | loss, loss_dict = criterion(batch, batch_outputs) 116 | loss = loss / train_batch_size 117 | 118 | for k in loss_dict: 119 | loss_dict[k] = loss_dict[k].cpu().item() / train_batch_size 120 | 121 | loss.backward() 122 | optimizer.step() 123 | 124 | if iter%10 == 0: 125 | print("sum_iter = {}, gaussian_radius={}, loss = {}".format(sum_iter, r, loss.item())) 126 | 127 | sum_iter += 1 128 | r, gaussian_radius = update_gaussian_radius(gaussian_radius, sum_iter, gaussian_gamma, gaussian_milestones) 129 | scheduler.step() 130 | 131 | if sum_iter % checkpoint == 0: 132 | model_saving_path = os.path.join(model_dir, "superpoint_iter{}.pth".format(sum_iter)) 133 | torch.save(model.state_dict(), model_saving_path) 134 | print("saving model to {}".format(model_saving_path)) 135 | 136 | 137 | def main(): 138 | parser = argparse.ArgumentParser(description="Training") 139 | parser.add_argument( 140 | "-c", "--config_file", 141 | dest = "config_file", 142 | type = str, 143 | default = "" 144 | ) 145 | parser.add_argument( 146 | "-g", "--gpu", 147 | dest = "gpu", 148 | type = int, 149 | default = 0 150 | ) 151 | parser.add_argument( 152 | "-s", "--save_dir", 153 | dest = "save_dir", 154 | type = str, 155 | default = "" 156 | ) 157 | parser.add_argument( 158 | "-d", "--data_root", 159 | dest = "data_root", 160 | type = str, 161 | default = "" 162 | ) 163 | parser.add_argument( 164 | "-m", "--model_path", 165 | dest = "pretrained_model_path", 166 | type = str, 167 | default = "" 168 | ) 169 | args = parser.parse_args() 170 | 171 | config_file = args.config_file 172 | f = open(config_file, 'r', encoding='utf-8') 173 | configs = f.read() 174 | configs = yaml.load(configs) 175 | configs['use_gpu'] = args.gpu 176 | configs['model_dir'] = args.save_dir 177 | configs['data_root'] = args.data_root 178 | configs['superpoint_model_path'] = args.pretrained_model_path 179 | 180 | train(configs) 181 | 182 | if __name__ == "__main__": 183 | main() 184 | 185 | 186 | 187 | 
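# --- Illustrative sketch (not part of the original source) ---
# A quick trace of update_gaussian_radius (defined earlier in this file): the radius is
# shrunk by `gamma` once for every milestone already passed, and as soon as the integer
# radius drops below 2 the schedule is disabled (its state becomes -1 and 1 is returned).
# The radius, gamma and milestone values below are assumptions chosen for illustration.
gaussian_radius, gaussian_gamma, gaussian_milestones = 8, 0.5, [1000, 2000, 3000]
for it in [0, 500, 1500, 2500, 3500, 4000]:
    r, gaussian_radius = update_gaussian_radius(gaussian_radius, it, gaussian_gamma, gaussian_milestones)
    print(it, r, gaussian_radius)
# expected radii: 8, 8, 4, 2, then 1 once the schedule is disabled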
-------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/utils/__init__.py -------------------------------------------------------------------------------- /utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | 5 | import torch 6 | 7 | from maskrcnn_benchmark.utils.model_serialization import load_state_dict 8 | from maskrcnn_benchmark.utils.c2_model_loading import load_c2_format 9 | from maskrcnn_benchmark.utils.imports import import_file 10 | from maskrcnn_benchmark.utils.model_zoo import cache_url 11 | 12 | 13 | class Checkpointer(object): 14 | def __init__( 15 | self, 16 | model, 17 | optimizer=None, 18 | scheduler=None, 19 | save_dir="", 20 | save_to_disk=None, 21 | logger=None, 22 | ): 23 | self.model = model 24 | self.optimizer = optimizer 25 | self.scheduler = scheduler 26 | self.save_dir = save_dir 27 | self.save_to_disk = save_to_disk 28 | if logger is None: 29 | logger = logging.getLogger(__name__) 30 | self.logger = logger 31 | 32 | def save(self, name, **kwargs): 33 | if not self.save_dir: 34 | return 35 | 36 | if not self.save_to_disk: 37 | return 38 | 39 | data = {} 40 | data["model"] = self.model.state_dict() 41 | if self.optimizer is not None: 42 | data["optimizer"] = self.optimizer.state_dict() 43 | if self.scheduler is not None: 44 | data["scheduler"] = self.scheduler.state_dict() 45 | data.update(kwargs) 46 | 47 | save_file = os.path.join(self.save_dir, "{}.pth".format(name)) 48 | self.logger.info("Saving checkpoint to {}".format(save_file)) 49 | torch.save(data, save_file) 50 | self.tag_last_checkpoint(save_file) 51 | 52 | def load(self, f=None, use_latest=True): 53 | if self.has_checkpoint() and use_latest: 54 | # override argument with existing checkpoint 55 | f = self.get_checkpoint_file() 56 | if not f: 57 | # no checkpoint could be found 58 | self.logger.info("No checkpoint found. 
Initializing model from scratch") 59 | return {} 60 | self.logger.info("Loading checkpoint from {}".format(f)) 61 | checkpoint = self._load_file(f) 62 | self._load_model(checkpoint) 63 | if "optimizer" in checkpoint and self.optimizer: 64 | self.logger.info("Loading optimizer from {}".format(f)) 65 | self.optimizer.load_state_dict(checkpoint.pop("optimizer")) 66 | if "scheduler" in checkpoint and self.scheduler: 67 | self.logger.info("Loading scheduler from {}".format(f)) 68 | self.scheduler.load_state_dict(checkpoint.pop("scheduler")) 69 | 70 | # return any further checkpoint data 71 | return checkpoint 72 | 73 | def has_checkpoint(self): 74 | save_file = os.path.join(self.save_dir, "last_checkpoint") 75 | return os.path.exists(save_file) 76 | 77 | def get_checkpoint_file(self): 78 | save_file = os.path.join(self.save_dir, "last_checkpoint") 79 | try: 80 | with open(save_file, "r") as f: 81 | last_saved = f.read() 82 | last_saved = last_saved.strip() 83 | except IOError: 84 | # if file doesn't exist, maybe because it has just been 85 | # deleted by a separate process 86 | last_saved = "" 87 | return last_saved 88 | 89 | def tag_last_checkpoint(self, last_filename): 90 | save_file = os.path.join(self.save_dir, "last_checkpoint") 91 | with open(save_file, "w") as f: 92 | f.write(last_filename) 93 | 94 | def _load_file(self, f): 95 | return torch.load(f, map_location=torch.device("cpu")) 96 | 97 | def _load_model(self, checkpoint): 98 | load_state_dict(self.model, checkpoint.pop("model")) 99 | 100 | 101 | class DetectronCheckpointer(Checkpointer): 102 | def __init__( 103 | self, 104 | cfg, 105 | model, 106 | optimizer=None, 107 | scheduler=None, 108 | save_dir="", 109 | save_to_disk=None, 110 | logger=None, 111 | ): 112 | super(DetectronCheckpointer, self).__init__( 113 | model, optimizer, scheduler, save_dir, save_to_disk, logger 114 | ) 115 | self.cfg = cfg.clone() 116 | 117 | def _load_file(self, f): 118 | # catalog lookup 119 | if f.startswith("catalog://"): 120 | paths_catalog = import_file( 121 | "maskrcnn_benchmark.config.paths_catalog", self.cfg.PATHS_CATALOG, True 122 | ) 123 | catalog_f = paths_catalog.ModelCatalog.get(f[len("catalog://") :]) 124 | self.logger.info("{} points to {}".format(f, catalog_f)) 125 | f = catalog_f 126 | # download url files 127 | if f.startswith("http"): 128 | # if the file is a url path, download it and cache it 129 | cached_f = cache_url(f) 130 | self.logger.info("url {} cached in {}".format(f, cached_f)) 131 | f = cached_f 132 | # convert Caffe2 checkpoint from pkl 133 | if f.endswith(".pkl"): 134 | return load_c2_format(self.cfg, f) 135 | # load native detectron.pytorch checkpoint 136 | loaded = super(DetectronCheckpointer, self)._load_file(f) 137 | if "model" not in loaded: 138 | loaded = dict(model=loaded) 139 | return loaded 140 | -------------------------------------------------------------------------------- /utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for cv2 utility functions and maintaining version compatibility 3 | between 3.x and 4.x 4 | """ 5 | import cv2 6 | 7 | 8 | def findContours(*args, **kwargs): 9 | """ 10 | Wraps cv2.findContours to maintain compatiblity between versions 11 | 3 and 4 12 | 13 | Returns: 14 | contours, hierarchy 15 | """ 16 | if cv2.__version__.startswith('4'): 17 | contours, hierarchy = cv2.findContours(*args, **kwargs) 18 | elif cv2.__version__.startswith('3'): 19 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 20 | else: 21 | 
raise AssertionError( 22 | 'cv2 must be either version 3 or 4 to call this method') 23 | 24 | return contours, hierarchy 25 | -------------------------------------------------------------------------------- /utils/imports.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | if torch._six.PY3: 4 | import importlib 5 | import importlib.util 6 | import sys 7 | 8 | def import_file(module_name, file_path, make_importable=False): 9 | spec = importlib.util.spec_from_file_location(module_name, file_path) 10 | module = importlib.util.module_from_spec(spec) 11 | spec.loader.exec_module(module) 12 | if make_importable: 13 | sys.modules[module_name] = module 14 | return module 15 | else: 16 | import imp 17 | 18 | def import_file(module_name, file_path, make_importable=None): 19 | module = imp.load_source(module_name, file_path) 20 | return module 21 | -------------------------------------------------------------------------------- /utils/tools.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import numpy as np 3 | import importlib 4 | import cv2 5 | 6 | def get_module(path, name): 7 | if path == '': 8 | mod = importlib.import_module(name) 9 | else: 10 | mod = importlib.import_module('{}.{}'.format(path, name)) 11 | return getattr(mod, name) 12 | 13 | def tensor_to_numpy(image): 14 | img = image.data.cpu().numpy() 15 | img = img.transpose(1, 2, 0) 16 | img = (img * 255.0 + 0.5).astype(np.uint8) 17 | img = np.clip(img, 0, 255) 18 | if img.shape[2] == 1: 19 | img = cv2.merge([img, img, img]) 20 | else: 21 | img = img.copy() 22 | return img 23 | 24 | 25 | def dict_update(d, u): 26 | """Improved update for nested dictionaries. 27 | 28 | Arguments: 29 | d: The dictionary to be updated. 30 | u: The update dictionary. 31 | 32 | Returns: 33 | The updated dictionary. 34 | """ 35 | for k, v in u.items(): 36 | if isinstance(v, collections.Mapping): 37 | d[k] = dict_update(d.get(k, {}), v) 38 | else: 39 | d[k] = v 40 | return d -------------------------------------------------------------------------------- /validate_detection.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
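# Example invocation (illustrative only; the config name and paths are assumptions):
#   python validate_detection.py -c config/validate_detection.yaml -g 1 -s /path/to/results \
#       -d /path/to/coco -mm /path/to/maskrcnn.pth -sm /path/to/superpoint.pth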
2 | import datetime 3 | import logging 4 | import os 5 | import time 6 | import argparse 7 | import yaml 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | import torch.optim as optim 13 | from torch.autograd import Variable 14 | from torch.optim import lr_scheduler 15 | 16 | from model.mask_rcnn.mask_rcnn import MaskRCNN 17 | from datasets.utils.build_data import coco_loader 18 | from model.build_model import build_maskrcnn, build_superpoint_model 19 | from model.inference import detection_inference 20 | 21 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 22 | 23 | def validate(configs): 24 | # read configs 25 | ## command line config 26 | use_gpu = configs['use_gpu'] 27 | save_dir = configs['save_dir'] 28 | data_root = configs['data_root'] 29 | ## data cofig 30 | data_config = configs['data'] 31 | val_data_name = data_config['VAL'] 32 | ## superpoint model config 33 | superpoint_model_config = configs['model']['superpoint'] 34 | detection_threshold = superpoint_model_config['detection_threshold'] 35 | val_batch_size = superpoint_model_config['batch_size'] 36 | gaussian_radius = 2 37 | ## others 38 | configs['num_gpu'] = [0] 39 | configs['public_model'] = 0 40 | 41 | # data 42 | val_loader = coco_loader(data_root=data_root, name=val_data_name, config=data_config, 43 | batch_size=val_batch_size, remove_images_without_annotations=True) 44 | 45 | # model 46 | maskrcnn_model = build_maskrcnn(configs) 47 | superpoint_model = build_superpoint_model(configs) 48 | 49 | with torch.no_grad(): 50 | maskrcnn_model.eval() 51 | for iter, batch in enumerate(val_loader): 52 | result = detection_inference(maskrcnn_model, superpoint_model, batch, use_gpu, gaussian_radius, 53 | detection_threshold, data_config, save_dir) 54 | 55 | def main(): 56 | parser = argparse.ArgumentParser(description="Training") 57 | parser.add_argument( 58 | "-c", "--config_file", 59 | dest = "config_file", 60 | type = str, 61 | default = "" 62 | ) 63 | parser.add_argument( 64 | "-g", "--gpu", 65 | dest = "gpu", 66 | type = int, 67 | default = 0 68 | ) 69 | parser.add_argument( 70 | "-s", "--save_dir", 71 | dest = "save_dir", 72 | type = str, 73 | default = "" 74 | ) 75 | parser.add_argument( 76 | "-d", "--data_root", 77 | dest = "data_root", 78 | type = str, 79 | default = "" 80 | ) 81 | parser.add_argument( 82 | "-mm", "--maskrcnn_model_path", 83 | dest = "maskrcnn_model_path", 84 | type = str, 85 | default = "" 86 | ) 87 | parser.add_argument( 88 | "-sm", "--superpoint_model_path", 89 | dest = "superpoint_model_path", 90 | type = str, 91 | default = "" 92 | ) 93 | args = parser.parse_args() 94 | 95 | config_file = args.config_file 96 | f = open(config_file, 'r', encoding='utf-8') 97 | configs = f.read() 98 | configs = yaml.load(configs) 99 | configs['use_gpu'] = args.gpu 100 | configs['save_dir'] = args.save_dir 101 | configs['data_root'] = args.data_root 102 | configs['maskrcnn_model_path'] = args.maskrcnn_model_path 103 | configs['superpoint_model_path'] = args.superpoint_model_path 104 | 105 | validate(configs) 106 | 107 | if __name__ == "__main__": 108 | main() 109 | 110 | 111 | -------------------------------------------------------------------------------- /validate_gcn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
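# Example invocation (illustrative only; the config name and paths are assumptions):
#   python validate_gcn.py -c config/validate_gcn.yaml -g 1 -s /path/to/results \
#       -d /path/to/coco -mm /path/to/maskrcnn.pth -gm /path/to/gcn.pth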
2 | import datetime 3 | import logging 4 | import os 5 | import time 6 | import argparse 7 | import yaml 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 13 | 14 | from datasets.utils.build_data import coco_loader 15 | from model.build_model import build_maskrcnn, build_gcn 16 | from datasets.utils.preprocess import warp_batch_data, match_points_clusters 17 | from validate import maskrcnn_inference 18 | from model.graph_models.descriptor_loss import DescriptorLoss 19 | 20 | 21 | def calculate_f1(maskrcnn_model, gcn_model, loader, configs): 22 | with torch.no_grad(): 23 | maskrcnn_model.eval() 24 | gcn_model.eval() 25 | 26 | ## data cofig 27 | data_config = configs['data'] 28 | ## superpoint model config 29 | superpoint_model_config = configs['model']['superpoint'] 30 | detection_threshold = superpoint_model_config['eval']['detection_threshold'] 31 | 32 | precisions, recalls, weights = [], [], [] 33 | 34 | for iter, batch in enumerate(loader): 35 | optimizer.zero_grad() 36 | original_images = batch['image'] 37 | original_sizes = [list(img.shape[-2:]) for img in original_images] 38 | _, points_output, maskrcnn_targets, _ = maskrcnn_inference( 39 | maskrcnn_model, batch, use_gpu, 1, data_config, detection_threshold) 40 | 41 | warped_batch = warp_batch_data(batch, data_config) 42 | _, warped_points_output, warped_maskrcnn_targets, _ = maskrcnn_inference( 43 | maskrcnn_model, warped_batch, use_gpu, 1, data_config, detection_threshold) 44 | 45 | batch_points, batch_descs, connections = match_points_clusters(points_output, maskrcnn_targets['masks'], 46 | warped_points_output, warped_maskrcnn_targets['masks']) 47 | 48 | if len(connections) < 2: 49 | print("no object") 50 | continue 51 | 52 | batch_points = [points.cuda() for points in batch_points] 53 | batch_descs = [descs.cuda() for descs in batch_descs] 54 | batch_object_descs = gcn_model(batch_points, batch_descs) 55 | connections = torch.stack(connections).cuda() 56 | 57 | distances = torch.einsum('nd,dm->nm', descs, descs.t()) # N * N 58 | good_matchs = (distances > dist_thr).float() 59 | num_correct_matches = torch.sum(good_matchs * connections) 60 | num_connections = torch.sum(connections) 61 | 62 | recall = num_correct_matches / num_connections 63 | precision = num_correct_matches / torch.sum(good_matchs) 64 | 65 | recalls.append(recall) 66 | precisions.append(precision) 67 | weights.append(num_connections) 68 | 69 | if(len(weights) == 0): 70 | return 0., 0., 0. 71 | 72 | recalls = torch.tensor(recalls) 73 | precisions = torch.tensor(precisions) 74 | weights = torch.tensor(weights) 75 | 76 | total_number = torch.sum(weights) 77 | aver_recall = torch.sum(recalls * weights) / total_number 78 | aver_precision = torch.sum(precisions * weights) / total_number 79 | aver_f1 = 0. 
if (aver_recall + aver_precision) == 0 else 2 * aver_recall * aver_precision / (aver_recall + aver_precision) 80 | 81 | return aver_recall, aver_precision, aver_f1 82 | 83 | 84 | def validate(configs): 85 | # read configs 86 | ## command line config 87 | use_gpu = configs['use_gpu'] 88 | save_dir = configs['save_dir'] 89 | data_root = configs['data_root'] 90 | ## data config 91 | data_config = configs['data'] 92 | validation_data_name = data_config['VAL'] 93 | ## superpoint model config 94 | superpoint_model_config = configs['model']['superpoint'] 95 | detection_threshold = superpoint_model_config['eval']['detection_threshold'] 96 | ## graph model config 97 | gcn_config = configs['model']['gcn'] 98 | batch_szie = gcn_config['train']['batch_szie'] 99 | ## others 100 | configs['num_gpu'] = [0] 101 | configs['public_model'] = 0 102 | 103 | # data 104 | data_loader = coco_loader(data_root=data_root, name=validation_data_name, config=data_config, 105 | batch_size=batch_szie, remove_images_without_annotations=True) 106 | 107 | # model 108 | maskrcnn_model = build_maskrcnn(configs) 109 | gcn_model = build_gcn(configs) 110 | 111 | recall, precision, f1 = calculate_f1(maskrcnn_model, gcn_model, data_loader, configs) 112 | print("recall = {}, precision = {}, f1 = {}".format(recall, precision, f1)) 113 | 114 | 115 | def main(): 116 | parser = argparse.ArgumentParser(description="Validation") 117 | parser.add_argument( 118 | "-c", "--config_file", 119 | dest = "config_file", 120 | type = str, 121 | default = "" 122 | ) 123 | parser.add_argument( 124 | "-g", "--gpu", 125 | dest = "gpu", 126 | type = int, 127 | default = 0 128 | ) 129 | parser.add_argument( 130 | "-s", "--save_dir", 131 | dest = "save_dir", 132 | type = str, 133 | default = "" 134 | ) 135 | parser.add_argument( 136 | "-d", "--data_root", 137 | dest = "data_root", 138 | type = str, 139 | default = "" 140 | ) 141 | parser.add_argument( 142 | "-mm", "--maskrcnn_model_path", 143 | dest = "maskrcnn_model_path", 144 | type = str, 145 | default = "" 146 | ) 147 | parser.add_argument( 148 | "-gm", "--graph_model_path", 149 | dest = "graph_model_path", 150 | type = str, 151 | default = "" 152 | ) 153 | args = parser.parse_args() 154 | 155 | config_file = args.config_file 156 | f = open(config_file, 'r', encoding='utf-8') 157 | configs = f.read() 158 | configs = yaml.load(configs) 159 | configs['use_gpu'] = args.gpu 160 | configs['save_dir'] = args.save_dir 161 | configs['data_root'] = args.data_root 162 | configs['pretrained_model_path'] = args.maskrcnn_model_path 163 | configs['graph_model_path'] = args.graph_model_path 164 | 165 | validate(configs) 166 | 167 | if __name__ == "__main__": 168 | main() 169 | --------------------------------------------------------------------------------
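# --- Illustrative sketch (not part of the repository) ---
# How the weighted precision / recall / F1 aggregation in calculate_f1 above behaves on
# toy per-batch statistics; all numbers below are assumptions chosen for illustration.
import torch

precisions = torch.tensor([0.8, 0.6])
recalls = torch.tensor([0.7, 0.5])
weights = torch.tensor([100.0, 50.0])   # number of ground-truth object connections per batch

total = weights.sum()
aver_precision = (precisions * weights).sum() / total
aver_recall = (recalls * weights).sum() / total
aver_f1 = 2 * aver_precision * aver_recall / (aver_precision + aver_recall)
print(aver_precision.item(), aver_recall.item(), aver_f1.item())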