├── .gitignore ├── LICENSE ├── README.md ├── config ├── compare_tracking.yaml ├── experiment_place_recognition.yaml ├── experiment_tracking.yaml ├── tracking_compare_plot.yaml ├── train_gcn_coco.yaml ├── train_maskrcnn_coco.yaml ├── train_superpoint_coco.yaml ├── train_superpoint_synthetic.yaml ├── validate_detection.yaml └── validate_gcn.yaml ├── datasets ├── coco │ ├── coco.py │ ├── coco_cat.txt │ └── paths_catalog.py ├── evaluation │ └── coco │ │ ├── __init__.py │ │ └── coco_eval.py ├── kitti │ ├── kitti_odomery.py │ └── kitti_tracking.py ├── otb │ └── otb_tracking.py ├── synthetic │ └── synthetic.py ├── utils │ ├── __init__.py │ ├── augmentation_legacy.py │ ├── batch_collator.py │ ├── build_data.py │ ├── gcn_mask_augmentation.py │ ├── homographies.py │ ├── pipeline.py │ ├── postprocess.py │ ├── preprocess.py │ └── transforms.py └── vot │ └── vot_tracking.py ├── debug_tools ├── command.txt ├── draw_points.py ├── object_tracking.py ├── show_batch.py ├── show_detections.py ├── show_match.py ├── show_points_detection.py ├── test_batch_H.py └── test_data_process.py ├── experiments ├── compare_tracking.py ├── demo │ ├── kitti-relocalization.gif │ ├── object-matching1.gif │ └── object-matching2.gif ├── object_tracking │ ├── object_tracking.py │ └── single_object_tracking.py ├── place_recogination │ ├── offline_process.py │ ├── offline_topK.py │ └── online_relocalization.py ├── show_object_matching │ ├── draw_object.py │ └── show_object_matching.py └── utils │ └── utils.py ├── model ├── backbone │ ├── fcn.py │ └── resnet_fpn.py ├── build_model.py ├── graph_models │ ├── attention.py │ ├── descriptor_loss.py │ └── object_descriptor.py ├── inference.py ├── mask_rcnn │ ├── mask_rcnn.py │ └── transform.py └── superpoint │ ├── superpoint_loss.py │ ├── superpoint_public_model.py │ └── vgg_like.py ├── structures ├── __init__.py └── segmentation_mask.py ├── train_gcn.py ├── train_maskrcnn.py ├── train_superpoint.py ├── utils ├── __init__.py ├── checkpoint.py ├── cv2_util.py ├── imports.py └── tools.py ├── validate_detection.py └── validate_gcn.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # Dataset 107 | CityScapes/ 108 | Lane/ 109 | 110 | # Generated 111 | scores/ 112 | 113 | # tmp files 114 | *.pyc 115 | runs/ 116 | *.pickle 117 | *.pth 118 | saving/* 119 | *.swp 120 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, SAIR Lab 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AirCode 2 | 3 | Xu, Kuan, [Chen Wang](https://chenwang.site), Chao Chen, Wei Wu, and Sebastian Scherer. "["AirCode: A Robust Object Encoding Method"](https://arxiv.org/abs/2105.00327)." IEEE Robotics and Automation Letters (2022). 
(Accepted to ICRA 2022) 4 | 5 | ## Demo 6 | Object matching comparison when the objects are non-rigid and the viewpoint changes: the left result is from our method, while the right is from NetVLAD 7 | 8 | ![](experiments/demo/object-matching1.gif) ![](experiments/demo/object-matching2.gif) 9 | 10 | Relocalization on the KITTI dataset 11 | 12 | ![](experiments/demo/kitti-relocalization.gif) 13 | 14 | 15 | ## Dependencies 16 | * Python 3.7 17 | * Torchvision 0.8.0 18 | * PyTorch 1.7.0 19 | * OpenCV 4.4.0 20 | * Matplotlib 3.3.3 21 | * NumPy 1.19.2 22 | * PyYAML 5.3.1 23 | 24 | 25 | ## Data 26 | Four datasets are used in our experiments. 27 | 28 | ### KITTI Odometry 29 | Used for the relocalization experiment. Three sequences are selected: "00", "05" and "06". 30 | 31 | ### KITTI Tracking 32 | Used for the multi-object matching experiment. Four sequences are selected: "0002", "0003", "0006" and "0010". 33 | 34 | ### VOT Datasets 35 | Used for the single-object matching experiment. We select three sequences from the VOT2019 dataset, "bluecar", "bus6" and "humans_corridor_occ_2_A", because the objects tracked in these sequences belong to categories in the COCO dataset, which is the data we used to train Mask R-CNN. 36 | 37 | ### OTB Datasets 38 | Used for the single-object matching experiment. We select five sequences: "BlurBody", "BlurCar2", "Human2", "Human7" and "Liquor". 39 | 40 | 41 | ## Examples 42 | Each command below reads its settings from a YAML file under `config/` (the `-c` argument); a minimal config-loading sketch is shown after the Pretrained Models subsection. 43 | ### Relocalization on KITTI Datasets 44 | 45 | 1. Extract object descriptors 46 | ``` 47 | python experiments/place_recogination/online_relocalization.py -c config/experiment_tracking.yaml -g 1 -s PATH_TO_SAVE_MIDDLE_RESULTS -d PATH_TO_DATASET -m PATH_TO_MODELS 48 | ``` 49 | 50 | 2. Compute precision-recall curves 51 | ``` 52 | python experiments/place_recogination/offline_process.py -c config/experiment_place_recognition.yaml -d PATH_TO_DATASET -n PATH_TO_MIDDLE_RESULTS -s PATH_TO_SAVE_RESULTS 53 | ``` 54 | 55 | 3. Compute top-K relocalization results 56 | ``` 57 | python experiments/place_recogination/offline_topK.py -c config/experiment_place_recognition.yaml -d PATH_TO_DATASET -n PATH_TO_MIDDLE_RESULTS -s PATH_TO_SAVE_RESULTS 58 | ``` 59 | 60 | ### Object Matching on OTB, VOT or KITTI Tracking Datasets 61 | 62 | * Run the multi-object matching experiment on the KITTI Tracking dataset 63 | Modify the [config file](config/experiment_tracking.yaml) and run 64 | ``` 65 | python experiments/object_tracking/object_tracking.py -c config/experiment_tracking.yaml -g 1 -s PATH_TO_SAVE_RESULTS -d PATH_TO_DATASET -m PATH_TO_MODELS 66 | ``` 67 | 68 | * Run the single-object matching experiment on the OTB or VOT datasets 69 | Modify the [config file](config/experiment_tracking.yaml) and run 70 | ``` 71 | python experiments/object_tracking/single_object_tracking.py -c config/experiment_tracking.yaml -g 1 -s PATH_TO_SAVE_RESULTS -d PATH_TO_DATASET -m PATH_TO_MODELS 72 | ``` 73 | 74 | ### Pretrained Models 75 | 76 | * Pretrained models can be downloaded from [this link](https://github.com/sair-lab/AirCode/releases/tag/v2.0.0).
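
The scripts above parse the `-c` YAML file themselves; the snippet below is only a minimal sketch (not part of the original scripts) of loading such a config with PyYAML and reading a few fields that appear in `config/experiment_tracking.yaml`. The repository's own code reads the file contents and calls `yaml.load`; `yaml.safe_load` is used here for the same effect.

```
import yaml

# Minimal sketch: load an experiment config the same way the scripts receive it
# through their -c argument. The field names below are taken from
# config/experiment_tracking.yaml in this repository.
with open("config/experiment_tracking.yaml", "r", encoding="utf-8") as f:
    configs = yaml.safe_load(f)

data_config = configs["data"]      # dataset name, normal_size, nclass, ...
model_config = configs["model"]    # superpoint / maskrcnn / gcn settings

print(data_config["name"], data_config["normal_size"])
print(model_config["gcn"]["descriptor_dim"], model_config["gcn"]["nout"])
```

The training configs (`config/train_*.yaml`) follow the same layout, with additional sections such as `augmentation` and `train`.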
77 | -------------------------------------------------------------------------------- /config/compare_tracking.yaml: -------------------------------------------------------------------------------- 1 | intervals: [1, 3, 5] # selected lines; each number represents an interval 2 | title: kitti # figure title 3 | colors: ['green', 'red', 'blue', 'yellow', 'darkviolet', 'sandybrown'] # line colors 4 | linewidth: 3 # line width 5 | xlabel: recall # x-axis name 6 | ylabel: precision # y-axis name 7 | fontsize: 20 # font size 8 | figsize: (10, 10) # figure size, inches 9 | dpi: 100 # dots per inch 10 | -------------------------------------------------------------------------------- /config/experiment_place_recognition.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'kitti' 3 | nclass: 81 4 | normal_size: [384, 1280] # min_size, max_size 5 | cache_in_memory: false 6 | validation_size: 96 7 | model: 8 | superpoint: 9 | cell : 8 10 | detection_threshold: 0.2 11 | maskrcnn: 12 | add_maskrcnn: true 13 | trainable_layers: 5 # backbone trainable layers 14 | fix_backbone: true 15 | backbone_type: 'resnet50' 16 | image_mean: [0.45, 0.45, 0.45] 17 | image_std: [0.225, 0.225, 0.225] 18 | gcn: 19 | descriptor_dim: 256 20 | points_encoder_dims: [2, 4, 8, 16] 21 | hidden_dim: 512 22 | dropout: 0 23 | alpha: 0.2 24 | nheads: 4 25 | nout: 2048 -------------------------------------------------------------------------------- /config/experiment_tracking.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | ### kitti datasets 3 | name: 'kitti' 4 | normal_size: [384, 1280] # min_size, max_size 5 | ### OTB or VOT datasets 6 | # name: 'otb' 7 | # normal_size: [384, 1280] # min_size, max_size 8 | ### 9 | nclass: 81 10 | cache_in_memory: false 11 | validation_size: 96 12 | model: 13 | superpoint: 14 | cell : 8 15 | detection_threshold: 0.2 16 | maskrcnn: 17 | add_maskrcnn: true 18 | trainable_layers: 5 # backbone trainable layers 19 | fix_backbone: true 20 | backbone_type: 'resnet50' 21 | image_mean: [0.45, 0.45, 0.45] 22 | image_std: [0.225, 0.225, 0.225] 23 | gcn: 24 | descriptor_dim: 256 25 | points_encoder_dims: [2, 4, 8, 16] 26 | hidden_dim: 512 27 | dropout: 0 28 | alpha: 0.2 29 | nheads: 4 30 | nout: 2048 -------------------------------------------------------------------------------- /config/tracking_compare_plot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/config/tracking_compare_plot.yaml -------------------------------------------------------------------------------- /config/train_gcn_coco.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'coco' 3 | nclass: 81 4 | # TRAIN: 'coco_2014_train' 5 | TRAIN: 'coco_2014_train' 6 | VAL: 'coco_2014_minival' 7 | TEST: 'coco_2014_valminusminival' 8 | normal_size: [320, 320] # min_size, max_size 9 | cache_in_memory: false 10 | validation_size: 96 11 | augmentation: 12 | photometric: 13 | enable: true 14 | primitives: [ 15 | 'random_brightness', 'random_contrast', 'additive_speckle_noise', 16 | 'additive_gaussian_noise', 'add_shade', 'motion_blur'] 17 | params: 18 | random_brightness: {max_change: 50} 19 | random_contrast: {max_change: [0.5, 1.5]} 20 | additive_gaussian_noise: {std: [0, 10]} 21 | additive_speckle_noise: {intensity: 5} 22 | add_shade: 23 | amplitude: [-0.5, 0.5] 24 | 
kernel_size_interval: [100, 150] 25 | motion_blur: {max_ksize: 3} 26 | homographic: 27 | enable: false # not implemented 28 | gcn_mask: 29 | enable: false 30 | primitives: [ 31 | 'erode', 'dilate', 'random_region_zero', 'random_block_zero', 32 | 'random_block_one'] 33 | params: 34 | erode: {kernel_size: 10} 35 | dilate: {kernel_size: 10} 36 | random_region_zero: 37 | scale_x: 0.3 38 | scale_y: 0.3 39 | random_block_zero: 40 | num: 5 41 | size: 10 42 | random_block_one: 43 | num: 5 44 | size: 10 45 | warped_pair: 46 | enable: false 47 | params: 48 | translation: true 49 | rotation: true 50 | scaling: true 51 | perspective: true 52 | scaling_amplitude: 0.2 53 | perspective_amplitude_x: 0.2 54 | perspective_amplitude_y: 0.2 55 | patch_ratio: 0.85 56 | max_angle: 1.57 57 | allow_artifacts: true 58 | valid_border_margin: 3 59 | model: 60 | superpoint: 61 | cell : 8 62 | detection_threshold: 0.2 63 | mask_rcnn: 64 | add_maskrcnn: true 65 | trainable_layers: 5 # backbone trainable layers 66 | fix_backbone: true 67 | backbone_type: 'resnet50' 68 | image_mean: [0.45, 0.45, 0.45] 69 | image_std: [0.225, 0.225, 0.225] 70 | gcn: 71 | descriptor_dim: 256 72 | points_encoder_dims: [2, 4, 8, 16] 73 | hidden_dim: 512 74 | dropout: 0 75 | alpha: 0.2 76 | nheads: 4 77 | nout: 2048 78 | train: 79 | batch_szie: 16 80 | positive_margin: 1 81 | negative_margin: 0.2 82 | lambda_d: 0.5 83 | epochs : 100 84 | lr : 0.0001 85 | momentum : 0 86 | w_decay : 0.00001 87 | milestones : [1000, 2000, 5000, 10000, 15000] # iter 88 | gamma : 0.3 89 | checkpoint: 1000 90 | weight_lambda: [0.1, 10] 91 | -------------------------------------------------------------------------------- /config/train_maskrcnn_coco.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'coco' 3 | nclass: 81 4 | # TRAIN: 'coco_2014_train' 5 | TRAIN: 'coco_2014_train' 6 | VAL: 'coco_2014_minival' 7 | TEST: 'coco_2014_valminusminival' 8 | normal_size: [320, 320] # min_size, max_size 9 | cache_in_memory: false 10 | validation_size: 96 11 | model: 12 | maskrcnn: 13 | trainable_layers: 5 # backbone trainable layers 14 | backbone_type: 'resnet50' 15 | image_mean: [0.45, 0.45, 0.45] 16 | image_std: [0.225, 0.225, 0.225] 17 | batch_size : 8 18 | epochs : 10 19 | lr : 0.00001 20 | momentum : 0 21 | w_decay : 0.0001 22 | milestones : [10000, 20000, 50000, 100000, 150000] # iter 23 | gamma : 0.3 24 | dataset_size : 10000 25 | checkpoint: 1000 -------------------------------------------------------------------------------- /config/train_superpoint_coco.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'coco' 3 | nclass: 81 4 | # TRAIN: 'coco_2014_train' 5 | TRAIN: 'coco_2014_train' 6 | VAL: 'coco_2014_minival' 7 | TEST: 'coco_2014_valminusminival' 8 | normal_size: [320, 320] # min_size, max_size 9 | cache_in_memory: false 10 | validation_size: 96 11 | augmentation: 12 | photometric: 13 | enable: true 14 | primitives: [ 15 | 'random_brightness', 'random_contrast', 'additive_speckle_noise', 16 | 'additive_gaussian_noise', 'add_shade', 'motion_blur'] 17 | params: 18 | random_brightness: {max_change: 50} 19 | random_contrast: {max_change: [0.5, 1.5]} 20 | additive_gaussian_noise: {std: [0, 10]} 21 | additive_speckle_noise: {intensity: 5} 22 | add_shade: 23 | amplitude: [-0.5, 0.5] 24 | kernel_size_interval: [100, 150] 25 | motion_blur: {max_ksize: 3} 26 | homographic: 27 | enable: false # not implemented 28 | warped_pair: 29 | enable: false 30 | params: 31 | 
translation: true 32 | rotation: true 33 | scaling: true 34 | perspective: true 35 | scaling_amplitude: 0.2 36 | perspective_amplitude_x: 0.2 37 | perspective_amplitude_y: 0.2 38 | patch_ratio: 0.85 39 | max_angle: 1.57 40 | allow_artifacts: true 41 | valid_border_margin: 3 42 | model: 43 | superpoint: 44 | cell : 8 45 | train: 46 | name: 'superpoint' 47 | add_descriptor: 0 48 | lambda_d: 250 49 | positive_margin: 1 50 | negative_margin: 0.2 51 | lambda_loss: 5 52 | kernel_reg: 0. 53 | nms: 4 54 | batch_size : 8 55 | epochs : 10 56 | lr : 0.00001 57 | momentum : 0 58 | w_decay : 0.0001 59 | milestones : [10000, 20000, 50000, 100000, 150000] # iter 60 | gamma : 0.3 61 | dataset_size : 10000 62 | checkpoint: 1000 63 | gaussian_region: 64 | radius : 8 65 | milestones : [20000, 50000, 100000] # iter 66 | gamma : 0.5 67 | eval: 68 | detection_threshold: 0.15 69 | batch_size : 1 70 | dataset_size : 500 -------------------------------------------------------------------------------- /config/train_superpoint_synthetic.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'synthetic' 3 | nclass: 81 4 | normal_size: [320, 320] # min_size, max_size 5 | cache_in_memory: false 6 | validation_size: 96 7 | augmentation: 8 | photometric: 9 | enable: true 10 | primitives: [ 11 | 'random_brightness', 'random_contrast', 'additive_speckle_noise', 12 | 'additive_gaussian_noise', 'add_shade', 'motion_blur'] 13 | params: 14 | random_brightness: {max_change: 50} 15 | random_contrast: {max_change: [0.5, 1.5]} 16 | additive_gaussian_noise: {std: [0, 10]} 17 | additive_speckle_noise: {intensity: 5} 18 | add_shade: 19 | amplitude: [-0.5, 0.5] 20 | kernel_size_interval: [100, 150] 21 | motion_blur: {max_ksize: 3} 22 | homographic: 23 | enable: false # not implemented 24 | warped_pair: 25 | enable: false 26 | params: 27 | translation: true 28 | rotation: true 29 | scaling: true 30 | perspective: true 31 | scaling_amplitude: 0.2 32 | perspective_amplitude_x: 0.2 33 | perspective_amplitude_y: 0.2 34 | patch_ratio: 0.85 35 | max_angle: 1.57 36 | allow_artifacts: true 37 | valid_border_margin: 3 38 | model: 39 | superpoint: 40 | cell : 8 41 | train: 42 | name: 'superpoint' 43 | add_descriptor: false 44 | lambda_d: 250 45 | positive_margin: 1 46 | negative_margin: 0.2 47 | lambda_loss: 5 48 | kernel_reg: 0. 
49 | nms: 4 50 | batch_size : 2 51 | epochs : 10 52 | lr : 0.00001 53 | momentum : 0 54 | w_decay : 0.0001 55 | milestones : [10000, 20000, 50000, 100000, 150000] # iter 56 | gamma : 0.3 57 | dataset_size : 10000 58 | checkpoint: 1000 59 | gaussian_region: 60 | radius : 8 61 | milestones : [20000, 50000, 100000] # iter 62 | gamma : 0.5 63 | eval: 64 | detection_threshold: 0.15 65 | batch_size : 1 66 | dataset_size : 500 -------------------------------------------------------------------------------- /config/validate_detection.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'coco' 3 | nclass: 81 4 | TRAIN: 'coco_2014_train' 5 | VAL: 'coco_2014_minival' 6 | TEST: 'coco_2014_valminusminival' 7 | normal_size: [384, 1280] # min_size, max_size 8 | cache_in_memory: false 9 | validation_size: 96 10 | augmentation: 11 | photometric: 12 | enable: true 13 | primitives: [ 14 | 'random_brightness', 'random_contrast', 'additive_speckle_noise', 15 | 'additive_gaussian_noise', 'add_shade', 'motion_blur'] 16 | params: 17 | random_brightness: {max_change: 50} 18 | random_contrast: {max_change: [0.5, 1.5]} 19 | additive_gaussian_noise: {std: [0, 10]} 20 | additive_speckle_noise: {intensity: 5} 21 | add_shade: 22 | amplitude: [-0.5, 0.5] 23 | kernel_size_interval: [100, 150] 24 | motion_blur: {max_ksize: 3} 25 | homographic: 26 | enable: false # not implemented 27 | warped_pair: 28 | enable: false 29 | params: 30 | translation: true 31 | rotation: true 32 | scaling: true 33 | perspective: true 34 | scaling_amplitude: 0.2 35 | perspective_amplitude_x: 0.2 36 | perspective_amplitude_y: 0.2 37 | patch_ratio: 0.85 38 | max_angle: 1.57 39 | allow_artifacts: true 40 | valid_border_margin: 3 41 | model: 42 | superpoint: 43 | cell : 8 44 | detection_threshold: 0.2 45 | batch_size : 1 46 | maskrcnn: 47 | trainable_layers: 0 # backbone trainable layers 48 | fix_backbone: true 49 | backbone_type: 'resnet50' 50 | image_mean: [0.45, 0.45, 0.45] 51 | image_std: [0.225, 0.225, 0.225] 52 | gcn: 53 | descriptor_dim: 256 54 | points_encoder_dims: [2, 4, 8, 16] 55 | hidden_dim: 512 56 | dropout: 0 57 | alpha: 0.2 58 | nheads: 4 59 | nout: 2048 60 | -------------------------------------------------------------------------------- /config/validate_gcn.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | name: 'coco' 3 | nclass: 81 4 | # TRAIN: 'coco_2014_train' 5 | TRAIN: 'coco_2014_train' 6 | VAL: 'coco_2014_minival' 7 | TEST: 'coco_2014_valminusminival' 8 | normal_size: [320, 320] # min_size, max_size 9 | cache_in_memory: false 10 | validation_size: 96 11 | model: 12 | superpoint: 13 | cell : 8 14 | detection_threshold: 0.2 15 | maskrcnn: 16 | add_maskrcnn: true 17 | trainable_layers: 5 # backbone trainable layers 18 | fix_backbone: true 19 | backbone_type: 'resnet50' 20 | image_mean: [0.45, 0.45, 0.45] 21 | image_std: [0.225, 0.225, 0.225] 22 | gcn: 23 | descriptor_dim: 256 24 | points_encoder_dims: [2, 4, 8, 16] 25 | hidden_dim: 512 26 | dropout: 0 27 | alpha: 0.2 28 | nheads: 4 29 | nout: 2048 -------------------------------------------------------------------------------- /datasets/coco/coco.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | import torchvision 11 | import cv2 12 | import numpy as np 13 | 14 | 
from structures.segmentation_mask import SegmentationMask 15 | from datasets.utils import pipeline as pp 16 | from datasets.utils import transforms as T 17 | 18 | min_keypoints_per_image = 10 19 | 20 | 21 | def _count_visible_keypoints(anno): 22 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 23 | 24 | 25 | def _has_only_empty_bbox(anno): 26 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 27 | 28 | 29 | def has_valid_annotation(anno): 30 | # if it's empty, there is no annotation 31 | if len(anno) == 0: 32 | return False 33 | # if all boxes have close to zero area, there is no annotation 34 | if _has_only_empty_bbox(anno): 35 | return False 36 | # keypoints task have a slight different critera for considering 37 | # if an annotation is valid 38 | if "keypoints" not in anno[0]: 39 | return True 40 | # for keypoint detection tasks, only consider valid images those 41 | # containing at least min_keypoints_per_image 42 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 43 | return True 44 | return False 45 | 46 | 47 | class COCODataset(torchvision.datasets.coco.CocoDetection): 48 | def __init__( 49 | self, image_root, ann_file, config, remove_images_without_annotations, 50 | transforms=None 51 | ): 52 | super(COCODataset, self).__init__(image_root, ann_file) 53 | # sort indices for reproducible results 54 | self.ids = sorted(self.ids) 55 | 56 | # filter images without detection annotations 57 | if remove_images_without_annotations: 58 | ids = [] 59 | for img_id in self.ids: 60 | ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None) 61 | anno = self.coco.loadAnns(ann_ids) 62 | if has_valid_annotation(anno): 63 | ids.append(img_id) 64 | self.ids = ids 65 | 66 | self.categories = {cat['id']: cat['name'] for cat in self.coco.cats.values()} 67 | 68 | self.json_category_id_to_contiguous_id = { 69 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 70 | } 71 | self.contiguous_category_id_to_json_id = { 72 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 73 | } 74 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 75 | self._transforms = transforms 76 | 77 | # for superpoint 78 | self.length = len(self.ids) 79 | self.config = config 80 | self.points_root = image_root + "_points" 81 | 82 | def __getitem__(self, idx): 83 | data = {} 84 | 85 | # image 86 | image, anno = super(COCODataset, self).__getitem__(idx) 87 | image = cv2.cvtColor(np.asarray(image),cv2.COLOR_RGB2GRAY) 88 | image = cv2.merge([image, image, image]) 89 | image = self._transforms(image) 90 | data['image'] = image 91 | 92 | # for maskrcnn 93 | # filter crowd annotations 94 | # TODO might be better to add an extra field 95 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 96 | boxes = [obj["bbox"] for obj in anno] 97 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes, xywh 98 | # remove small bbox 99 | keep = (boxes[:, 2] > 4) & (boxes[:, 3] > 4) 100 | boxes[:, 2] = boxes[:, 0] + boxes[:, 2] - 1 101 | boxes[:, 3] = boxes[:, 1] + boxes[:, 3] - 1 102 | data['boxes'] = boxes[keep] # [x1, y1, x2, y2] 103 | 104 | labels = [obj["category_id"] for obj in anno] 105 | labels = [self.json_category_id_to_contiguous_id[c] for c in labels] 106 | labels = torch.tensor(labels) 107 | data['labels'] = labels[keep] 108 | 109 | if anno and "segmentation" in anno[0]: 110 | masks = [obj["segmentation"] for obj in anno] 111 | masks = SegmentationMask(masks, (image.shape[2], image.shape[1]), mode='poly') 112 | masks = 
masks.get_mask_tensor() 113 | masks = masks 114 | if len(masks.shape) == 2: 115 | masks = masks.unsqueeze(0) 116 | data['masks'] = masks[keep] 117 | 118 | # for superpoint 119 | image_info = self.get_img_info(idx) 120 | image_name = image_info['file_name'].split('.')[0] 121 | data['image_name'] = image_name 122 | 123 | point_name = image_name + ".txt" 124 | point_path = os.path.join(self.points_root, point_name) 125 | points = np.loadtxt(point_path, dtype=np.float32, ndmin=2) 126 | if np.sum(points) < 0: 127 | points = np.empty((0, 2), dtype=np.float32) 128 | points = torch.tensor(points) 129 | data['points'] = points 130 | 131 | return data 132 | 133 | def get_img_info(self, index): 134 | img_id = self.id_to_img_map[index] 135 | img_data = self.coco.imgs[img_id] 136 | return img_data 137 | 138 | if __name__ == "__main__": 139 | 140 | import torchvision.transforms as transforms 141 | from debug_tools.show_batch import show_batch, show_numpy 142 | from torch.utils.data import Dataset, DataLoader 143 | from datasets.utils.batch_collator import BatchCollator 144 | import yaml 145 | 146 | root = "/home/haoyuefan/xk_data/superpoint/coco/full/coco/train2014" 147 | annFile = "/home/haoyuefan/xk_data/superpoint/coco/full/coco/annotations/instances_train2014.json" 148 | config = "/home/xukuan/code/object_rcnn/config/train_superpoint_coco.yaml" 149 | 150 | f = open(config, 'r', encoding='utf-8') 151 | configs = f.read() 152 | configs = yaml.load(configs) 153 | 154 | dataset = COCODataset(root, annFile, configs['data'], True, transforms=transforms.ToTensor()) 155 | 156 | print(dataset.categories) -------------------------------------------------------------------------------- /datasets/coco/coco_cat.txt: -------------------------------------------------------------------------------- 1 | {1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane', 6: 'bus', 7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light', 11: 'fire hydrant', 13: 'stop sign', 14: 'parking meter', 15: 'bench', 16: 'bird', 17: 'cat', 18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear', 24: 'zebra', 25: 'giraffe', 27: 'backpack', 28: 'umbrella', 31: 'handbag', 32: 'tie', 33: 'suitcase', 34: 'frisbee', 35: 'skis', 36: 'snowboard', 37: 'sports ball', 38: 'kite', 39: 'baseball bat', 40: 'baseball glove', 41: 'skateboard', 42: 'surfboard', 43: 'tennis racket', 44: 'bottle', 46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon', 51: 'bowl', 52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange', 56: 'broccoli', 57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut', 61: 'cake', 62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed', 67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse', 75: 'remote', 76: 'keyboard', 77: 'cell phone', 78: 'microwave', 79: 'oven', 80: 'toaster', 81: 'sink', 82: 'refrigerator', 84: 'book', 85: 'clock', 86: 'vase', 87: 'scissors', 88: 'teddy bear', 89: 'hair drier', 90: 'toothbrush'} -------------------------------------------------------------------------------- /datasets/coco/paths_catalog.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | from copy import deepcopy 6 | 7 | class DatasetCatalog(object): 8 | DATA_DIR = "" 9 | DATASETS = { 10 | "coco_2017_train": { 11 | "img_dir": "coco/train2017", 12 | "ann_file": "coco/annotations/instances_train2017.json" 13 | }, 14 | "coco_2017_val": { 15 | "img_dir": "coco/val2017", 16 | "ann_file": 
"coco/annotations/instances_val2017.json" 17 | }, 18 | "coco_2014_train": { 19 | "img_dir": "coco/train2014", 20 | "ann_file": "coco/annotations/instances_train2014.json" 21 | }, 22 | "coco_2014_val": { 23 | "img_dir": "coco/val2014", 24 | "ann_file": "coco/annotations/instances_val2014.json" 25 | }, 26 | "coco_2014_minival": { 27 | "img_dir": "coco/val2014", 28 | "ann_file": "coco/annotations/instances_minival2014.json" 29 | }, 30 | "coco_2014_valminusminival": { 31 | "img_dir": "coco/val2014", 32 | "ann_file": "coco/annotations/instances_valminusminival2014.json" 33 | }, 34 | "keypoints_coco_2014_train": { 35 | "img_dir": "coco/train2014", 36 | "ann_file": "coco/annotations/person_keypoints_train2014.json", 37 | }, 38 | "keypoints_coco_2014_val": { 39 | "img_dir": "coco/val2014", 40 | "ann_file": "coco/annotations/person_keypoints_val2014.json" 41 | }, 42 | "keypoints_coco_2014_minival": { 43 | "img_dir": "coco/val2014", 44 | "ann_file": "coco/annotations/person_keypoints_minival2014.json", 45 | }, 46 | "keypoints_coco_2014_valminusminival": { 47 | "img_dir": "coco/val2014", 48 | "ann_file": "coco/annotations/person_keypoints_valminusminival2014.json", 49 | }, 50 | "voc_2007_train": { 51 | "data_dir": "voc/VOC2007", 52 | "split": "train" 53 | }, 54 | "voc_2007_train_cocostyle": { 55 | "img_dir": "voc/VOC2007/JPEGImages", 56 | "ann_file": "voc/VOC2007/Annotations/pascal_train2007.json" 57 | }, 58 | "voc_2007_val": { 59 | "data_dir": "voc/VOC2007", 60 | "split": "val" 61 | }, 62 | "voc_2007_val_cocostyle": { 63 | "img_dir": "voc/VOC2007/JPEGImages", 64 | "ann_file": "voc/VOC2007/Annotations/pascal_val2007.json" 65 | }, 66 | "voc_2007_test": { 67 | "data_dir": "voc/VOC2007", 68 | "split": "test" 69 | }, 70 | "voc_2007_test_cocostyle": { 71 | "img_dir": "voc/VOC2007/JPEGImages", 72 | "ann_file": "voc/VOC2007/Annotations/pascal_test2007.json" 73 | }, 74 | "voc_2012_train": { 75 | "data_dir": "voc/VOC2012", 76 | "split": "train" 77 | }, 78 | "voc_2012_train_cocostyle": { 79 | "img_dir": "voc/VOC2012/JPEGImages", 80 | "ann_file": "voc/VOC2012/Annotations/pascal_train2012.json" 81 | }, 82 | "voc_2012_val": { 83 | "data_dir": "voc/VOC2012", 84 | "split": "val" 85 | }, 86 | "voc_2012_val_cocostyle": { 87 | "img_dir": "voc/VOC2012/JPEGImages", 88 | "ann_file": "voc/VOC2012/Annotations/pascal_val2012.json" 89 | }, 90 | "voc_2012_test": { 91 | "data_dir": "voc/VOC2012", 92 | "split": "test" 93 | # PASCAL VOC2012 doesn't made the test annotations available, so there's no json annotation 94 | }, 95 | 96 | ############################################## 97 | # These ones are deprecated, should be removed 98 | "cityscapes_fine_instanceonly_seg_train_cocostyle": { 99 | "img_dir": "cityscapes/images", 100 | "ann_file": "cityscapes/annotations/instancesonly_filtered_gtFine_train.json" 101 | }, 102 | "cityscapes_fine_instanceonly_seg_val_cocostyle": { 103 | "img_dir": "cityscapes/images", 104 | "ann_file": "cityscapes/annotations/instancesonly_filtered_gtFine_val.json" 105 | }, 106 | "cityscapes_fine_instanceonly_seg_test_cocostyle": { 107 | "img_dir": "cityscapes/images", 108 | "ann_file": "cityscapes/annotations/instancesonly_filtered_gtFine_test.json" 109 | }, 110 | ############################################## 111 | 112 | "cityscapes_poly_instance_train": { 113 | "img_dir": "cityscapes/leftImg8bit/", 114 | "ann_dir": "cityscapes/gtFine/", 115 | "split": "train", 116 | "mode": "poly", 117 | }, 118 | "cityscapes_poly_instance_val": { 119 | "img_dir": "cityscapes/leftImg8bit", 120 | "ann_dir": 
"cityscapes/gtFine", 121 | "split": "val", 122 | "mode": "poly", 123 | }, 124 | "cityscapes_poly_instance_minival": { 125 | "img_dir": "cityscapes/leftImg8bit", 126 | "ann_dir": "cityscapes/gtFine", 127 | "split": "val", 128 | "mode": "poly", 129 | "mini": 10, 130 | }, 131 | "cityscapes_mask_instance_train": { 132 | "img_dir": "cityscapes/leftImg8bit/", 133 | "ann_dir": "cityscapes/gtFine/", 134 | "split": "train", 135 | "mode": "mask", 136 | }, 137 | "cityscapes_mask_instance_val": { 138 | "img_dir": "cityscapes/leftImg8bit", 139 | "ann_dir": "cityscapes/gtFine", 140 | "split": "val", 141 | "mode": "mask", 142 | }, 143 | "cityscapes_mask_instance_minival": { 144 | "img_dir": "cityscapes/leftImg8bit", 145 | "ann_dir": "cityscapes/gtFine", 146 | "split": "val", 147 | "mode": "mask", 148 | "mini": 10, 149 | }, 150 | } 151 | 152 | @staticmethod 153 | def get(name): 154 | if "coco" in name: 155 | data_dir = DatasetCatalog.DATA_DIR 156 | attrs = DatasetCatalog.DATASETS[name] 157 | args = dict( 158 | root=os.path.join(data_dir, attrs["img_dir"]), 159 | ann_file=os.path.join(data_dir, attrs["ann_file"]), 160 | ) 161 | return dict( 162 | factory="COCODataset", 163 | args=args, 164 | ) 165 | elif "voc" in name: 166 | data_dir = DatasetCatalog.DATA_DIR 167 | attrs = DatasetCatalog.DATASETS[name] 168 | args = dict( 169 | data_dir=os.path.join(data_dir, attrs["data_dir"]), 170 | split=attrs["split"], 171 | ) 172 | return dict( 173 | factory="PascalVOCDataset", 174 | args=args, 175 | ) 176 | elif "cityscapes" in name: 177 | data_dir = DatasetCatalog.DATA_DIR 178 | attrs = deepcopy(DatasetCatalog.DATASETS[name]) 179 | attrs["img_dir"] = os.path.join(data_dir, attrs["img_dir"]) 180 | attrs["ann_dir"] = os.path.join(data_dir, attrs["ann_dir"]) 181 | return dict(factory="CityScapesDataset", args=attrs) 182 | raise RuntimeError("Dataset not available: {}".format(name)) 183 | 184 | 185 | class ModelCatalog(object): 186 | S3_C2_DETECTRON_URL = "https://dl.fbaipublicfiles.com/detectron" 187 | C2_IMAGENET_MODELS = { 188 | "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", 189 | "MSRA/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl", 190 | "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", 191 | "MSRA/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl", 192 | "FAIR/20171220/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", 193 | } 194 | 195 | C2_DETECTRON_SUFFIX = "output/train/{}coco_2014_train%3A{}coco_2014_valminusminival/generalized_rcnn/model_final.pkl" 196 | C2_DETECTRON_MODELS = { 197 | "35857197/e2e_faster_rcnn_R-50-C4_1x": "01_33_49.iAX0mXvW", 198 | "35857345/e2e_faster_rcnn_R-50-FPN_1x": "01_36_30.cUF7QR7I", 199 | "35857890/e2e_faster_rcnn_R-101-FPN_1x": "01_38_50.sNxI7sX7", 200 | "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "06_31_39.5MIHi1fZ", 201 | "35858791/e2e_mask_rcnn_R-50-C4_1x": "01_45_57.ZgkA7hPB", 202 | "35858933/e2e_mask_rcnn_R-50-FPN_1x": "01_48_14.DzEQe4wC", 203 | "35861795/e2e_mask_rcnn_R-101-FPN_1x": "02_31_37.KqyEK4tT", 204 | "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "06_35_59.RZotkLKI", 205 | "37129812/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x": "09_35_36.8pzTQKYK", 206 | # keypoints 207 | "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "08_42_54.kdzV35ao" 208 | } 209 | 210 | @staticmethod 211 | def get(name): 212 | if name.startswith("Caffe2Detectron/COCO"): 213 | return ModelCatalog.get_c2_detectron_12_2017_baselines(name) 214 | if name.startswith("ImageNetPretrained"): 215 | return ModelCatalog.get_c2_imagenet_pretrained(name) 216 | raise RuntimeError("model not present 
in the catalog {}".format(name)) 217 | 218 | @staticmethod 219 | def get_c2_imagenet_pretrained(name): 220 | prefix = ModelCatalog.S3_C2_DETECTRON_URL 221 | name = name[len("ImageNetPretrained/"):] 222 | name = ModelCatalog.C2_IMAGENET_MODELS[name] 223 | url = "/".join([prefix, name]) 224 | return url 225 | 226 | @staticmethod 227 | def get_c2_detectron_12_2017_baselines(name): 228 | # Detectron C2 models are stored following the structure 229 | # prefix//2012_2017_baselines/.yaml./suffix 230 | # we use as identifiers in the catalog Caffe2Detectron/COCO// 231 | prefix = ModelCatalog.S3_C2_DETECTRON_URL 232 | dataset_tag = "keypoints_" if "keypoint" in name else "" 233 | suffix = ModelCatalog.C2_DETECTRON_SUFFIX.format(dataset_tag, dataset_tag) 234 | # remove identification prefix 235 | name = name[len("Caffe2Detectron/COCO/"):] 236 | # split in and 237 | model_id, model_name = name.split("/") 238 | # parsing to make it match the url address from the Caffe2 models 239 | model_name = "{}.yaml".format(model_name) 240 | signature = ModelCatalog.C2_DETECTRON_MODELS[name] 241 | unique_name = ".".join([model_name, signature]) 242 | url = "/".join([prefix, model_id, "12_2017_baselines", unique_name, suffix]) 243 | return url 244 | -------------------------------------------------------------------------------- /datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation as do_orig_coco_evaluation 2 | from .coco_eval_wrapper import do_coco_evaluation as do_wrapped_coco_evaluation 3 | from maskrcnn_benchmark.data.datasets import AbstractDataset, COCODataset 4 | 5 | 6 | def coco_evaluation( 7 | dataset, 8 | predictions, 9 | output_folder, 10 | box_only, 11 | iou_types, 12 | expected_results, 13 | expected_results_sigma_tol, 14 | ): 15 | if isinstance(dataset, COCODataset): 16 | return do_orig_coco_evaluation( 17 | dataset=dataset, 18 | predictions=predictions, 19 | box_only=box_only, 20 | output_folder=output_folder, 21 | iou_types=iou_types, 22 | expected_results=expected_results, 23 | expected_results_sigma_tol=expected_results_sigma_tol, 24 | ) 25 | elif isinstance(dataset, AbstractDataset): 26 | return do_wrapped_coco_evaluation( 27 | dataset=dataset, 28 | predictions=predictions, 29 | box_only=box_only, 30 | output_folder=output_folder, 31 | iou_types=iou_types, 32 | expected_results=expected_results, 33 | expected_results_sigma_tol=expected_results_sigma_tol, 34 | ) 35 | else: 36 | raise NotImplementedError( 37 | ( 38 | "Ground truth dataset is not a COCODataset, " 39 | "nor it is derived from AbstractDataset: type(dataset)=" 40 | "%s" % type(dataset) 41 | ) 42 | ) 43 | -------------------------------------------------------------------------------- /datasets/kitti/kitti_odomery.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import torch 8 | from torch.utils.data import Dataset 9 | from torchvision import transforms 10 | import numpy as np 11 | import numbers 12 | import random 13 | import os 14 | import cv2 15 | import yaml 16 | 17 | from datasets.utils.pipeline import makedir 18 | 19 | class KittiOdometry(Dataset): 20 | def __init__(self, data_root, id, dis_thr=15, angle_thr=1.0, interval=100): 21 | image_dir = os.path.join(data_root, "images", id, "image_0") 22 | label_file = os.path.join(data_root, "poses", 
(id+".txt")) 23 | image_names = os.listdir(image_dir) 24 | image_names.sort() 25 | 26 | 27 | self.data_root = data_root 28 | self.id = id 29 | self.dis_thr = dis_thr 30 | self.angle_thr = angle_thr 31 | self.interval = interval 32 | self.image_dir = image_dir 33 | self.image_names = image_names 34 | self.length = len(image_names) 35 | self.poses_gt = self.read_label_file(label_file) 36 | loop_gt, num_loop = self.find_loops(dis_thr, angle_thr, interval) 37 | # loop_gt, num_loop = None, None 38 | self.loop_gt = loop_gt 39 | self.num_loop = num_loop 40 | self.transform = transforms.ToTensor() 41 | 42 | def __len__(self): 43 | return self.length 44 | 45 | def __getitem__(self, idx): 46 | image_path = os.path.join(self.image_dir, self.image_names[idx]) 47 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 48 | 49 | if len(image.shape) == 2: 50 | image = cv2.merge([image, image, image]) 51 | 52 | return {'image': image, 'image_name': self.image_names[idx]} 53 | 54 | 55 | def read_label_file(self, file_path): 56 | poses_gt = {} 57 | 58 | index = 0 59 | fo = open(file_path, "r") 60 | for line in fo.readlines(): 61 | line = line.strip('\n') 62 | line = line.split(' ') 63 | line = [float(l) for l in line] 64 | line = np.array(line) 65 | line = line.reshape(3, 4) 66 | position = line[:, -1] 67 | rotation = line[:, :3] 68 | 69 | gt = {'index':index, 'position':position, 'rotation':rotation} 70 | 71 | image_name = self.image_names[index] 72 | poses_gt[image_name] = gt 73 | index = index + 1 74 | 75 | fo.close() 76 | 77 | return poses_gt 78 | 79 | 80 | def get_label(self, r): 81 | if type(r) == type(0): 82 | image_name = self.image_names[r] 83 | else: 84 | image_name = r 85 | 86 | if image_name in self.poses_gt: 87 | pose_gt = self.poses_gt[image_name] 88 | else: 89 | pose_gt = None 90 | 91 | return pose_gt 92 | 93 | 94 | def find_loops(self, dis_thr, angle_thr, interval): 95 | loop_gt = {} 96 | num_loop = 0 97 | for i in range(len(self.image_names)): 98 | image_name = self.image_names[i] 99 | if i < interval: 100 | loop_gt[image_name] = 0 101 | continue 102 | 103 | gt_i = self.get_label(i) 104 | position_i = gt_i['position'] 105 | rotation_i = gt_i['rotation'] 106 | 107 | for j in range(i): 108 | if i - j < interval: 109 | loop_gt[image_name] = 0 110 | break 111 | 112 | gt_j = self.get_label(j) 113 | position_j = gt_j['position'] 114 | rotation_j = gt_j['rotation'] 115 | 116 | delta_dis = np.linalg.norm((position_i-position_j)) 117 | delta_R = rotation_i.dot(rotation_j.T) 118 | delta_r, _ = cv2.Rodrigues(delta_R) 119 | deleta_angle = np.linalg.norm(delta_r) 120 | 121 | if delta_dis < dis_thr and deleta_angle < angle_thr: 122 | loop_gt[image_name] = 1 123 | num_loop += 1 124 | break 125 | 126 | return loop_gt, num_loop 127 | 128 | 129 | def get_loop_gt(self): 130 | return self.loop_gt, self.num_loop 131 | 132 | def image_size(self): 133 | ''' 134 | H, W 135 | ''' 136 | image_path = os.path.join(self.image_dir, self.image_names[0]) 137 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 138 | return image.shape[-2:] 139 | -------------------------------------------------------------------------------- /datasets/kitti/kitti_tracking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import torch 8 | from torch.utils.data import Dataset 9 | from torchvision import transforms 10 | import numpy as np 11 | import numbers 12 | 
import random 13 | import os 14 | import cv2 15 | 16 | class KittiTracking(Dataset): 17 | def __init__(self, data_root, id): 18 | image_dir = os.path.join(data_root, "images", id) 19 | label_file = os.path.join(data_root, "labels", (id+".txt")) 20 | image_names = os.listdir(image_dir) 21 | image_names.sort() 22 | 23 | self.image_dir = image_dir 24 | self.image_names = image_names 25 | self.length = len(image_names) 26 | self.track_gt = self.read_label_file(label_file) 27 | self.transform = transforms.ToTensor() 28 | 29 | def __len__(self): 30 | return self.length 31 | 32 | def __getitem__(self, idx): 33 | image_path = os.path.join(self.image_dir, self.image_names[idx]) 34 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 35 | 36 | if len(image.shape) == 2: 37 | image = cv2.merge([image, image, image]) 38 | 39 | # image = self.transform(image) 40 | 41 | return {'image': image, 'image_name': self.image_names[idx]} 42 | 43 | def read_label_file(self, file_path): 44 | track_gt = {} 45 | 46 | fo = open(file_path, "r") 47 | for line in fo.readlines(): 48 | line = line.strip('\n') 49 | line = line.split(' ') 50 | track_id = int(line[1]) 51 | if track_id < 0: 52 | continue 53 | frame_id = int(line[0]) 54 | object_type = line[2] 55 | truncated = int(line[3]) 56 | occulded = int(line[4]) 57 | # x1, y1, x2, y2 58 | box = [float(line[6]), float(line[7]), float(line[8]), float(line[9])] 59 | object_info = {'frame_id':frame_id, 'track_id':track_id, 'object_type':object_type, 60 | 'truncated':truncated, 'occulded':occulded, 'box':box, } 61 | 62 | image_name = self.image_names[frame_id] 63 | if image_name in track_gt: 64 | track_gt[image_name].append(object_info) 65 | else: 66 | track_gt[image_name] = [object_info] 67 | 68 | fo.close() 69 | 70 | # re-organioze groundtruth, Dict[List[Dict]] -> Dict[Dict[List]] 71 | new_track_gt = {} 72 | for image_name in track_gt.keys(): 73 | if len(track_gt[image_name]) > 0: 74 | new_track_gt[image_name] = {} 75 | for k in track_gt[image_name][0].keys(): 76 | data_list = [data[k] for data in track_gt[image_name]] 77 | new_track_gt[image_name][k] = data_list 78 | 79 | return new_track_gt 80 | 81 | def get_label(self, r): 82 | if type(r) == type(0): 83 | image_name = self.image_names[r] 84 | else: 85 | image_name = r 86 | 87 | if image_name in self.track_gt: 88 | image_info = self.track_gt[image_name] 89 | else: 90 | image_info = None 91 | 92 | return image_info 93 | 94 | def image_size(self): 95 | ''' 96 | H, W 97 | ''' 98 | image_path = os.path.join(self.image_dir, self.image_names[0]) 99 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 100 | return image.shape[-2:] 101 | -------------------------------------------------------------------------------- /datasets/otb/otb_tracking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import torch 8 | from torch.utils.data import Dataset 9 | from torchvision import transforms 10 | import numpy as np 11 | import numbers 12 | import random 13 | import os 14 | import cv2 15 | 16 | class OtbTracking(Dataset): 17 | def __init__(self, data_root, id): 18 | image_dir = os.path.join(data_root, id, "img") 19 | label_file = os.path.join(data_root, id, "groundtruth_rect.txt") 20 | image_names = os.listdir(image_dir) 21 | image_names.sort() 22 | 23 | self.image_dir = image_dir 24 | self.image_names = image_names 25 | self.length = len(image_names) 26 | 
self.track_gt = self.read_label_file(label_file) 27 | self.transform = transforms.ToTensor() 28 | 29 | def __len__(self): 30 | return self.length 31 | 32 | def __getitem__(self, idx): 33 | image_path = os.path.join(self.image_dir, self.image_names[idx]) 34 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 35 | 36 | if len(image.shape) == 2: 37 | image = cv2.merge([image, image, image]) 38 | 39 | # image = self.transform(image) 40 | 41 | return {'image': image, 'image_name': self.image_names[idx]} 42 | 43 | def read_label_file(self, file_path): 44 | track_gt = {} 45 | 46 | fo = open(file_path, "r") 47 | i = 0 48 | for line in fo.readlines(): 49 | line = line.strip('\n') 50 | if ',' in line: 51 | line = line.split(',') 52 | else: 53 | line = line.split('\t') 54 | 55 | track_id = 0 56 | frame_id = i 57 | # x1, y1, w, h 58 | x1, y1, w, h = float(line[0]), float(line[1]), float(line[2]), float(line[3]) 59 | x2, y2 = (x1 + w - 1), (y1 + h - 1) 60 | box = [x1, y1, x2, y2] 61 | object_info = {'frame_id':frame_id, 'track_id':track_id, 'box':box, } 62 | i = i + 1 63 | image_name = self.image_names[frame_id] 64 | track_gt[image_name] = object_info 65 | 66 | fo.close() 67 | 68 | return track_gt 69 | 70 | def get_label(self, r): 71 | if type(r) == type(0): 72 | image_name = self.image_names[r] 73 | else: 74 | image_name = r 75 | 76 | if image_name in self.track_gt: 77 | image_info = self.track_gt[image_name] 78 | else: 79 | image_info = None 80 | 81 | return image_info 82 | 83 | def image_size(self): 84 | ''' 85 | H, W 86 | ''' 87 | image_path = os.path.join(self.image_dir, self.image_names[0]) 88 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 89 | return image.shape[-2:] 90 | -------------------------------------------------------------------------------- /datasets/synthetic/synthetic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import torch 8 | from torch.utils.data import Dataset, DataLoader 9 | from torchvision import transforms 10 | import numpy as np 11 | import numbers 12 | import random 13 | import os 14 | import cv2 15 | 16 | from datasets.utils.pipeline import draw_umich_gaussian 17 | from datasets.utils import pipeline as pp 18 | 19 | class SyntheticDataset(Dataset): 20 | def __init__(self, data_root, use_for = None): 21 | image_dir = os.path.join(data_root, use_for, "images") 22 | point_dir = os.path.join(data_root, use_for, "points") 23 | image_names = os.listdir(image_dir) 24 | 25 | self.image_dir = image_dir 26 | self.point_dir = point_dir 27 | self.image_names = image_names 28 | self.length = len(image_names) 29 | 30 | self.transform = transforms.ToTensor() 31 | 32 | def __len__(self): 33 | return self.length 34 | 35 | def __getitem__(self, idx): 36 | image_path = os.path.join(self.image_dir, self.image_names[idx]) 37 | point_name = self.image_names[idx].split('.')[0] + ".txt" 38 | point_path = os.path.join(self.point_dir, point_name) 39 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 40 | points = np.loadtxt(point_path, dtype=np.float32, ndmin=2) 41 | if np.sum(points) < 0: 42 | points = np.empty((0, 2), dtype=np.float32) 43 | 44 | image_name = self.image_names[idx].split('.') 45 | image_name = image_name[0] 46 | 47 | if len(image.shape) == 2: 48 | image = cv2.merge([image, image, image]) 49 | 50 | image = self.transform(image) 51 | 52 | points = torch.tensor(points) 53 | 54 | return 
{'image': image, 'image_name': image_name, 'points': points} -------------------------------------------------------------------------------- /datasets/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/datasets/utils/__init__.py -------------------------------------------------------------------------------- /datasets/utils/batch_collator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import torch 6 | 7 | class BatchCollator(object): 8 | ''' 9 | pack dict batch 10 | ''' 11 | def __init__(self): 12 | super(BatchCollator,self).__init__() 13 | 14 | def __call__(self, batch): 15 | data= {} 16 | size = len(batch) 17 | for key in batch[0]: 18 | l = [] 19 | for i in range(size): 20 | l = l + [batch[i][key]] 21 | data[key] = l 22 | return data 23 | -------------------------------------------------------------------------------- /datasets/utils/build_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import torch 6 | from torchvision import transforms 7 | from torch.utils.data import DataLoader 8 | 9 | from datasets.coco.coco import COCODataset 10 | from datasets.utils.batch_collator import BatchCollator 11 | from datasets.coco.paths_catalog import DatasetCatalog 12 | 13 | def coco_loader( 14 | data_root, name, config, batch_size=2, transforms=transforms.ToTensor(), drop_last=True, 15 | remove_images_without_annotations=False, oints_root="", num_workers=8): 16 | data_info = DatasetCatalog.get(name) 17 | 18 | data_dir = os.path.join(data_root, data_info['args']['root']) 19 | ann_file = os.path.join(data_root, data_info['args']['ann_file']) 20 | 21 | dataset = COCODataset( 22 | image_root=data_dir, ann_file=ann_file, config=config, transforms=transforms, 23 | remove_images_without_annotations=remove_images_without_annotations) 24 | 25 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 26 | batch_sampler = torch.utils.data.sampler.BatchSampler(sampler=sampler, batch_size=batch_size, drop_last=drop_last) 27 | 28 | collator = BatchCollator() 29 | loader = DataLoader(dataset, batch_sampler=batch_sampler, collate_fn=collator, num_workers=num_workers) 30 | 31 | return loader -------------------------------------------------------------------------------- /datasets/utils/gcn_mask_augmentation.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | sys.path.append('.') 4 | import cv2 5 | import numpy as np 6 | import math 7 | import random 8 | 9 | """ Data augmentation for gcn masks """ 10 | 11 | augmentations = [ 12 | 'additive_gaussian_noise', 13 | 'additive_speckle_noise', 14 | 'random_brightness', 15 | 'random_contrast', 16 | 'affine_transform', 17 | 'perspective_transform', 18 | 'random_crop', 19 | 'add_shade', 20 | 'motion_blur' 21 | ] 22 | 23 | def erode(image, kernel_size): 24 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(kernel_size, kernel_size)) 25 | image = cv2.erode(image, kernel) 26 | return image 27 | 28 | def dilate(image, kernel_size): 29 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(kernel_size, kernel_size)) 30 | image = cv2.dilate(image, kernel) 31 | return image 32 | 33 | def random_region_zero(image, 
scale_x=0.3, scale_y=0.3): 34 | ys, xs = np.where(image > 0) 35 | x0, x1, y0, y1 = xs.min(), xs.max(), ys.min(), ys.max() 36 | 37 | region_width = (x1 - x0) * scale_x 38 | region_height = (y1 - y0) * scale_y 39 | 40 | x0 = random.uniform(x0, (x1 - region_width)) 41 | y0 = random.uniform(y0, (y1 - region_height)) 42 | 43 | x1 = x0 + region_width 44 | y1 = y0 + region_height 45 | 46 | x0, x1, y0, y1 = int(x0), int(x1), int(y0), int(y1) 47 | 48 | image[y0:y1, x0:x1] = 0 49 | 50 | return image 51 | 52 | def random_block_zero(image, num=5, size=10): 53 | ys, xs = np.where(image > 0) 54 | x0, x1, y0, y1 = xs.min(), xs.max(), ys.min(), ys.max() 55 | 56 | block_xs = random.sample(range(x0, x1), num) 57 | block_ys = random.sample(range(y0, y1), num) 58 | mask = np.ones_like(image) 59 | 60 | for y, x in zip(block_ys, block_xs): 61 | mask[y, x] = 0 62 | 63 | kernel_size = size 64 | mask = erode(mask, kernel_size) 65 | 66 | image = (image * mask).astype(np.uint8) 67 | 68 | return image 69 | 70 | def random_block_one(image, num=10, size=10): 71 | H, W = image.shape[-2:] 72 | 73 | block_xs = random.sample(range(0, (W-1)), num) 74 | block_ys = random.sample(range(0, (H-1)), num) 75 | 76 | mask = np.zeros_like(image) 77 | 78 | for y, x in zip(block_ys, block_xs): 79 | mask[y, x] = 1 80 | 81 | kernel_size = size 82 | mask = dilate(mask, kernel_size) 83 | 84 | img = (image > 0).astype(float) 85 | value = np.sum(image) / np.sum(img) 86 | 87 | img = img + mask 88 | 89 | img = (img > 0).astype(float) 90 | 91 | img = img * value 92 | img = img.astype(np.uint8) 93 | 94 | return img 95 | 96 | 97 | if __name__ == "__main__": 98 | 99 | from debug_tools.show_batch import show_numpy 100 | 101 | img1 = np.ones([640, 640]) 102 | img2 = np.zeros([640, 640]) 103 | img = np.concatenate([img1, img2], 1) 104 | img = (img * 150.0 + 0.5).astype(np.uint8) 105 | img = np.clip(img, 0, 255) 106 | 107 | img = dilate(img, kernel_size=10) 108 | 109 | img = cv2.merge([img, img, img]) 110 | show_numpy(img) -------------------------------------------------------------------------------- /datasets/utils/pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from random import sample 5 | 6 | from datasets.utils import augmentation_legacy as photaug 7 | from datasets.utils import gcn_mask_augmentation as maskaug 8 | from datasets.utils.homographies import sample_homography, warp_points, filter_points 9 | 10 | def makedir(d): 11 | if not os.path.exists(d): 12 | os.makedirs(d) 13 | 14 | def parse_primitives(names, all_primitives): 15 | p = all_primitives if (names == 'all') \ 16 | else (names if isinstance(names, list) else [names]) 17 | assert set(p) <= set(all_primitives) 18 | return p 19 | 20 | def space_to_depth(data, cell_size, add_dustbin=False): 21 | H, W = data.shape[0], data.shape[1] 22 | Hc, Wc = H // cell_size, W // cell_size 23 | result = data[:, np.newaxis, :, np.newaxis] 24 | result = result.reshape(Hc, cell_size, Wc, cell_size) 25 | result = np.transpose(result, [1, 3, 0, 2]) 26 | result = result.reshape(1, cell_size ** 2, Hc, Wc) 27 | result = result.squeeze() 28 | if add_dustbin: 29 | dustbin = np.ones([Hc, Wc]) 30 | depth_sum = result.sum(axis=0) 31 | dustbin[depth_sum>0] = 0 32 | result = np.concatenate((result, dustbin[np.newaxis, :, :]), axis=0) 33 | return result 34 | 35 | ''' 36 | draw gaussian function 37 | ''' 38 | def gaussian2D(shape, sigma=1): 39 | # generate a gaussion in a box 40 | m, n = [(ss - 1.) / 2. 
for ss in shape] 41 | y, x = np.ogrid[-m:m+1,-n:n+1] 42 | 43 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 44 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 45 | return h 46 | 47 | def draw_umich_gaussian(heatmap, center, radius, k=1): 48 | diameter = 2 * radius + 1 49 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 50 | 51 | x, y = int(center[0]), int(center[1]) 52 | 53 | height, width = heatmap.shape[0:2] 54 | 55 | left, right = min(x, radius), min(width - x, radius + 1) 56 | top, bottom = min(y, radius), min(height - y, radius + 1) 57 | 58 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 59 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 60 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 61 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 62 | 63 | return heatmap 64 | 65 | def convert_to_guassian(label, radius): 66 | label_gaussian = np.zeros(label.shape) 67 | ys, xs = np.where(label > 0) 68 | if len(xs) != 0: 69 | for i in range(len(xs)): 70 | draw_umich_gaussian(label_gaussian, (xs[i], ys[i]), radius) 71 | 72 | return label_gaussian 73 | 74 | def generate_shape_gaussian(matrix, radius): 75 | ''' 76 | Generate 3D or 4D shape like gaussian 77 | ''' 78 | origin_shape = matrix.shape 79 | if len(origin_shape) == 2: 80 | return convert_to_guassian(matrix, radius) 81 | elif len(origin_shape) == 3: 82 | origin_matrix = matrix[np.newaxis, :, :, :] 83 | elif len(origin_shape) == 4: 84 | origin_matrix = matrix 85 | else: 86 | return None 87 | 88 | matrix_gaussian = np.zeros(origin_matrix.shape) 89 | for i in range(matrix_gaussian.shape[0]): 90 | for j in range(matrix_gaussian.shape[1]): 91 | matrix_gaussian[i, j, :, :] = convert_to_guassian(origin_matrix[i, j, :, :], radius) 92 | 93 | if len(origin_shape) == 3: 94 | matrix_gaussian = np.squeeze(matrix_gaussian, 0) 95 | 96 | return matrix_gaussian 97 | 98 | ''' 99 | generate valid mask, heatmap and keypoint map 100 | ''' 101 | def generate_valid_mask(img_shape, border_remove=2): 102 | ''' 103 | inputs : 104 | img_shape: [H, W] 105 | ''' 106 | H, W = img_shape[0:2] 107 | valid_mask = np.zeros((H, W), dtype=np.int) 108 | valid_mask[border_remove:(H-border_remove), border_remove:(W-border_remove)] = 1 109 | return valid_mask 110 | 111 | def generate_keypoint_map(img_shape, points): 112 | ''' 113 | inputs : 114 | img_shape: [H, W] 115 | points: N * 2, [hy, wx] 116 | ''' 117 | height, width = img_shape[:2] 118 | points = (points + 0.5).astype(int) 119 | points[:, 0] = np.clip(points[:, 0], 0, height - 1) 120 | points[:, 1] = np.clip(points[:, 1], 0, width -1) 121 | keypoint_map = np.zeros(img_shape[:2], dtype=np.float32) 122 | for h, w in points: 123 | keypoint_map[h, w] = 1.0 124 | return keypoint_map 125 | 126 | def generate_heatmap(img_shape, points, gaussian_radius): 127 | ''' 128 | inputs: 129 | img_shape: [H, W] 130 | points: N * 2, [hy, wx] 131 | gaussian_radius: int 132 | ''' 133 | if gaussian_radius < 2: 134 | heatmap = generate_keypoint_map(img_shape, points) 135 | else: 136 | heatmap = np.zeros(img_shape[:2]) 137 | for i in range(points.shape[0]): 138 | heatmap = draw_umich_gaussian(heatmap, (points[i][1], points[i][0]), gaussian_radius) 139 | return heatmap 140 | 141 | def generate_idx_map(points, shape): 142 | ''' 143 | inputs: 144 | image: numpy array, [H, W] 145 | points: N * 2, [hy, wx] 146 | ''' 147 | idx_map = np.zeros(shape) 148 | for i in range(len(points)): 149 | hy, wx = int(points[i][0]), 
int(points[i][1]) 150 | idx_map[hy, wx] = i 151 | 152 | return idx_map 153 | 154 | 155 | ''' 156 | homographic augmentation 157 | ''' 158 | def homographic_augmentation(image, points, config): 159 | ''' 160 | inputs: 161 | image: numpy array 162 | points: N * 2, [hy, wx] 163 | config 164 | ''' 165 | H = sample_homography(image.shape[:2], **config['params']) 166 | 167 | warped_image = cv2.warpPerspective(image, H, (image.shape[1], image.shape[0])) 168 | 169 | if points.shape[0] > 0: 170 | warped_points = warp_points(H, points) 171 | warped_points = filter_points(image.shape[:2], warped_points) 172 | else: 173 | warped_points = points 174 | 175 | valid_mask = np.ones(image.shape[:2], dtype=np.uint8) * 255 176 | warped_valid_mask = cv2.warpPerspective(valid_mask, H, (valid_mask.shape[1], valid_mask.shape[0])) 177 | k = np.ones((config['valid_border_margin'], config['valid_border_margin']), np.uint8) 178 | warped_valid_mask = cv2.erode(warped_valid_mask, k, iterations=1) 179 | warped_valid_mask = (warped_valid_mask > 0).astype(int) 180 | 181 | return warped_image, warped_points, warped_valid_mask, H 182 | 183 | ''' 184 | photometric augmentation 185 | ''' 186 | def photometric_augmentation(image, points, config): 187 | ''' 188 | inputs: 189 | image: numpy array 190 | points: N * 2, [hy, wx] 191 | config 192 | ''' 193 | primitives = config['primitives'] 194 | fun_name = sample(primitives, 1)[0] 195 | fun_config = config['params'][fun_name] 196 | 197 | if len(image.shape) == 3: 198 | img = image[:, :, 0] 199 | else: 200 | img = image 201 | 202 | aug = getattr(photaug, fun_name) 203 | img, points = aug(img, np.flip(points, 1), **fun_config) 204 | 205 | img = cv2.merge([img, img, img]) 206 | points = np.flip(points, 1) 207 | 208 | return img, points 209 | 210 | 211 | ''' 212 | mask augmentation 213 | ''' 214 | def mask_augmentation(masks, config): 215 | ''' 216 | inputs: 217 | image: numpy array or List[numpy array] 218 | config 219 | ''' 220 | primitives = config['primitives'] 221 | 222 | new_masks = [] 223 | for mask in masks: 224 | fun_name = sample(primitives, 1)[0] 225 | fun_config = config['params'][fun_name] 226 | aug = getattr(maskaug, fun_name) 227 | mask = aug(mask, **fun_config) 228 | new_masks.append(mask) 229 | 230 | return np.stack(new_masks) -------------------------------------------------------------------------------- /datasets/utils/transforms.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch import nn, Tensor 4 | from torch.nn import functional as F 5 | import torchvision 6 | from typing import List, Tuple, Dict, Optional 7 | 8 | @torch.jit.unused 9 | def _resize_image_and_masks_onnx(image, self_min_size, self_max_size, target): 10 | # type: (Tensor, float, float, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]] 11 | from torch.onnx import operators 12 | im_shape = operators.shape_as_tensor(image)[-2:] 13 | min_size = torch.min(im_shape).to(dtype=torch.float32) 14 | max_size = torch.max(im_shape).to(dtype=torch.float32) 15 | scale_factor = torch.min(self_min_size / min_size, self_max_size / max_size) 16 | 17 | image = torch.nn.functional.interpolate( 18 | image[None], scale_factor=scale_factor, mode='bilinear', recompute_scale_factor=True, 19 | align_corners=False)[0] 20 | 21 | if target is None: 22 | return image, target 23 | 24 | if "masks" in target: 25 | mask = target["masks"] 26 | mask = F.interpolate(mask[:, None].float(), scale_factor=scale_factor)[:, 0].byte() 27 
| target["masks"] = mask 28 | return image, target 29 | 30 | 31 | def _resize_image_and_masks(image, self_min_size, self_max_size, target): 32 | # type: (Tensor, float, float, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]] 33 | im_shape = torch.tensor(image.shape[-2:]) 34 | min_size = float(torch.min(im_shape)) 35 | max_size = float(torch.max(im_shape)) 36 | scale_factor = self_min_size / min_size 37 | if max_size * scale_factor > self_max_size: 38 | scale_factor = self_max_size / max_size 39 | image = torch.nn.functional.interpolate( 40 | image[None], scale_factor=scale_factor, mode='bilinear', recompute_scale_factor=True, 41 | align_corners=False)[0] 42 | 43 | if target is None: 44 | return image, target 45 | 46 | if "masks" in target: 47 | mask = target["masks"] 48 | mask = F.interpolate(mask[:, None].float(), scale_factor=scale_factor)[:, 0].byte() 49 | target["masks"] = mask 50 | return image, target 51 | 52 | def normalize(image, image_mean, image_std): 53 | dtype, device = image.dtype, image.device 54 | mean = torch.as_tensor(image_mean, dtype=dtype, device=device) 55 | std = torch.as_tensor(image_std, dtype=dtype, device=device) 56 | return (image - mean[:, None, None]) / std[:, None, None] 57 | 58 | def resize(image, target, min_size, max_size): 59 | # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]] 60 | h, w = image.shape[-2:] 61 | if torchvision._is_tracing(): 62 | image, target = _resize_image_and_masks_onnx(image, min_size, float(max_size), target) 63 | else: 64 | image, target = _resize_image_and_masks(image, min_size, float(max_size), target) 65 | 66 | if target is None: 67 | return image, target 68 | 69 | bbox = target["boxes"] 70 | bbox = resize_boxes(bbox, (h, w), image.shape[-2:]) 71 | target["boxes"] = bbox 72 | 73 | if "keypoints" in target: 74 | keypoints = target["keypoints"] 75 | keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:]) 76 | target["keypoints"] = keypoints 77 | return image, target 78 | 79 | # _onnx_batch_images() is an implementation of 80 | # batch_images() that is supported by ONNX tracing. 
81 | @torch.jit.unused 82 | def _onnx_batch_images(images, size_divisible=32): 83 | # type: (List[Tensor], int) -> Tensor 84 | max_size = [] 85 | for i in range(images[0].dim()): 86 | max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64) 87 | max_size.append(max_size_i) 88 | stride = size_divisible 89 | max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64) 90 | max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64) 91 | max_size = tuple(max_size) 92 | 93 | # work around for 94 | # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 95 | # which is not yet supported in onnx 96 | padded_imgs = [] 97 | for img in images: 98 | padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] 99 | padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) 100 | padded_imgs.append(padded_img) 101 | 102 | return torch.stack(padded_imgs) 103 | 104 | def resize_keypoints(keypoints, original_size, new_size): 105 | # type: (Tensor, List[int], List[int]) -> Tensor 106 | ratios = [ 107 | torch.tensor(s, dtype=torch.float32, device=keypoints.device) / 108 | torch.tensor(s_orig, dtype=torch.float32, device=keypoints.device) 109 | for s, s_orig in zip(new_size, original_size) 110 | ] 111 | ratio_h, ratio_w = ratios 112 | resized_data = keypoints.clone() 113 | if torch._C._get_tracing_state(): 114 | resized_data_0 = resized_data[:, :, 0] * ratio_w 115 | resized_data_1 = resized_data[:, :, 1] * ratio_h 116 | resized_data = torch.stack((resized_data_0, resized_data_1, resized_data[:, :, 2]), dim=2) 117 | else: 118 | resized_data[..., 0] *= ratio_w 119 | resized_data[..., 1] *= ratio_h 120 | return resized_data 121 | 122 | 123 | def resize_boxes(boxes, original_size, new_size): 124 | # type: (Tensor, List[int], List[int]) -> Tensor 125 | ratios = [ 126 | torch.tensor(s, dtype=torch.float32, device=boxes.device) / 127 | torch.tensor(s_orig, dtype=torch.float32, device=boxes.device) 128 | for s, s_orig in zip(new_size, original_size) 129 | ] 130 | ratio_height, ratio_width = ratios 131 | xmin, ymin, xmax, ymax = boxes.unbind(1) 132 | 133 | xmin = xmin * ratio_width 134 | xmax = xmax * ratio_width 135 | ymin = ymin * ratio_height 136 | ymax = ymax * ratio_height 137 | return torch.stack((xmin, ymin, xmax, ymax), dim=1) 138 | -------------------------------------------------------------------------------- /datasets/vot/vot_tracking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import torch 8 | from torch.utils.data import Dataset 9 | from torchvision import transforms 10 | import numpy as np 11 | import numbers 12 | import random 13 | import os 14 | import cv2 15 | 16 | class VotTracking(Dataset): 17 | def __init__(self, data_root, id): 18 | image_dir = os.path.join(data_root, id, "images") 19 | label_file = os.path.join(data_root, id, "configs/groundtruth.txt") 20 | image_names = os.listdir(image_dir) 21 | image_names.sort() 22 | 23 | self.image_dir = image_dir 24 | self.image_names = image_names 25 | self.length = len(image_names) 26 | self.track_gt = self.read_label_file(label_file) 27 | self.transform = transforms.ToTensor() 28 | 29 | def __len__(self): 30 | return self.length 31 | 32 | def __getitem__(self, idx): 33 | image_path = 
os.path.join(self.image_dir, self.image_names[idx]) 34 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 35 | 36 | if len(image.shape) == 2: 37 | image = cv2.merge([image, image, image]) 38 | 39 | # image = self.transform(image) 40 | 41 | return {'image': image, 'image_name': self.image_names[idx]} 42 | 43 | def read_label_file(self, file_path): 44 | track_gt = {} 45 | 46 | fo = open(file_path, "r") 47 | i = 0 48 | for line in fo.readlines(): 49 | line = line.strip('\n') 50 | line = line.split(',') 51 | track_id = 0 52 | frame_id = i 53 | # x1, y1, w, h 54 | if len(line) == 4: 55 | x1, y1, w, h = float(line[0]), float(line[1]), float(line[2]), float(line[3]) 56 | x2, y2 = (x1 + w - 1), (y1 + h - 1) 57 | else: 58 | assert len(line) == 8 59 | line = [float(l) for l in line] 60 | line = np.array(line) 61 | x1, x2 = min(line[0::2]), max(line[0::2]) 62 | y1, y2 = min(line[1::2]), max(line[1::2]) 63 | 64 | box = [x1, y1, x2, y2] 65 | object_info = {'frame_id':frame_id, 'track_id':track_id, 'box':box, } 66 | i = i + 1 67 | image_name = self.image_names[frame_id] 68 | track_gt[image_name] = object_info 69 | 70 | fo.close() 71 | 72 | return track_gt 73 | 74 | def get_label(self, r): 75 | if type(r) == type(0): 76 | image_name = self.image_names[r] 77 | else: 78 | image_name = r 79 | 80 | if image_name in self.track_gt: 81 | image_info = self.track_gt[image_name] 82 | else: 83 | image_info = None 84 | 85 | return image_info 86 | 87 | def image_size(self): 88 | ''' 89 | H, W 90 | ''' 91 | image_path = os.path.join(self.image_dir, self.image_names[0]) 92 | image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 93 | return image.shape[-2:] 94 | -------------------------------------------------------------------------------- /debug_tools/command.txt: -------------------------------------------------------------------------------- 1 | python validate_maskrcnn.py -c config/maskrcnn_coco.yaml -g 1 -s saving/debug/ -d /home/haoyuefan/xk_data/superpoint/coco/full/ -m saving/models/object_rcnn3/object_rcnn_iter124000.pth 2 | 3 | python train_maskrcnn.py -c config/maskrcnn_coco.yaml -d /home/haoyuefan/xk_data/superpoint/coco/full/ -s saving/models/ -g 1 -m saving/models/object_rcnn3/object_rcnn_iter124000.pth 4 | 5 | python debug_tools/show_match.py -d /home/haoyuefan/xk_data/superpoint/match_debug/pairs/ -s /home/haoyuefan/xk_data/superpoint/match_debug/result/ -g 1 -c config/maskrcnn_coco.yaml -m saving/models/object_rcnn_iter205000.pth 6 | 7 | python train_synthetic.py -c config/superpoint_synthetic.yaml -d /home/haoyuefan/xk_data/superpoint/synthetic_dataset -g 1 -s saving/models/synthetic -p 1 -m saving/models/pretrained/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth -------------------------------------------------------------------------------- /debug_tools/draw_points.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | # sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages/') 8 | import numpy as np 9 | import time 10 | import sys 11 | import os 12 | import argparse 13 | from datetime import datetime 14 | import yaml 15 | import cv2 16 | 17 | from datasets.utils.pipeline import makedir 18 | 19 | data_root = "/home/haoyuefan/xk_data/superpoint/coco/full/coco" 20 | images_dir = os.path.join(data_root, "train2014") 21 | points_dir = os.path.join(data_root, "train2014_points") 22 | debug_dir = os.path.join(data_root, 
"show/points") 23 | makedir(debug_dir) 24 | 25 | image_names = os.listdir(images_dir) 26 | for image_name in image_names: 27 | image_path = os.path.join(images_dir, image_name) 28 | vis_image = os.path.join(debug_dir, image_name) 29 | file_name = image_name.split('.')[0] 30 | point_path = os.path.join(points_dir, '{}.txt'.format(file_name)) 31 | 32 | img = cv2.imread(image_path) 33 | points = np.loadtxt(point_path, ndmin=2) 34 | for j in range(points.shape[0]): 35 | x = points[j][0].astype(int) 36 | y = points[j][1].astype(int) 37 | if(x < 0) : 38 | break 39 | cv2.circle(img, (y,x), 1, (0,0,255), thickness=-1) 40 | 41 | cv2.imwrite(vis_image, img) 42 | -------------------------------------------------------------------------------- /debug_tools/show_batch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | from matplotlib import pyplot as plt 8 | import torchvision 9 | 10 | def show_batch(batch): 11 | grid = torchvision.utils.make_grid(batch) 12 | plt.imshow(grid.numpy()[::-1].transpose((1, 2, 0))) 13 | plt.title('Batch') 14 | plt.show() 15 | 16 | def show_batch_opencv(batch): 17 | T = torchvision.transforms.ToTensor() 18 | batch = [T(img) for img in batch] 19 | batch = torch.stack(batch) 20 | show_batch(batch) 21 | 22 | def show_numpy(img): 23 | plt.imshow(img) 24 | plt.title('Image') 25 | plt.show() -------------------------------------------------------------------------------- /debug_tools/show_detections.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import datetime 8 | import logging 9 | import os 10 | import time 11 | import argparse 12 | import yaml 13 | import cv2 14 | import torch 15 | import torch.distributed as dist 16 | from torchvision import transforms 17 | import torch.optim as optim 18 | from torch.autograd import Variable 19 | from torch.optim import lr_scheduler 20 | 21 | from model.build_model import build_maskrcnn, build_superpoint_model 22 | from model.inference import detection_inference 23 | 24 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 25 | 26 | def read_image(img_path): 27 | image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 28 | if len(image.shape) == 2: 29 | image = cv2.merge([image, image, image]) 30 | return image 31 | 32 | def show_detections(configs): 33 | # read configs 34 | ## command line config 35 | use_gpu = configs['use_gpu'] 36 | save_dir = configs['save_dir'] 37 | data_root = configs['data_root'] 38 | ## data cofig 39 | data_config = configs['data'] 40 | ## superpoint model config 41 | superpoint_model_config = configs['model']['superpoint'] 42 | detection_threshold = superpoint_model_config['detection_threshold'] 43 | gaussian_radius = 2 44 | ## others 45 | configs['num_gpu'] = [0] 46 | configs['public_model'] = 0 47 | 48 | # model 49 | maskrcnn_model = build_maskrcnn(configs) 50 | superpoint_model = build_superpoint_model(configs) 51 | 52 | # data 53 | image_names = os.listdir(data_root) 54 | 55 | transform = transforms.ToTensor() 56 | with torch.no_grad(): 57 | maskrcnn_model.eval() 58 | for image_name in image_names: 59 | print(image_name) 60 | image_path = os.path.join(data_root, image_name) 61 | image = read_image(image_path) 62 | image = transform(image) 63 | image = image.unsqueeze(0) 64 | 
batch = {'image': image, 'image_name': [image_name]} 65 | 66 | detection_inference(maskrcnn_model, superpoint_model, batch, use_gpu, 1, 67 | detection_threshold, data_config, save_dir=save_dir) 68 | 69 | 70 | def main(): 71 | parser = argparse.ArgumentParser(description="Training") 72 | parser.add_argument( 73 | "-c", "--config_file", 74 | dest = "config_file", 75 | type = str, 76 | default = "" 77 | ) 78 | parser.add_argument( 79 | "-g", "--gpu", 80 | dest = "gpu", 81 | type = int, 82 | default = 0 83 | ) 84 | parser.add_argument( 85 | "-s", "--save_dir", 86 | dest = "save_dir", 87 | type = str, 88 | default = "" 89 | ) 90 | parser.add_argument( 91 | "-d", "--data_root", 92 | dest = "data_root", 93 | type = str, 94 | default = "" 95 | ) 96 | parser.add_argument( 97 | "-mm", "--maskrcnn_model_path", 98 | dest = "maskrcnn_model_path", 99 | type = str, 100 | default = "" 101 | ) 102 | parser.add_argument( 103 | "-sm", "--superpoint_model_path", 104 | dest = "superpoint_model_path", 105 | type = str, 106 | default = "" 107 | ) 108 | args = parser.parse_args() 109 | 110 | config_file = args.config_file 111 | f = open(config_file, 'r', encoding='utf-8') 112 | configs = f.read() 113 | configs = yaml.load(configs) 114 | configs['use_gpu'] = args.gpu 115 | configs['save_dir'] = args.save_dir 116 | configs['data_root'] = args.data_root 117 | configs['maskrcnn_model_path'] = args.maskrcnn_model_path 118 | configs['superpoint_model_path'] = args.superpoint_model_path 119 | 120 | show_detections(configs) 121 | 122 | if __name__ == "__main__": 123 | main() 124 | 125 | 126 | -------------------------------------------------------------------------------- /debug_tools/show_match.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | 16 | from model.build_model import build_superpoint_model 17 | from model.inference import maskrcnn_inference 18 | from datasets.utils.pipeline import makedir 19 | from debug_tools.show_batch import show_batch, show_numpy 20 | from utils.tools import tensor_to_numpy 21 | from datasets.utils.postprocess import nms_fast 22 | from kornia.feature import match_nn 23 | 24 | os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2, 3" 25 | 26 | def process(prob, desc, border_remove, nms_dist): 27 | # Convert pytorch -> numpy. 28 | heatmap = prob.squeeze() # H * W 29 | desc_data = desc.squeeze() # 256 * H * W 30 | ys, xs = np.where(heatmap > 0) # Confidence threshold. 31 | if len(xs) == 0: 32 | return None, None 33 | pts = np.zeros((3, len(xs))) # Populate point data sized 3xN. 34 | pts[0, :] = xs 35 | pts[1, :] = ys 36 | pts[2, :] = heatmap[ys, xs] 37 | H, W = heatmap.shape[-2:] 38 | pts, _ = nms_fast(pts, H, W, dist_thresh=nms_dist) 39 | inds = np.argsort(pts[2,:]) 40 | pts = pts[:,inds[::-1]] # Sort by confidence. 41 | # Remove points along border. 
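  # At this point pts is a 3xN array: row 0 holds x coordinates, row 1 holds y
  # coordinates, and row 2 holds the heatmap confidence of each keypoint, with
  # columns already NMS-filtered and sorted by descending confidence. The lines
  # below drop any keypoint lying within border_remove pixels of the image edge
  # before descriptors are sampled for the surviving points.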
42 | bord = border_remove 43 | toremoveW = np.logical_or(pts[0, :] < bord, pts[0, :] >= (W-bord)) 44 | toremoveH = np.logical_or(pts[1, :] < bord, pts[1, :] >= (H-bord)) 45 | toremove = np.logical_or(toremoveW, toremoveH) 46 | pts = pts[:, ~toremove] 47 | 48 | desc_point = [] 49 | for i in range(pts.shape[1]): 50 | xs = int(pts[0][i]) 51 | ys = int(pts[1][i]) 52 | desc_point = desc_point + [desc_data[:, ys, xs]] 53 | 54 | desc_point = np.stack(desc_point) 55 | return pts, desc_point 56 | 57 | def extract_desc(img_path, model, detection_thr, img_new_size): 58 | 59 | transform = transforms.ToTensor() 60 | 61 | image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 62 | image = cv2.resize(image, tuple(img_new_size[::-1]), interpolation=cv2.INTER_LINEAR) 63 | if len(image.shape) == 2: 64 | image = cv2.merge([image, image, image]) 65 | 66 | img = transform(image) 67 | img = img.unsqueeze(0) 68 | points_output = model(img) 69 | 70 | # process point 71 | prob = points_output['prob'].cpu().detach().gt(detection_thr).float().numpy() 72 | desc = points_output['desc'].cpu().detach().float().numpy() 73 | points, descs = process(prob, desc, 4, 4) 74 | 75 | return image, points, descs 76 | 77 | def draw_points(points, img, color=(255,0,0)): 78 | for j in range(points.shape[1]): 79 | x = points[0][j].astype(int) 80 | y = points[1][j].astype(int) 81 | if x < 0: 82 | break 83 | cv2.circle(img, (x,y), 1, color, thickness=-1) 84 | return img 85 | 86 | def generate_pair_result(dataroot, name, save_dir, model, detection_thr, img_new_size): 87 | with torch.no_grad(): 88 | model.eval() 89 | 90 | pair_path = os.path.join(dataroot, name) 91 | image_names = os.listdir(pair_path) 92 | 93 | img1_path = os.path.join(pair_path, image_names[0]) 94 | img2_path = os.path.join(pair_path, image_names[1]) 95 | 96 | img1, points1, desc1 = extract_desc(img1_path, model, detection_thr, img_new_size) 97 | img2, points2, desc2 = extract_desc(img2_path, model, detection_thr, img_new_size) 98 | 99 | h, w = img1.shape[:2] 100 | 101 | desc1 = torch.tensor(desc1) 102 | desc2 = torch.tensor(desc2) 103 | 104 | dis, match = match_nn(desc1, desc2) 105 | dis, match = dis.squeeze(1).numpy(), match.numpy() 106 | img1 = draw_points(points1, img1) 107 | img2 = draw_points(points2, img2, (0, 255, 0)) 108 | 109 | img = np.concatenate([img1, img2], 1) 110 | for i in range(match.shape[0]): 111 | if dis[i] > 0.7 : 112 | continue 113 | 114 | idx1 = int(match[i, 0]) 115 | idx2 = int(match[i, 1]) 116 | 117 | px1, py1 = int(points1[0][idx1]), int(points1[1][idx1]) 118 | px2, py2 = int(points2[0][idx2] + w), int(points2[1][idx2]) 119 | 120 | p1 = (px1, py1) 121 | p2 = (px2, py2) 122 | 123 | a = np.random.randint(0,255) 124 | b = np.random.randint(0,255) 125 | c = np.random.randint(0,255) 126 | 127 | cv2.line(img, (px1, py1), (px2, py2), (a, b, c), 1) 128 | 129 | save_name = name + ".png" 130 | save_path = os.path.join(save_dir, save_name) 131 | cv2.imwrite(save_path, img) 132 | 133 | def show_match(configs): 134 | 135 | # read configs 136 | save_dir = configs['save_dir'] 137 | data_root = configs['data_root'] 138 | superpoint_model_config = configs['model']['superpoint'] 139 | detection_thr = superpoint_model_config['detection_threshold'] 140 | img_new_size = configs['img_new_size'] 141 | configs['num_gpu'] = [0] 142 | configs['public_model'] = 0 143 | 144 | # model 145 | superpoint_model = build_superpoint_model(configs, requires_grad=False) 146 | superpoint_model.eval() 147 | 148 | 149 | pair_names = os.listdir(data_root) 150 | 151 | for pair_name in 
pair_names: 152 | generate_pair_result(data_root, pair_name, save_dir, superpoint_model, detection_thr, img_new_size) 153 | 154 | 155 | def main(): 156 | parser = argparse.ArgumentParser(description="show match") 157 | parser.add_argument( 158 | "-c", "--config_file", 159 | dest = "config_file", 160 | type = str, 161 | default = "" 162 | ) 163 | parser.add_argument( 164 | "-g", "--gpu", 165 | dest = "gpu", 166 | type = int, 167 | default = 0 168 | ) 169 | parser.add_argument( 170 | "-s", "--save_dir", 171 | dest = "save_dir", 172 | type = str, 173 | default = "" 174 | ) 175 | parser.add_argument( 176 | "-d", "--data_root", 177 | dest = "data_root", 178 | type = str, 179 | default = "" 180 | ) 181 | parser.add_argument( 182 | "-m", "--model_path", 183 | dest = "superpoint_model_path", 184 | type = str, 185 | default = "" 186 | ) 187 | args = parser.parse_args() 188 | config_file = args.config_file 189 | f = open(config_file, 'r', encoding='utf-8') 190 | configs = f.read() 191 | configs = yaml.load(configs) 192 | configs['use_gpu'] = args.gpu 193 | configs['data_root'] = args.data_root 194 | configs['superpoint_model_path'] = args.superpoint_model_path 195 | configs['save_dir'] = args.save_dir 196 | configs['img_new_size'] = [480, 640] 197 | 198 | show_match(configs) 199 | 200 | if __name__ == "__main__": 201 | main() 202 | -------------------------------------------------------------------------------- /debug_tools/show_points_detection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | 16 | from model.build_model import build_maskrcnn 17 | from model.inference import maskrcnn_inference 18 | from datasets.utils.pipeline import makedir 19 | from debug_tools.show_batch import show_batch, show_numpy 20 | from utils.tools import tensor_to_numpy 21 | from datasets.utils.postprocess import nms_fast 22 | from kornia.feature import match_nn 23 | 24 | os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2, 3" 25 | 26 | def process(prob, border_remove, nms_dist): 27 | # Convert pytorch -> numpy. 28 | heatmap = prob.squeeze() # H * W 29 | ys, xs = np.where(heatmap > 0) # Confidence threshold. 30 | if len(xs) == 0: 31 | return None, None 32 | pts = np.zeros((3, len(xs))) # Populate point data sized 3xN. 33 | pts[0, :] = xs 34 | pts[1, :] = ys 35 | pts[2, :] = heatmap[ys, xs] 36 | H, W = heatmap.shape[-2:] 37 | pts, _ = nms_fast(pts, H, W, dist_thresh=nms_dist) 38 | inds = np.argsort(pts[2,:]) 39 | pts = pts[:,inds[::-1]] # Sort by confidence. 40 | # Remove points along border. 
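  # Same post-processing as in show_match.py, except that this variant only
  # returns the filtered 3xN keypoint array (x, y, confidence) and does not
  # sample descriptors. The border filter below removes keypoints closer than
  # border_remove pixels to the image boundary.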
41 | bord = border_remove 42 | toremoveW = np.logical_or(pts[0, :] < bord, pts[0, :] >= (W-bord)) 43 | toremoveH = np.logical_or(pts[1, :] < bord, pts[1, :] >= (H-bord)) 44 | toremove = np.logical_or(toremoveW, toremoveH) 45 | pts = pts[:, ~toremove] 46 | 47 | return pts 48 | 49 | def extract_points(img_path, model, detection_thr): 50 | 51 | transform = transforms.ToTensor() 52 | 53 | image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 54 | if len(image.shape) == 2: 55 | image = cv2.merge([image, image, image]) 56 | 57 | sizes = {} 58 | original_image_sizes = [image.shape[-2:]] 59 | sizes['original_sizes'] = torch.tensor(original_image_sizes) 60 | sizes['new_sizes'] = torch.tensor(original_image_sizes) 61 | 62 | img = transform(image) 63 | img = img.unsqueeze(0) 64 | _, _, points_output = model(img, sizes) 65 | 66 | # process point 67 | prob = points_output['prob'].cpu().detach().gt(detection_thr).float().numpy() 68 | points = process(prob, 4, 4) 69 | 70 | return image, points 71 | 72 | def draw_points(points, img, color=(255,0,0)): 73 | for j in range(points.shape[1]): 74 | x = points[0][j].astype(int) 75 | y = points[1][j].astype(int) 76 | if x < 0: 77 | break 78 | cv2.circle(img, (x,y), 1, color, thickness=-1) 79 | return img 80 | 81 | def show_image_points(dataroot, image_name, save_dir, model, detection_thr): 82 | with torch.no_grad(): 83 | model.eval() 84 | 85 | img_path = os.path.join(dataroot, image_name) 86 | img, points = extract_points(img_path, model, detection_thr) 87 | h, w = img.shape[:2] 88 | img = draw_points(points, img) 89 | 90 | save_path = os.path.join(save_dir, image_name) 91 | cv2.imwrite(save_path, img) 92 | 93 | def show_points(configs): 94 | 95 | # read configs 96 | save_dir = configs['save_dir'] 97 | data_root = configs['data_root'] 98 | superpoint_model_config = configs['model']['superpoint'] 99 | detection_thr = superpoint_model_config['eval']['detection_threshold'] 100 | configs['num_gpu'] = [0] 101 | configs['public_model'] = 0 102 | 103 | # model 104 | maskrcnn_model = build_maskrcnn(configs) 105 | 106 | image_names = os.listdir(data_root) 107 | 108 | for image_name in image_names: 109 | show_image_points(data_root, image_name, save_dir, maskrcnn_model, detection_thr) 110 | 111 | 112 | def main(): 113 | parser = argparse.ArgumentParser(description="show match") 114 | parser.add_argument( 115 | "-c", "--config_file", 116 | dest = "config_file", 117 | type = str, 118 | default = "" 119 | ) 120 | parser.add_argument( 121 | "-g", "--gpu", 122 | dest = "gpu", 123 | type = int, 124 | default = 0 125 | ) 126 | parser.add_argument( 127 | "-s", "--save_dir", 128 | dest = "save_dir", 129 | type = str, 130 | default = "" 131 | ) 132 | parser.add_argument( 133 | "-d", "--data_root", 134 | dest = "data_root", 135 | type = str, 136 | default = "" 137 | ) 138 | parser.add_argument( 139 | "-m", "--model_path", 140 | dest = "pretrained_model_path", 141 | type = str, 142 | default = "" 143 | ) 144 | args = parser.parse_args() 145 | config_file = args.config_file 146 | f = open(config_file, 'r', encoding='utf-8') 147 | configs = f.read() 148 | configs = yaml.load(configs) 149 | configs['use_gpu'] = args.gpu 150 | configs['data_root'] = args.data_root 151 | configs['pretrained_model_path'] = args.pretrained_model_path 152 | configs['save_dir'] = args.save_dir 153 | 154 | show_points(configs) 155 | 156 | if __name__ == "__main__": 157 | main() 158 | -------------------------------------------------------------------------------- /debug_tools/test_batch_H.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | # sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages/') 8 | import numpy as np 9 | import time 10 | import sys 11 | import os 12 | import argparse 13 | from datetime import datetime 14 | 15 | import torch 16 | import torch.nn as nn 17 | from torch.utils.data import DataLoader 18 | from torchvision import transforms 19 | import yaml 20 | import cv2 21 | 22 | from models.superpoint import SuperPoint 23 | from models.backbone.fcn import VGGNet 24 | from models.vgg_like import VggLike 25 | from models.superobject import SuperObject 26 | from datasets.utils.pipeline import makedir 27 | from debug_tools.show_batch import show_batch, show_numpy 28 | from datasets.testdata.dataloader import TestDataset 29 | from utils.tools import process_point, tensor_to_numpy 30 | from datasets.utils.homographies import sample_homography, warp_batch_images 31 | 32 | def export_points(configs): 33 | # read configs 34 | val_batch_size = configs['model']['batch_size'] 35 | data_root = configs['data_root'] 36 | cell = configs['model']['cell'] 37 | img_new_size = configs['img_new_size'] 38 | 39 | # dataset 40 | val_data = TestDataset(dataroot=data_root, img_new_size=img_new_size) 41 | val_loader = DataLoader(val_data, batch_size=val_batch_size, num_workers=8) 42 | 43 | for iter, batch in enumerate(val_loader): 44 | inputs = batch['image'] 45 | img_shape = inputs.shape[-2:] 46 | H = sample_homography(img_shape, **configs['model']['homography_adaptation']['homographies']) 47 | show_batch(inputs) 48 | warped_img = warp_batch_images(inputs, H) 49 | show_batch(warped_img) 50 | 51 | img = tensor_to_numpy(inputs[0]) 52 | img = cv2.warpPerspective(img, H, (img_shape[1], img_shape[0])) 53 | show_numpy(img) 54 | 55 | def main(): 56 | parser = argparse.ArgumentParser(description="export points") 57 | parser.add_argument( 58 | "-c", "--config_file", 59 | dest = "config_file", 60 | type = str, 61 | default = "" 62 | ) 63 | parser.add_argument( 64 | "-d", "--data_root", 65 | dest = "data_root", 66 | type = str, 67 | default = "" 68 | ) 69 | args = parser.parse_args() 70 | 71 | config_file = args.config_file 72 | f = open(config_file, 'r', encoding='utf-8') 73 | configs = f.read() 74 | configs = yaml.load(configs) 75 | configs['data_root'] = args.data_root 76 | configs['img_new_size'] = [240, 320] 77 | 78 | export_points(configs) 79 | 80 | if __name__ == "__main__": 81 | main() 82 | -------------------------------------------------------------------------------- /debug_tools/test_data_process.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | import yaml 9 | import argparse 10 | import copy 11 | import torch 12 | import torchvision 13 | import torchvision.transforms as transforms 14 | from torch.utils.data import Dataset, DataLoader 15 | 16 | from datasets.utils.preprocess import preprocess_train_data 17 | from datasets.utils import postprocess as post 18 | from datasets.utils.batch_collator import BatchCollator 19 | from debug_tools.show_batch import show_batch, show_numpy, show_batch_opencv 20 | from utils.tools import tensor_to_numpy 21 | from datasets.utils.build_data import coco_loader 22 | from torch.nn 
import functional as F 23 | 24 | from torchvision.models.detection.transform import resize_boxes 25 | from torchvision.models.detection.roi_heads import paste_masks_in_image 26 | 27 | def postT(result, # type: List[Dict[str, Tensor]] 28 | image_shapes, # type: List[Tuple[int, int]] 29 | original_image_sizes # type: List[Tuple[int, int]] 30 | ): 31 | for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)): 32 | boxes = pred["boxes"] 33 | boxes = resize_boxes(boxes, im_s, o_im_s) 34 | result[i]["boxes"] = boxes 35 | if "masks" in pred: 36 | masks = pred["masks"].unsqueeze(1) 37 | scale = min(float(o_im_s[0])/im_s[0], float(o_im_s[1])/im_s[1]) 38 | masks = F.interpolate(masks.float(), scale_factor=scale).squeeze(1).byte() 39 | result[i]["masks"] = masks 40 | return result 41 | 42 | 43 | def test(configs): 44 | # read configs 45 | model_dir = configs['model_dir'] 46 | data_root = configs['data_root'] 47 | data_config = configs['data'] 48 | train_data_name = data_config['TRAIN'] 49 | 50 | debug_dir = "/home/haoyuefan/xk_data/superpoint/coco/debug_results/data_processing" 51 | 52 | # data 53 | loader = coco_loader( 54 | data_root=data_root, name=train_data_name, config=data_config, batch_size=2, remove_images_without_annotations=True) 55 | 56 | for iter, batch in enumerate(loader): 57 | print("iter = {}".format(iter)) 58 | gt = copy.deepcopy(batch) 59 | original_images = batch['image'] 60 | image_names = batch['image_name'] 61 | 62 | images, sizes, maskrcnn_targets, warped_images, superpoint_targets = preprocess_train_data(batch, False, 1, data_config) 63 | 64 | # original_images 65 | original_images = [tensor_to_numpy(img) for img in original_images] 66 | 67 | # sizes 68 | original_sizes = sizes['original_sizes'] 69 | new_sizes = sizes['new_sizes'] 70 | 71 | # maskrcnn 72 | num_images = len(images) 73 | new_targets = [] 74 | for i in range(num_images): 75 | target = {} 76 | num_objs = int(torch.sum(maskrcnn_targets['labels'][i] >= 0).item()) 77 | for k in maskrcnn_targets.keys(): 78 | target[k] = maskrcnn_targets[k][i][:num_objs] 79 | target['scores'] = torch.ones(num_objs) 80 | target['masks'] = target['masks'].float() 81 | new_targets += [target] 82 | maskrcnn_targets = new_targets 83 | maskrcnn_targets = postT(maskrcnn_targets, new_sizes.numpy().tolist(), original_sizes.numpy().tolist()) 84 | 85 | # superpoint 86 | points_probs = superpoint_targets['kpt_map'] 87 | points_desc = torch.ones(len(points_probs), 256, points_probs.shape[-2], points_probs.shape[-1]) 88 | points_output = {'prob':points_probs, 'desc': points_desc} 89 | 90 | detections, points_output = post.postprocess(new_sizes, original_sizes, 0.3, maskrcnn_targets, points_output) 91 | 92 | results = post.save_detection_results(original_images, image_names, debug_dir, detections, None, points_output, True, True) 93 | 94 | # save gt 95 | new_gts = [] 96 | for i in range(len(images)): 97 | new_gt = {} 98 | for k in gt: 99 | new_gt[k] = gt[k][i] 100 | new_gt['scores'] = new_gt['labels'] 101 | new_gts.append(new_gt) 102 | 103 | save_dir_list = [os.path.join(debug_dir, image_name) for image_name in image_names] 104 | images = copy.deepcopy(original_images) 105 | images = post.overlay_objects(images, new_gts, None) 106 | images = post.overlay_points(images, new_gts) 107 | post.save_images(images, save_dir_list, "groundtruth") 108 | 109 | 110 | def main(): 111 | parser = argparse.ArgumentParser(description="Test Process") 112 | parser.add_argument( 113 | "-c", "--config_file", 114 | dest = "config_file", 
115 | type = str, 116 | default = "" 117 | ) 118 | parser.add_argument( 119 | "-s", "--save_dir", 120 | dest = "save_dir", 121 | type = str, 122 | default = "" 123 | ) 124 | parser.add_argument( 125 | "-d", "--data_root", 126 | dest = "data_root", 127 | type = str, 128 | default = "" 129 | ) 130 | args = parser.parse_args() 131 | 132 | config_file = args.config_file 133 | f = open(config_file, 'r', encoding='utf-8') 134 | configs = f.read() 135 | configs = yaml.load(configs) 136 | configs['model_dir'] = args.save_dir 137 | configs['data_root'] = args.data_root 138 | 139 | test(configs) 140 | 141 | if __name__ == "__main__": 142 | main() -------------------------------------------------------------------------------- /experiments/compare_tracking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | import yaml 9 | import numpy as np 10 | import argparse 11 | 12 | from experiments.utils.utils import read_tracking_results, plot_tracking_details 13 | 14 | 15 | def filter_pr_curves(results, plot_keys): 16 | pr_curves = results['pr_curves'] 17 | new_pr_curves = {} 18 | for k in pr_curves.keys(): 19 | if k in plot_keys: 20 | new_pr_curves[k] = pr_curves[k] 21 | 22 | results['pr_curves'] = new_pr_curves 23 | return results 24 | 25 | 26 | def compare_tracking(configs): 27 | f1 = configs['file1'] 28 | f2 = configs['file2'] 29 | save_dir = configs['save_dir'] 30 | plot_keys = configs['interval'] 31 | 32 | results1 = read_tracking_results(f1) 33 | results2 = read_tracking_results(f2) 34 | 35 | results1 = filter_pr_curves(results1, plot_keys) 36 | results2 = filter_pr_curves(results2, plot_keys) 37 | 38 | results_list = [results1, results2] 39 | plot_tracking_details(results_list, save_dir, configs=configs) 40 | 41 | 42 | def main(): 43 | parser = argparse.ArgumentParser(description="compare tracking results") 44 | parser.add_argument( 45 | "-f1", "--file1", 46 | dest = "file1", 47 | type = str, 48 | default = "" 49 | ) 50 | parser.add_argument( 51 | "-f2", "--file2", 52 | dest = "file2", 53 | type = str, 54 | default = "" 55 | ) 56 | parser.add_argument( 57 | "-s", "--save_dir", 58 | dest = "save_dir", 59 | type = str, 60 | default = "" 61 | ) 62 | parser.add_argument( 63 | "-c", "--config_file", 64 | dest = "config_file", 65 | type = str, 66 | default = "" 67 | ) 68 | args = parser.parse_args() 69 | config_file = args.config_file 70 | f = open(config_file, 'r', encoding='utf-8') 71 | configs = f.read() 72 | configs = yaml.load(configs) 73 | configs['file1'] = args.file1 74 | configs['file2'] = args.file2 75 | configs['save_dir'] = args.save_dir 76 | 77 | compare_tracking(configs) 78 | 79 | if __name__ == "__main__": 80 | main() -------------------------------------------------------------------------------- /experiments/demo/kitti-relocalization.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/experiments/demo/kitti-relocalization.gif -------------------------------------------------------------------------------- /experiments/demo/object-matching1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/experiments/demo/object-matching1.gif 
-------------------------------------------------------------------------------- /experiments/demo/object-matching2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/experiments/demo/object-matching2.gif -------------------------------------------------------------------------------- /experiments/object_tracking/single_object_tracking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | 16 | from model.build_model import build_maskrcnn, build_gcn, build_superpoint_model 17 | from datasets.utils.pipeline import makedir 18 | from datasets.kitti.kitti_tracking import KittiTracking 19 | from datasets.otb.otb_tracking import OtbTracking 20 | from datasets.vot.vot_tracking import VotTracking 21 | 22 | from experiments.object_tracking.object_tracking import update_normal_size, network_output, calculate_pr_curve, box_iou 23 | from experiments.utils.utils import save_tracking_results, plot_pr_curves, plot_tracking_details, get_pr_curve_area 24 | 25 | 26 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 27 | 28 | 29 | def reorder_descs(net_output, tracking_gt): 30 | ''' 31 | reorder the descs, the first is the desc of tracked object 32 | ''' 33 | net_output_boxes = net_output['objects']['boxes'] # N * 4 34 | if len(net_output_boxes) < 1 : 35 | return None 36 | 37 | boxes = torch.tensor(tracking_gt['box']) # 4 38 | boxes = boxes.unsqueeze(0) # 1 * 4 39 | ious = box_iou(boxes, net_output_boxes) # 1 * N 40 | ious = ious.squeeze(0) 41 | 42 | value, index = ious.max(0) 43 | value, index = value.item(), index.item() 44 | 45 | if value > 0.5: 46 | descs = net_output['descs'] 47 | order = [i for i in range(len(descs))] 48 | order[0] = index 49 | order[index] = 0 50 | 51 | descs = descs[order] 52 | else: 53 | descs = None 54 | 55 | return descs 56 | 57 | 58 | def match_objects(object_descs, last_object_descs): 59 | ''' 60 | calculate gt_matrix, match_matrix 61 | ''' 62 | if object_descs is None or last_object_descs is None: 63 | return None, None 64 | 65 | # generate groundtruth pair matrix 66 | def get_gt_and_match(descs1, descs2): 67 | N = len(descs2) 68 | gt_matrix = torch.zeros(N) 69 | gt_matrix[0] = 1.0 70 | 71 | tracked_desc = descs1[0].unsqueeze(0) # 1 * D 72 | match_matrix = torch.einsum('nd,dm->nm', tracked_desc, descs2.t()) # 1 * M 73 | return gt_matrix, match_matrix 74 | 75 | gt1, match1 = get_gt_and_match(last_object_descs, object_descs) 76 | gt2, match2 = get_gt_and_match(object_descs, last_object_descs) 77 | 78 | return [gt1, gt2], [match1, match2] 79 | 80 | 81 | def calculate_pr_curves(object_descs_list, intervals): 82 | pr_curves = {} 83 | for interval in intervals: 84 | gts, matches = [], [] 85 | last_object_descs = None 86 | for i in range(len(object_descs_list)): 87 | if (i % interval) != 0: 88 | continue 89 | 90 | object_descs = object_descs_list[i] 91 | if last_object_descs is not None and object_descs is not None: 92 | gt_matrix, match_matrix = match_objects(object_descs, last_object_descs) 93 | if gt_matrix is not None and match_matrix is not None: 94 | gts += gt_matrix 95 | matches += match_matrix 96 | 97 | last_object_descs = object_descs 98 
| 99 | pr_curve = calculate_pr_curve(gts, matches) 100 | pr_curves[interval] = pr_curve 101 | return pr_curves 102 | 103 | 104 | def single_object_tracking(configs): 105 | 106 | # read configs 107 | save_dir = configs['save_dir'] 108 | data_root = configs['data_root'] 109 | model_dir = configs['model_dir'] 110 | dataset_name = configs['data']['name'] 111 | configs['num_gpu'] = [0] 112 | configs['public_model'] = 0 113 | 114 | superpoint_model_path = os.path.join(model_dir, "points_model.pth") 115 | maskrcnn_model_path = os.path.join(model_dir, "maskrcnn_model.pth") 116 | gcn_model_path = os.path.join(model_dir, "gcn_model.pth") 117 | configs["maskrcnn_model_path"] = maskrcnn_model_path 118 | configs["superpoint_model_path"] = superpoint_model_path 119 | configs["graph_model_path"] = gcn_model_path 120 | 121 | # model 122 | superpoint_model = build_superpoint_model(configs, requires_grad=False) 123 | superpoint_model.eval() 124 | 125 | maskrcnn_model = build_maskrcnn(configs) 126 | maskrcnn_model.eval() 127 | 128 | gcn_model = build_gcn(configs) 129 | gcn_model.eval() 130 | 131 | intervals = [1, 2, 3, 5, 10] 132 | seqs = {'kitti':['0002', '0003', '0006', '0010'], 133 | 'otb': ['BlurBody', 'BlurCar2', 'Human2', 'Human7', 'Liquor'], 134 | 'vot': ['bluecar', 'bus6', 'humans_corridor_occ_2_A']} 135 | datasets = {'kitti':KittiTracking, 'otb':OtbTracking, 'vot':VotTracking} 136 | DATASET = datasets[dataset_name] 137 | SEQNAMES = seqs[dataset_name] 138 | # SEQNAMES = [seqs[dataset_name][0]] 139 | 140 | pr_curves_list = [] 141 | for seq in SEQNAMES: 142 | dataset = DATASET(data_root, seq) 143 | 144 | image_size = dataset.image_size() 145 | configs['data']['normal_size'] = update_normal_size(image_size) 146 | 147 | object_descs_list = [] 148 | for data in dataset: 149 | image = data['image'] 150 | image_name = data['image_name'] 151 | print(image_name) 152 | 153 | net_output = network_output(image, superpoint_model, maskrcnn_model, gcn_model, configs) 154 | net_output = {'points': net_output[0], 'objects': net_output[1], 'descs': net_output[2]} 155 | 156 | tracking_gt = dataset.get_label(image_name) 157 | if tracking_gt is None or net_output['points'] is None: 158 | object_descs_list.append(None) 159 | continue 160 | 161 | object_descs = reorder_descs(net_output, tracking_gt) 162 | object_descs_list.append(object_descs) 163 | 164 | pr_curves = calculate_pr_curves(object_descs_list, intervals) 165 | pr_curves_list.append(pr_curves) 166 | print(pr_curves) 167 | 168 | # plot 169 | new_pr_curves, areas = {}, {} 170 | for k in pr_curves_list[0].keys(): 171 | pr_curve_list = [torch.tensor(pr_curves[k]) for pr_curves in pr_curves_list] 172 | pr_curve_list = torch.stack(pr_curve_list) # N * 10 * 2 173 | new_pr_curve = torch.mean(pr_curve_list, 0) 174 | new_pr_curves[k] = new_pr_curve.cpu().numpy().tolist() 175 | areas[k] = get_pr_curve_area(new_pr_curves[k]) 176 | 177 | plot_pr_curves(new_pr_curves, "otb_tracking", save_dir) 178 | 179 | # # save results to yaml 180 | results = {'dataset':dataset_name, 'model':"ours", 'areas': areas, 'pr_curves': new_pr_curves} 181 | save_tracking_results(results, save_dir) 182 | 183 | 184 | def main(): 185 | parser = argparse.ArgumentParser(description="show match") 186 | parser.add_argument( 187 | "-c", "--config_file", 188 | dest = "config_file", 189 | type = str, 190 | default = "" 191 | ) 192 | parser.add_argument( 193 | "-g", "--gpu", 194 | dest = "gpu", 195 | type = int, 196 | default = 0 197 | ) 198 | parser.add_argument( 199 | "-s", "--save_dir", 200 | dest = 
"save_dir", 201 | type = str, 202 | default = "" 203 | ) 204 | parser.add_argument( 205 | "-d", "--data_root", 206 | dest = "data_root", 207 | type = str, 208 | default = "" 209 | ) 210 | parser.add_argument( 211 | "-m", "--model_dir", 212 | dest = "model_dir", 213 | type = str, 214 | default = "" 215 | ) 216 | args = parser.parse_args() 217 | config_file = args.config_file 218 | f = open(config_file, 'r', encoding='utf-8') 219 | configs = f.read() 220 | configs = yaml.load(configs) 221 | configs['use_gpu'] = args.gpu 222 | configs['data_root'] = args.data_root 223 | configs['model_dir'] = args.model_dir 224 | configs['save_dir'] = args.save_dir 225 | 226 | single_object_tracking(configs) 227 | 228 | if __name__ == "__main__": 229 | main() 230 | -------------------------------------------------------------------------------- /experiments/place_recogination/offline_process.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | 16 | from datasets.utils.pipeline import makedir 17 | from datasets.kitti.kitti_odomery import KittiOdometry 18 | from datasets.utils import postprocess as post 19 | 20 | from experiments.object_tracking.object_tracking import update_normal_size, network_output, calculate_pr_curve, box_iou 21 | from experiments.utils.utils import save_tracking_results, plot_pr_curves, plot_tracking_details, get_pr_curve_area 22 | 23 | 24 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 25 | 26 | def filter_objects(descs, objects, target_labels): 27 | new_descs = [] 28 | new_objects = {} 29 | for k in objects.keys(): 30 | new_objects[k] = [] 31 | 32 | for i in range(len(descs)): 33 | label = objects['labels'][i] 34 | score = objects['scores'][i] 35 | if label in target_labels and score > 0.5: 36 | new_descs.append(descs[i]) 37 | for k in objects.keys(): 38 | new_objects[k].append(objects[k][i]) 39 | 40 | if len(new_descs) < 1: 41 | return None, None 42 | 43 | new_descs = np.vstack(new_descs) 44 | for k in objects.keys(): 45 | new_objects[k] = np.vstack(new_objects[k]) 46 | 47 | return new_descs, new_objects 48 | 49 | def relocalization_offline(configs): 50 | 51 | # read configs 52 | save_dir = configs['save_dir'] 53 | data_root = configs['data_root'] 54 | net_output_dir = configs['net_output_dir'] 55 | dataset_name = configs['data']['name'] 56 | 57 | # data 58 | seqs = ['00', '05', '06'] 59 | similarity_thr = 0.95 60 | results = {} 61 | for seq in seqs: 62 | # dataset 63 | dataset = KittiOdometry(data_root, seq) 64 | dis_thr = dataset.dis_thr 65 | angle_thr = dataset.angle_thr 66 | interval = dataset.interval 67 | loop_gt, num_loop_gt = dataset.get_loop_gt() 68 | 69 | seq_net_output_dir = os.path.join(net_output_dir, seq) 70 | descs_list = [] 71 | image_name_list = [] 72 | image_indexes = [] 73 | predict_loops = {} 74 | for data in dataset: 75 | image = data['image'] 76 | image_name = data['image_name'] 77 | gt = dataset.get_label(image_name) 78 | print(image_name) 79 | 80 | image_net_output_dir = os.path.join(seq_net_output_dir, image_name) 81 | # load points 82 | points = [] 83 | points_dir = os.path.join(image_net_output_dir, "points") 84 | if not os.path.exists(points_dir): 85 | continue 86 | points_file_names = os.listdir(points_dir) 87 | for points_file_name in 
points_file_names: 88 | p_path = os.path.join(points_dir, points_file_name) 89 | p = np.load(p_path, allow_pickle=True) 90 | points.append(p) 91 | 92 | # load descs 93 | descs_path = os.path.join(image_net_output_dir, "descs.npy") 94 | descs = np.load(descs_path) 95 | 96 | # load objects 97 | objects = {} 98 | objects_dir = os.path.join(image_net_output_dir, "objects") 99 | objects_file_names = os.listdir(objects_dir) 100 | for objects_file_name in objects_file_names: 101 | key = objects_file_name.split('.')[0] 102 | value_path = os.path.join(objects_dir, objects_file_name) 103 | objects[key] = np.load(value_path) 104 | # objects[key] = torch.tensor(objects[key]) 105 | 106 | # original_images = [image] 107 | # image_names = [image_name] 108 | # detections = [objects] 109 | # results = post.save_detection_results(original_images, image_names, save_dir, detections, 110 | # None, None, True, False) 111 | 112 | 113 | target_labels = [3, 8] 114 | descs, objects = filter_objects(descs, objects, target_labels) 115 | if descs is None: 116 | continue 117 | 118 | if gt['index'] > interval and len(descs_list) > 0: 119 | # find loop 120 | max_score = 0 121 | match_image = "" 122 | for descs_i, image_name_i, image_index_i in zip(descs_list, image_name_list, image_indexes): 123 | if gt['index'] - image_index_i < interval: 124 | break 125 | 126 | descs_similarity = descs.dot(descs_i.T) # m * n 127 | matches = descs_similarity > similarity_thr 128 | matches = matches * descs_similarity 129 | matches = np.max(matches, 1) 130 | # decide to match 131 | good_match = 0 132 | score = np.sum(matches) 133 | num_nonzero = np.sum((matches > 0)) 134 | mean_match = np.mean(matches) 135 | if score > 0 and mean_match > 0.3: 136 | good_match = 1 137 | 138 | m, n = descs_similarity.shape 139 | if m - n > 2 or n - m > 2: 140 | good_match = 0 141 | 142 | if good_match and score > max_score: 143 | max_score = score 144 | match_image = image_name_i 145 | predict_loops[image_name] = {'image_name': match_image, 'score': max_score} 146 | 147 | descs_list.append(descs) 148 | image_name_list.append(image_name) 149 | image_indexes.append(gt['index']) 150 | 151 | # calculate pr 152 | # calculate prediction 153 | num_loop_prediction, num_correct_prediction = 0, 0 154 | for image_name in predict_loops.keys(): 155 | predict_loop = predict_loops[image_name] 156 | if predict_loop['score'] > 0: 157 | num_loop_prediction += 1 158 | loop_image_name = predict_loop['image_name'] 159 | gt1 = dataset.get_label(image_name) 160 | gt2 = dataset.get_label(loop_image_name) 161 | 162 | idx1, p1, R1 = gt1['index'], gt1['position'], gt1['rotation'] 163 | idx2, p2, R2 = gt2['index'], gt2['position'], gt2['rotation'] 164 | 165 | dp = np.linalg.norm((p1-p2)) 166 | dR = R1.dot(R2.T) 167 | dr, _ = cv2.Rodrigues(dR) 168 | d_angle = np.linalg.norm(dr) 169 | d_idx = idx2 - idx1 170 | d_idx = d_idx if d_idx > 0 else (-d_idx) 171 | if (d_idx > interval) and (dp < dis_thr) and (d_angle < angle_thr): 172 | num_correct_prediction += 1 173 | # else: 174 | # print("img1 = {}, img2 = {}".format(image_name, loop_image_name)) 175 | 176 | precision = float(num_correct_prediction) / num_loop_prediction if num_loop_prediction > 0 else 1 177 | recall = float(num_correct_prediction) / num_loop_gt if num_loop_gt > 0 else 1 178 | 179 | results[seq] = {'precision': precision, 'recall': recall} 180 | 181 | print(results) 182 | file_name = "kitti_odometry_pr.yaml" 183 | file_path = os.path.join(save_dir, file_name) 184 | fp = open(file_path, 'w') 185 | 
fp.write(yaml.dump(results)) 186 | 187 | 188 | def main(): 189 | parser = argparse.ArgumentParser(description="show match") 190 | parser.add_argument( 191 | "-c", "--config_file", 192 | dest = "config_file", 193 | type = str, 194 | default = "" 195 | ) 196 | parser.add_argument( 197 | "-s", "--save_dir", 198 | dest = "save_dir", 199 | type = str, 200 | default = "" 201 | ) 202 | parser.add_argument( 203 | "-n", "--net_output_dir", 204 | dest = "net_output_dir", 205 | type = str, 206 | default = "" 207 | ) 208 | parser.add_argument( 209 | "-d", "--data_root", 210 | dest = "data_root", 211 | type = str, 212 | default = "" 213 | ) 214 | args = parser.parse_args() 215 | config_file = args.config_file 216 | f = open(config_file, 'r', encoding='utf-8') 217 | configs = f.read() 218 | configs = yaml.load(configs) 219 | configs['data_root'] = args.data_root 220 | configs['net_output_dir'] = args.net_output_dir 221 | configs['save_dir'] = args.save_dir 222 | 223 | relocalization_offline(configs) 224 | 225 | if __name__ == "__main__": 226 | main() 227 | -------------------------------------------------------------------------------- /experiments/place_recogination/offline_topK.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | 16 | from datasets.utils.pipeline import makedir 17 | from datasets.kitti.kitti_odomery import KittiOdometry 18 | from datasets.utils import postprocess as post 19 | 20 | from experiments.object_tracking.object_tracking import update_normal_size, network_output, calculate_pr_curve, box_iou 21 | from experiments.utils.utils import save_tracking_results, plot_pr_curves, plot_tracking_details, get_pr_curve_area 22 | 23 | 24 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 25 | 26 | def filter_objects(descs, objects, target_labels): 27 | new_descs = [] 28 | new_objects = {} 29 | for k in objects.keys(): 30 | new_objects[k] = [] 31 | 32 | for i in range(len(descs)): 33 | label = objects['labels'][i] 34 | score = objects['scores'][i] 35 | if label in target_labels and score > 0.5: 36 | new_descs.append(descs[i]) 37 | for k in objects.keys(): 38 | new_objects[k].append(objects[k][i]) 39 | 40 | if len(new_descs) < 1: 41 | return None, None 42 | 43 | new_descs = np.vstack(new_descs) 44 | for k in objects.keys(): 45 | new_objects[k] = np.vstack(new_objects[k]) 46 | 47 | return new_descs, new_objects 48 | 49 | def relocalization_offline(configs): 50 | 51 | # read configs 52 | save_dir = configs['save_dir'] 53 | data_root = configs['data_root'] 54 | net_output_dir = configs['net_output_dir'] 55 | dataset_name = configs['data']['name'] 56 | 57 | # data 58 | seqs = ['00', '05', '06'] 59 | similarity_thr = 0.88 60 | interval = 100 61 | pr_curves_list = [] 62 | datasets_results = {} 63 | for seq in seqs: 64 | # dataset 65 | dataset = KittiOdometry(data_root, seq) 66 | dis_thr = dataset.dis_thr 67 | angle_thr = dataset.angle_thr 68 | interval = dataset.interval 69 | 70 | seq_net_output_dir = os.path.join(net_output_dir, seq) 71 | descs_list = [] 72 | image_name_list = [] 73 | image_indexes = [] 74 | predict_loops = {} 75 | for data in dataset: 76 | image = data['image'] 77 | image_name = data['image_name'] 78 | gt = dataset.get_label(image_name) 79 | print(image_name) 80 | 
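# the block below loads, for each frame, the outputs that online_relocalization.py saved under net_output_dir/<seq>/<image_name>:
# per-object keypoints ("points/"), the GCN object descriptors ("descs.npy"), and the Mask R-CNN detections ("objects/")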
81 | image_net_output_dir = os.path.join(seq_net_output_dir, image_name) 82 | # load points 83 | points = [] 84 | points_dir = os.path.join(image_net_output_dir, "points") 85 | if not os.path.exists(points_dir): 86 | continue 87 | points_file_names = os.listdir(points_dir) 88 | for points_file_name in points_file_names: 89 | p_path = os.path.join(points_dir, points_file_name) 90 | p = np.load(p_path, allow_pickle=True) 91 | points.append(p) 92 | 93 | # load descs 94 | descs_path = os.path.join(image_net_output_dir, "descs.npy") 95 | descs = np.load(descs_path) 96 | 97 | # load objects 98 | objects = {} 99 | objects_dir = os.path.join(image_net_output_dir, "objects") 100 | objects_file_names = os.listdir(objects_dir) 101 | for objects_file_name in objects_file_names: 102 | key = objects_file_name.split('.')[0] 103 | value_path = os.path.join(objects_dir, objects_file_name) 104 | objects[key] = np.load(value_path) 105 | 106 | 107 | target_labels = [3, 8] 108 | descs, objects = filter_objects(descs, objects, target_labels) 109 | if descs is None: 110 | continue 111 | 112 | if gt['index'] > interval and len(descs_list) > 0: 113 | # find loop 114 | scores = [] 115 | match_images = [] 116 | for descs_i, image_name_i, image_index_i in zip(descs_list, image_name_list, image_indexes): 117 | if gt['index'] - image_index_i < interval: 118 | break 119 | 120 | descs_similarity = descs.dot(descs_i.T) # m * n 121 | matches = descs_similarity > similarity_thr 122 | matches = matches * descs_similarity 123 | matches = np.max(matches, 1) 124 | # decide to match 125 | good_match = 0 126 | score = np.sum(matches) 127 | 128 | m, n = descs_similarity.shape 129 | num_diff = m - n 130 | num_diff = num_diff if num_diff > 0 else (-num_diff) 131 | score = score - num_diff * 0 132 | 133 | scores.append(score) 134 | match_images.append(image_name_i) 135 | 136 | predict_loops[image_name] = {'match_images': match_images, 'scores': scores} 137 | 138 | descs_list.append(descs) 139 | image_name_list.append(image_name) 140 | image_indexes.append(gt['index']) 141 | 142 | # find groundtruth 143 | loop_gt, num_loop_gt = dataset.get_loop_gt() 144 | 145 | # recall 146 | topk_k = [i for i in range(1, 21)] 147 | recalls = {} 148 | for k in topk_k: 149 | pred_loop = 0 150 | for image_name in loop_gt.keys(): 151 | if loop_gt[image_name]: 152 | if image_name not in predict_loops.keys(): 153 | continue 154 | 155 | predict_loop = predict_loops[image_name] 156 | scores, match_images = predict_loop['scores'], predict_loop['match_images'] 157 | scores = torch.tensor(scores) 158 | k_loop_images = [] 159 | k_loop_scores = [] 160 | if k > len(scores): 161 | k_loop_images = match_images 162 | else: 163 | value, _ = scores.topk(k) 164 | min_v = value[-1].item() 165 | indices = [] 166 | for i_score in range(len(scores)): 167 | if(scores[i_score] >= min_v): 168 | indices.append(i_score) 169 | # _, indices = scores.topk(k) 170 | # indices = indices.numpy().tolist() 171 | for idx in indices: 172 | k_loop_images.append(match_images[idx]) 173 | 174 | # if correct image in k_loop_images 175 | for k_image_name in k_loop_images: 176 | gt1 = dataset.get_label(image_name) 177 | gt2 = dataset.get_label(k_image_name) 178 | 179 | idx1, p1, R1 = gt1['index'], gt1['position'], gt1['rotation'] 180 | idx2, p2, R2 = gt2['index'], gt2['position'], gt2['rotation'] 181 | 182 | dp = np.linalg.norm((p1-p2)) 183 | dR = R1.dot(R2.T) 184 | dr, _ = cv2.Rodrigues(dR) 185 | d_angle = np.linalg.norm(dr) 186 | d_idx = idx2 - idx1 187 | d_idx = d_idx if d_idx > 0 else 
(-d_idx) 188 | if (d_idx > interval) and (dp < dis_thr) and (d_angle < angle_thr): 189 | pred_loop += 1 190 | break 191 | print("k = {}, pred_loop = {}, num_loop_gt = {}".format(k, pred_loop, num_loop_gt)) 192 | recall = float(pred_loop) / num_loop_gt if num_loop_gt > 0 else 1 193 | recalls[k] = recall 194 | datasets_results[seq] = recalls 195 | 196 | 197 | file_name = "kitti_odometry.yaml" 198 | file_path = os.path.join(save_dir, file_name) 199 | fp = open(file_path, 'w') 200 | fp.write(yaml.dump(datasets_results)) 201 | 202 | 203 | 204 | def main(): 205 | parser = argparse.ArgumentParser(description="show match") 206 | parser.add_argument( 207 | "-c", "--config_file", 208 | dest = "config_file", 209 | type = str, 210 | default = "" 211 | ) 212 | parser.add_argument( 213 | "-s", "--save_dir", 214 | dest = "save_dir", 215 | type = str, 216 | default = "" 217 | ) 218 | parser.add_argument( 219 | "-n", "--net_output_dir", 220 | dest = "net_output_dir", 221 | type = str, 222 | default = "" 223 | ) 224 | parser.add_argument( 225 | "-d", "--data_root", 226 | dest = "data_root", 227 | type = str, 228 | default = "" 229 | ) 230 | args = parser.parse_args() 231 | config_file = args.config_file 232 | f = open(config_file, 'r', encoding='utf-8') 233 | configs = f.read() 234 | configs = yaml.load(configs) 235 | configs['data_root'] = args.data_root 236 | configs['net_output_dir'] = args.net_output_dir 237 | configs['save_dir'] = args.save_dir 238 | 239 | relocalization_offline(configs) 240 | 241 | if __name__ == "__main__": 242 | main() 243 | -------------------------------------------------------------------------------- /experiments/place_recogination/online_relocalization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | 16 | from model.build_model import build_maskrcnn, build_gcn, build_superpoint_model 17 | from datasets.utils.pipeline import makedir 18 | from datasets.kitti.kitti_odomery import KittiOdometry 19 | 20 | from experiments.object_tracking.object_tracking import update_normal_size, network_output, calculate_pr_curve, box_iou 21 | 22 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 23 | 24 | 25 | def relocalization(configs): 26 | 27 | # read configs 28 | save_dir = configs['save_dir'] 29 | data_root = configs['data_root'] 30 | model_dir = configs['model_dir'] 31 | dataset_name = configs['data']['name'] 32 | configs['num_gpu'] = [0] 33 | configs['public_model'] = 0 34 | 35 | superpoint_model_path = os.path.join(model_dir, "points_model.pth") 36 | maskrcnn_model_path = os.path.join(model_dir, "maskrcnn_model.pth") 37 | gcn_model_path = os.path.join(model_dir, "gcn_model.pth") 38 | configs["maskrcnn_model_path"] = maskrcnn_model_path 39 | configs["superpoint_model_path"] = superpoint_model_path 40 | configs["graph_model_path"] = gcn_model_path 41 | 42 | # model 43 | superpoint_model = build_superpoint_model(configs, requires_grad=False) 44 | superpoint_model.eval() 45 | 46 | maskrcnn_model = build_maskrcnn(configs) 47 | maskrcnn_model.eval() 48 | 49 | gcn_model = build_gcn(configs) 50 | gcn_model.eval() 51 | 52 | # data 53 | seqs = ['00', '05', '06'] 54 | 55 | pr_curves_list = [] 56 | for seq in seqs: 57 | dataset = KittiOdometry(data_root, seq) 58 | dis_thr = 
dataset.dis_thr 59 | angle_thr = dataset.angle_thr 60 | interval = dataset.interval 61 | 62 | image_size = dataset.image_size() 63 | configs['data']['normal_size'] = update_normal_size(image_size) 64 | 65 | seq_save_dir = os.path.join(save_dir, seq) 66 | makedir(seq_save_dir) 67 | for data in dataset: 68 | image = data['image'] 69 | image_name = data['image_name'] 70 | print(image_name) 71 | 72 | net_output = network_output(image, superpoint_model, maskrcnn_model, gcn_model, configs) 73 | net_output = {'points': net_output[0], 'objects': net_output[1], 'descs': net_output[2]} 74 | if net_output['points'] is None: 75 | continue 76 | 77 | image_save_dir = os.path.join(seq_save_dir, image_name) 78 | makedir(image_save_dir) 79 | 80 | # save points 81 | points_dir = os.path.join(image_save_dir, "points") 82 | makedir(points_dir) 83 | points = net_output['points'] 84 | for i in range(len(points)): 85 | p = points[i].cpu().numpy 86 | p_path = os.path.join(points_dir, (str(i) + ".npy")) 87 | np.save(p_path, p) 88 | 89 | # save descs 90 | descs_path = os.path.join(image_save_dir, "descs.npy") 91 | descs = net_output['descs'].cpu().numpy() 92 | np.save(descs_path, descs) 93 | 94 | # save objects 95 | objects = net_output['objects'] 96 | objects_dir = os.path.join(image_save_dir, "objects") 97 | makedir(objects_dir) 98 | for k in objects.keys(): 99 | value = objects[k].cpu().numpy() 100 | value_path = os.path.join(objects_dir, (k+".npy")) 101 | np.save(value_path, value) 102 | 103 | 104 | 105 | def main(): 106 | parser = argparse.ArgumentParser(description="show match") 107 | parser.add_argument( 108 | "-c", "--config_file", 109 | dest = "config_file", 110 | type = str, 111 | default = "" 112 | ) 113 | parser.add_argument( 114 | "-g", "--gpu", 115 | dest = "gpu", 116 | type = int, 117 | default = 0 118 | ) 119 | parser.add_argument( 120 | "-s", "--save_dir", 121 | dest = "save_dir", 122 | type = str, 123 | default = "" 124 | ) 125 | parser.add_argument( 126 | "-d", "--data_root", 127 | dest = "data_root", 128 | type = str, 129 | default = "" 130 | ) 131 | parser.add_argument( 132 | "-m", "--model_dir", 133 | dest = "model_dir", 134 | type = str, 135 | default = "" 136 | ) 137 | args = parser.parse_args() 138 | config_file = args.config_file 139 | f = open(config_file, 'r', encoding='utf-8') 140 | configs = f.read() 141 | configs = yaml.load(configs) 142 | configs['use_gpu'] = args.gpu 143 | configs['data_root'] = args.data_root 144 | configs['model_dir'] = args.model_dir 145 | configs['save_dir'] = args.save_dir 146 | 147 | relocalization(configs) 148 | 149 | if __name__ == "__main__": 150 | main() 151 | -------------------------------------------------------------------------------- /experiments/show_object_matching/draw_object.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | import os 7 | 8 | import torch 9 | from torchvision import transforms 10 | import yaml 11 | import cv2 12 | import numpy as np 13 | import argparse 14 | import copy 15 | 16 | from utils.tools import tensor_to_numpy 17 | from utils import cv2_util 18 | 19 | 20 | def compute_colors_for_labels(labels): 21 | """ 22 | Simple function that adds fixed colors depending on the class 23 | """ 24 | palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1]) 25 | colors = labels[:, None] * palette 26 | colors = (colors % 255).numpy().astype("uint8") 27 | return colors 28 | 29 | 30 | 
def overlay_boxes(image, boxes, colors): 31 | """ 32 | Adds the predicted boxes on top of the image 33 | 34 | Arguments: 35 | image (np.ndarray): an image as returned by OpenCV 36 | """ 37 | 38 | for box, color in zip(boxes, colors): 39 | box = box.to(torch.int64) 40 | top_left, bottom_right = box[:2].tolist(), box[2:].tolist() 41 | image = cv2.rectangle( 42 | image, tuple(top_left), tuple(bottom_right), tuple(color), 1 43 | ) 44 | 45 | return image 46 | 47 | 48 | def overlay_class_names(image, boxes, textes, colors): 49 | """ 50 | Adds detected class names and scores in the positions defined by the 51 | top-left corner of the predicted bounding box 52 | 53 | Arguments: 54 | image (np.ndarray): an image as returned by OpenCV 55 | """ 56 | 57 | for box, text, color in zip(boxes, textes, colors): 58 | x, y = (box[0] + box[2]) / 2 - 100, (box[1] + box[3]) / 2 59 | cv2.putText( 60 | image, text, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 0, 255), 2 61 | ) 62 | 63 | return image 64 | 65 | 66 | def overlay_mask(image, masks, colors): 67 | """ 68 | Adds the instances contours for each predicted object. 69 | Each label has a different color. 70 | 71 | Arguments: 72 | image (np.ndarray): an image as returned by OpenCV 73 | predictions (BoxList): the result of the computation by the model. 74 | It should contain the field `mask` and `labels`. 75 | """ 76 | for mask, color in zip(masks, colors): 77 | if len(mask.shape) == 3: 78 | mask = mask.squeeze(0) 79 | thresh = tensor_to_numpy(mask[None, :, :]) 80 | thresh = thresh[:, :, 0] 81 | contours, hierarchy = cv2_util.findContours( 82 | thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE 83 | ) 84 | image = cv2.drawContours(image, contours, -1, color, 4) 85 | 86 | composite = image 87 | 88 | return composite 89 | 90 | 91 | def draw_object(data, colors=None, match_idx_list=None): 92 | image = data['image'] 93 | points = data['points'] 94 | objects = data['objects'] 95 | labels = objects['labels'] 96 | boxes = objects['boxes'] 97 | masks = objects['masks'] 98 | if colors is None: 99 | colors = compute_colors_for_labels(labels).tolist() 100 | 101 | # image = overlay_boxes(image, boxes, colors) 102 | image = overlay_mask(image, masks, colors) 103 | 104 | textes = [] 105 | for idx in range(len(boxes)): 106 | if idx in match_idx_list: 107 | i = match_idx_list.index(idx) 108 | text = "object" + str(i+1) 109 | else: 110 | text = "no matching object" 111 | textes.append(text) 112 | image = overlay_class_names(image, boxes, textes, colors) 113 | 114 | H, W = image.shape[:2] 115 | 116 | for p, c in zip(points, colors): 117 | p = p.cpu().numpy() 118 | if len(p) == 0: 119 | continue 120 | for i in range(len(p)): 121 | x = round(p[i][1] * W + W/2) 122 | y = round(p[i][0] * H + H/2) 123 | if x < 0: 124 | continue 125 | cv2.circle(image, (x,y), 7, tuple(c), thickness=-1) 126 | 127 | return image, colors -------------------------------------------------------------------------------- /experiments/show_object_matching/show_object_matching.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | 9 | import torch 10 | from torchvision import transforms 11 | import yaml 12 | import cv2 13 | import numpy as np 14 | import argparse 15 | import copy 16 | 17 | from model.inference import maskrcnn_inference 18 | from model.build_model import build_maskrcnn, build_gcn, build_superpoint_model 19 | from 
model.inference import detection_inference 20 | from datasets.utils.pipeline import makedir 21 | from kornia.feature import match_nn 22 | from datasets.utils import preprocess 23 | from experiments.show_object_matching.draw_object import draw_object, compute_colors_for_labels 24 | 25 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 26 | 27 | def filter_objects(data, target_labels=None): 28 | ''' 29 | data = {'image':image, 'points': output[0], 'objects': output[1], 30 | 'descs': output[2], 'keeps': output[3]} 31 | ''' 32 | points = data['points'] 33 | descs = data['descs'] 34 | 35 | keeps = data['keeps'] 36 | objects = data['objects'] 37 | labels = objects['labels'][keeps] 38 | boxes = objects['boxes'][keeps] 39 | masks = objects['masks'][keeps] 40 | 41 | if target_labels is not None: 42 | new_labels, new_boxes, new_masks = [], [], [] 43 | new_points, new_descs = [], [] 44 | for i in range(len(points)): 45 | if labels[i].item() in target_labels: 46 | new_labels.append(labels[i]) 47 | new_boxes.append(boxes[i]) 48 | new_masks.append(masks[i]) 49 | new_points.append(points[i]) 50 | new_descs.append(descs[i]) 51 | 52 | labels = torch.stack(new_labels) 53 | boxes = torch.stack(new_boxes) 54 | masks = torch.stack(new_masks) 55 | points = new_points 56 | descs = torch.stack(new_descs) 57 | 58 | objects['labels'] = labels 59 | objects['boxes'] = boxes 60 | objects['masks'] = masks 61 | data['objects'] = objects 62 | 63 | data['points'] = points 64 | data['descs'] = descs 65 | return data 66 | 67 | 68 | def draw_results(tpl_data, data, save_dir, image_name, match_thr=0.95): 69 | 70 | # get colors 71 | tpl_obj_num = len(tpl_data['objects']['boxes']) 72 | obj_num = len(data['objects']['boxes']) 73 | sum_num = tpl_obj_num + obj_num 74 | index = [(i+1)*30 for i in range(sum_num)] 75 | index = torch.tensor(index) 76 | sum_colors = compute_colors_for_labels(index).tolist() 77 | tpl_colors = sum_colors[:tpl_obj_num] 78 | colors = sum_colors[tpl_obj_num:sum_num] 79 | print(tpl_colors) 80 | 81 | # match 82 | tpl_descs = tpl_data['descs'] 83 | descs = data['descs'] 84 | dis, match = match_nn(tpl_descs, descs) 85 | dis, match = dis.cpu().squeeze(1).numpy(), match.cpu().numpy() 86 | print(dis) 87 | 88 | # update match colors 89 | match_idx_list1, match_idx_list2 = [], [] 90 | for i in range(match.shape[0]): 91 | if dis[i] > match_thr : 92 | continue 93 | idx1 = int(match[i, 0]) 94 | idx2 = int(match[i, 1]) 95 | colors[idx2] = tpl_colors[idx1] 96 | match_idx_list1.append(idx1) 97 | match_idx_list2.append(idx2) 98 | 99 | # draw object 100 | tpl_image, _ = draw_object(tpl_data, tpl_colors, match_idx_list1) 101 | image, _ = draw_object(data, colors, match_idx_list2) 102 | img = np.concatenate([tpl_image, image], 1) 103 | # img = np.concatenate([tpl_image, image], 0) 104 | 105 | # draw match 106 | tpl_boxes = tpl_data['objects']['boxes'] 107 | boxes = data['objects']['boxes'] 108 | for i in range(match.shape[0]): 109 | if dis[i] > match_thr : 110 | continue 111 | 112 | idx1 = int(match[i, 0]) 113 | idx2 = int(match[i, 1]) 114 | 115 | c = tpl_colors[idx1] 116 | 117 | tpl_box = tpl_boxes[idx1] 118 | x1 = (int)((tpl_box[0] + tpl_box[2]) / 2) 119 | y1 = (int)((tpl_box[1] + tpl_box[3]) / 2) 120 | cv2.circle(img, (x1, y1), 10, tuple(c), 2) 121 | 122 | box = boxes[idx2] 123 | x2 = (int)((box[0] + box[2]) / 2 + tpl_image.shape[-2]) 124 | y2 = (int)((box[1] + box[3]) / 2) 125 | # x2 = (int)((box[0] + box[2]) / 2) 126 | # y2 = (int)((box[1] + box[3]) / 2 + tpl_image.shape[0]) 127 | cv2.circle(img, (x2, y2), 10, tuple(c), 2) 128 | 
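# connect the matched pair with a line across the side-by-side canvas; x2 was already shifted by the
# template image width (tpl_image.shape[-2]) because the two frames were concatenated along axis 1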
129 | cv2.line(img, (x1, y1), (x2, y2), tuple(c), 2) 130 | 131 | save_path = os.path.join(save_dir, image_name) 132 | cv2.imwrite(save_path, img) 133 | 134 | 135 | def network_output(image, points_model, maskrcnn_model, gcn_model, configs, filter_labes=None): 136 | with torch.no_grad(): 137 | data_config = configs['data'] 138 | superpoint_model_config = configs['model']['superpoint'] 139 | detection_threshold = superpoint_model_config['detection_threshold'] 140 | use_gpu = configs['use_gpu'] 141 | 142 | transform = transforms.ToTensor() 143 | image = transform(image) 144 | image = image.unsqueeze(0) 145 | batch = {'image': image} 146 | 147 | points_output, detections, _ = detection_inference(maskrcnn_model, points_model, batch, use_gpu, 1, 148 | detection_threshold, data_config, save_dir=None) 149 | 150 | batch_points, batch_descs = preprocess.extract_points_clusters(points_output, list([detections[0]['masks']])) 151 | 152 | original_sizes = [list(img.shape[-2:]) for img in image] 153 | 154 | batch_points = preprocess.normalize_points(batch_points, original_sizes) 155 | 156 | batch_points = preprocess.batch_merge(batch_points) 157 | batch_descs = preprocess.batch_merge(batch_descs) 158 | 159 | keeps = preprocess.select_good_clusters(batch_points) 160 | 161 | good_points, good_descs = [], [] 162 | for i in range(len(keeps)): 163 | if keeps[i].item(): 164 | good_points.append(batch_points[i]) 165 | good_descs.append(batch_descs[i]) 166 | 167 | batch_object_descs, _ = gcn_model(good_points, good_descs) 168 | 169 | return good_points, detections[0], batch_object_descs, keeps 170 | 171 | 172 | def read_image(img_path): 173 | image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 174 | if len(image.shape) == 2: 175 | image = cv2.merge([image, image, image]) 176 | return image 177 | 178 | def show_object_matching(configs): 179 | 180 | # read configs 181 | save_dir = configs['save_dir'] 182 | data_root = configs['data_root'] 183 | model_dir = configs['model_dir'] 184 | use_gpu = configs['use_gpu'] 185 | superpoint_model_config = configs['model']['superpoint'] 186 | detection_thr = superpoint_model_config['detection_threshold'] 187 | configs['num_gpu'] = [0] 188 | configs['public_model'] = 0 189 | 190 | superpoint_model_path = os.path.join(model_dir, "points_model.pth") 191 | maskrcnn_model_path = os.path.join(model_dir, "maskrcnn_model.pth") 192 | gcn_model_path = os.path.join(model_dir, "gcn_model.pth") 193 | configs["maskrcnn_model_path"] = maskrcnn_model_path 194 | configs["superpoint_model_path"] = superpoint_model_path 195 | configs["graph_model_path"] = gcn_model_path 196 | 197 | # model 198 | superpoint_model = build_superpoint_model(configs, requires_grad=False) 199 | superpoint_model.eval() 200 | 201 | maskrcnn_model = build_maskrcnn(configs) 202 | maskrcnn_model.eval() 203 | 204 | gcn_model = build_gcn(configs) 205 | gcn_model.eval() 206 | 207 | # template image 208 | tpl_path = os.path.join(data_root, "template.jpg") 209 | tpl_image = read_image(tpl_path) 210 | tpl_output = network_output(tpl_image, superpoint_model, maskrcnn_model, gcn_model, configs) 211 | tpl_data = {'image':tpl_image, 'points': tpl_output[0], 'objects': tpl_output[1], 212 | 'descs': tpl_output[2], 'keeps': tpl_output[3]} 213 | 214 | 215 | # filter data 216 | target_labels = [40] 217 | # tpl_data = filter_objects(tpl_data, target_labels) 218 | tpl_labels = tpl_data['objects']['labels'] 219 | print(tpl_labels) 220 | 221 | seq_path = os.path.join(data_root, "seq") 222 | image_names = os.listdir(seq_path) 223 | 
image_names.sort() 224 | with torch.no_grad(): 225 | for image_name in image_names: 226 | image_path = tpl_path = os.path.join(seq_path, image_name) 227 | image = read_image(image_path) 228 | output = network_output(image, superpoint_model, maskrcnn_model, gcn_model, configs) 229 | data = {'image':image, 'points': output[0], 'objects': output[1], 230 | 'descs': output[2], 'keeps': output[3]} 231 | 232 | # data = filter_objects(data, target_labels) 233 | labels = data['objects']['labels'] 234 | print(labels) 235 | draw_results(tpl_data, data, save_dir, image_name) 236 | 237 | 238 | def main(): 239 | parser = argparse.ArgumentParser(description="show match") 240 | parser.add_argument( 241 | "-c", "--config_file", 242 | dest = "config_file", 243 | type = str, 244 | default = "" 245 | ) 246 | parser.add_argument( 247 | "-g", "--gpu", 248 | dest = "gpu", 249 | type = int, 250 | default = 0 251 | ) 252 | parser.add_argument( 253 | "-s", "--save_dir", 254 | dest = "save_dir", 255 | type = str, 256 | default = "" 257 | ) 258 | parser.add_argument( 259 | "-d", "--data_root", 260 | dest = "data_root", 261 | type = str, 262 | default = "" 263 | ) 264 | parser.add_argument( 265 | "-m", "--model_dir", 266 | dest = "model_dir", 267 | type = str, 268 | default = "" 269 | ) 270 | args = parser.parse_args() 271 | config_file = args.config_file 272 | f = open(config_file, 'r', encoding='utf-8') 273 | configs = f.read() 274 | configs = yaml.load(configs) 275 | configs['use_gpu'] = args.gpu 276 | configs['data_root'] = args.data_root 277 | configs['model_dir'] = args.model_dir 278 | configs['save_dir'] = args.save_dir 279 | 280 | show_object_matching(configs) 281 | 282 | if __name__ == "__main__": 283 | main() 284 | -------------------------------------------------------------------------------- /experiments/utils/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | import os 7 | import yaml 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | 12 | def plot_pr_curves(pr_curves, dataset_name, save_dir): 13 | ''' 14 | plot pr curves 15 | input: 16 | pr_curves: Dict[interval: pr_curve] 17 | dataset_name: "dataset" + "seq" 18 | save_dir: save directory 19 | ''' 20 | 21 | plt.title(dataset_name) 22 | colors = ['green', 'red', 'blue', 'yellow', 'darkviolet', 'sandybrown'] 23 | for k, c in zip(pr_curves.keys(), colors): 24 | pr_curve = pr_curves[k] 25 | xs, ys = [], [] 26 | for pr in pr_curve: 27 | xs.append(pr[0]) 28 | ys.append(pr[1]) 29 | 30 | plt.plot(xs, ys, color=c, label=str(k)) 31 | 32 | plt.legend() 33 | plt.xlabel('precision') 34 | plt.ylabel('recall') 35 | 36 | image_name = dataset_name + ".jpg" 37 | save_path = os.path.join(save_dir, image_name) 38 | plt.savefig(save_path) 39 | 40 | 41 | def get_pr_curve_area(pr_curve): 42 | ''' 43 | pr_curve: [[p0, r0], [p1, r1]... 
[pn, rn]], thr: small->big, precision: small->big, recall: big->small 44 | ''' 45 | area = 0.0 46 | for i in range(1, len(pr_curve)): 47 | p0, r0 = pr_curve[i-1] 48 | p1, r1 = pr_curve[i] 49 | 50 | area = area + (r0 - r1) * (p1 + p0) / 2 51 | 52 | return area 53 | 54 | 55 | def plot_tracking_details(results_list, save_dir, name=None, configs=None): 56 | if configs is not None: 57 | title = configs['title'] 58 | colors = configs['colors'] 59 | linewidth = configs['linewidth'] 60 | xlabel = configs['xlabel'] 61 | ylabel = configs['ylabel'] 62 | fontsize = configs['fontsize'] 63 | figsize = configs['figsize'] 64 | dpi = configs['dpi'] 65 | else: 66 | title = results_list[0]['dataset'] if name is None else name 67 | colors = ['green', 'red', 'blue', 'yellow', 'darkviolet', 'sandybrown'] 68 | linewidth = 3 69 | xlabel = "recall" 70 | ylabel = "precision" 71 | fontsize = 20 72 | figsize = (10, 10) 73 | dpi = 100 74 | 75 | plt.title(title) 76 | plt.xticks(fontsize=fontsize) 77 | plt.yticks(fontsize=fontsize) 78 | 79 | for i in range(len(results_list)): 80 | pr_curves = results_list[i]['pr_curves'] 81 | areas = results_list[i]['areas'] 82 | for k in pr_curves.keys(): 83 | pr_curve = pr_curves[k] 84 | xs, ys = [], [] 85 | for pr in pr_curve: 86 | xs.append(pr[1]) # recall 87 | ys.append(pr[0]) # precision 88 | 89 | area = round(areas[k], 4) 90 | 91 | label = "[{}] k = {}, {}".format(area, k, results_list[i]['model']) 92 | linestyle = '-' if i==0 else '--' 93 | plt.plot(xs, ys, color=colors[k], label=label, linewidth=linewidth, linestyle=linestyle) 94 | 95 | plt.legend(fontsize=fontsize) 96 | plt.grid() 97 | plt.xlabel(xlabel, fontsize=fontsize) 98 | plt.ylabel(ylabel, fontsize=fontsize) 99 | 100 | image_name = title + ".jpg" 101 | save_path = os.path.join(save_dir, image_name) 102 | plt.savefig(save_path) 103 | 104 | 105 | def save_tracking_results(results, save_dir): 106 | ''' 107 | saving tracking experiment results 108 | 109 | results: 110 | dataset: * 111 | model: * 112 | pr_curves: 113 | interval_0: [[p00, r00], [p01, r01]... [p0n, r0n]] 114 | interval_1: [[p10, r10], [p11, r11]... [p1n, r1n]] 115 | ... 116 | interval_m: [[pm0, rm0], [pm1, rm1]... 
[pmn, rmn]] 117 | ''' 118 | file_name = results['dataset'] + "_" + results['model'] + ".yaml" 119 | file_path = os.path.join(save_dir, file_name) 120 | fp = open(file_path, 'w') 121 | fp.write(yaml.dump(results)) 122 | 123 | 124 | def read_tracking_results(file_path): 125 | f = open(config_file, 'r', encoding='utf-8') 126 | results = f.read() 127 | f.close() 128 | return results -------------------------------------------------------------------------------- /model/backbone/fcn.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | from torchvision import models 7 | from torchvision.models.vgg import VGG 8 | 9 | class FCNs(nn.Module): 10 | 11 | def __init__(self, pretrained_net): 12 | super(FCNs,self).__init__() 13 | self.pretrained_net = pretrained_net 14 | self.relu = nn.ReLU(inplace=True) 15 | self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 16 | self.bn1 = nn.BatchNorm2d(512) 17 | self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 18 | self.bn2 = nn.BatchNorm2d(256) 19 | self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 20 | self.bn3 = nn.BatchNorm2d(128) 21 | self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 22 | self.bn4 = nn.BatchNorm2d(64) 23 | self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1) 24 | self.bn5 = nn.BatchNorm2d(32) 25 | self.output_dim = 32 26 | # self.classifier = nn.Conv2d(32, n_class, kernel_size=1) 27 | 28 | def forward(self, x): 29 | output = self.pretrained_net(x) 30 | x5 = output['x5'] # size=(N, 512, x.H/32, x.W/32) 31 | x4 = output['x4'] # size=(N, 512, x.H/16, x.W/16) 32 | x3 = output['x3'] # size=(N, 256, x.H/8, x.W/8) 33 | x2 = output['x2'] # size=(N, 128, x.H/4, x.W/4) 34 | x1 = output['x1'] # size=(N, 64, x.H/2, x.W/2) 35 | 36 | score = self.bn1(self.relu(self.deconv1(x5))) # size=(N, 512, x.H/16, x.W/16) 37 | score = score + x4 # element-wise add, size=(N, 512, x.H/16, x.W/16) 38 | score = self.bn2(self.relu(self.deconv2(score))) # size=(N, 256, x.H/8, x.W/8) 39 | score = score + x3 # element-wise add, size=(N, 256, x.H/8, x.W/8) 40 | score = self.bn3(self.relu(self.deconv3(score))) # size=(N, 128, x.H/4, x.W/4) 41 | score = score + x2 # element-wise add, size=(N, 128, x.H/4, x.W/4) 42 | score = self.bn4(self.relu(self.deconv4(score))) # size=(N, 64, x.H/2, x.W/2) 43 | score = score + x1 # element-wise add, size=(N, 64, x.H/2, x.W/2) 44 | score = self.bn5(self.relu(self.deconv5(score))) # size=(N, 32, x.H, x.W) 45 | # score = self.classifier(score) # size=(N, n_class, x.H/1, x.W/1) 46 | 47 | return score # size=(N, n_class, x.H/1, x.W/1) 48 | 49 | 50 | class VGGNet(VGG): 51 | def __init__(self, pretrained=True, model='vgg16', requires_grad=True, remove_fc=True, show_params=False, input_channel=3): 52 | super(VGGNet,self).__init__(make_layers(cfg[model], input_channel=input_channel)) 53 | self.ranges = ranges[model] 54 | 55 | if pretrained: 56 | exec("self.load_state_dict(models.%s(pretrained=True).state_dict())" % model) 57 | 58 | if not requires_grad: 59 | for param in super().parameters(): 60 | param.requires_grad = False 61 | 62 | if remove_fc: # delete redundant fully-connected layer params, can save memory 
63 | del self.classifier 64 | 65 | if show_params: 66 | for name, param in self.named_parameters(): 67 | print(name, param.size()) 68 | 69 | def forward(self, x): 70 | output = {} 71 | 72 | # get the output of each maxpooling layer (5 maxpool in VGG net) 73 | for idx in range(len(self.ranges)): 74 | for layer in range(self.ranges[idx][0], self.ranges[idx][1]): 75 | x = self.features[layer](x) 76 | output["x%d"%(idx+1)] = x 77 | 78 | return output 79 | 80 | 81 | ranges = { 82 | 'vgg11': ((0, 3), (3, 6), (6, 11), (11, 16), (16, 21)), 83 | 'vgg13': ((0, 5), (5, 10), (10, 15), (15, 20), (20, 25)), 84 | 'vgg16': ((0, 5), (5, 10), (10, 17), (17, 24), (24, 31)), 85 | 'vgg19': ((0, 5), (5, 10), (10, 19), (19, 28), (28, 37)) 86 | } 87 | 88 | # cropped version from https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py 89 | cfg = { 90 | 'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 91 | 'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 92 | 'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 93 | 'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 94 | } 95 | 96 | def make_layers(cfg, batch_norm=False, input_channel=3): 97 | layers = [] 98 | in_channels = input_channel 99 | for v in cfg: 100 | if v == 'M': 101 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 102 | else: 103 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 104 | if batch_norm: 105 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 106 | else: 107 | layers += [conv2d, nn.ReLU(inplace=True)] 108 | in_channels = v 109 | return nn.Sequential(*layers) 110 | 111 | 112 | if __name__ == "__main__": 113 | batch_size, n_class, h, w = 10, 20, 160, 160 114 | 115 | # test output size 116 | vgg_model = VGGNet(requires_grad=True) 117 | input = torch.autograd.Variable(torch.randn(batch_size, 3, 224, 224)) 118 | output = vgg_model(input) 119 | assert output['x5'].size() == torch.Size([batch_size, 512, 7, 7]) 120 | 121 | fcn_model = FCNs(pretrained_net=vgg_model, n_class=n_class) 122 | input = torch.autograd.Variable(torch.randn(batch_size, 3, h, w)) 123 | output = fcn_model(input) 124 | assert output.size() == torch.Size([batch_size, n_class, h, w]) 125 | 126 | print("Pass size check") 127 | 128 | # test a random batch, loss should decrease 129 | fcn_model = FCNs(pretrained_net=vgg_model, n_class=n_class) 130 | criterion = nn.BCELoss() 131 | optimizer = optim.SGD(fcn_model.parameters(), lr=1e-3, momentum=0.9) 132 | input = torch.autograd.Variable(torch.randn(batch_size, 3, h, w)) 133 | y = torch.autograd.Variable(torch.randn(batch_size, n_class, h, w), requires_grad=False) 134 | for iter in range(10): 135 | optimizer.zero_grad() 136 | output = fcn_model(input) 137 | output = nn.functional.sigmoid(output) 138 | loss = criterion(output, y) 139 | loss.backward() 140 | print("iter{}, loss {}".format(iter, loss.data[0])) 141 | optimizer.step() -------------------------------------------------------------------------------- /model/backbone/resnet_fpn.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from collections import OrderedDict 3 | from torch import nn 4 | from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool 5 | from torchvision.ops import misc as misc_nn_ops 6 | from torchvision.models._utils import IntermediateLayerGetter 7 | from 
torchvision.models import resnet 8 | 9 | 10 | class BackboneWithFPN(nn.Module): 11 | """ 12 | Adds a FPN on top of a model. 13 | Internally, it uses torchvision.models._utils.IntermediateLayerGetter to 14 | extract a submodel that returns the feature maps specified in return_layers. 15 | The same limitations of IntermediatLayerGetter apply here. 16 | Args: 17 | backbone (nn.Module) 18 | return_layers (Dict[name, new_name]): a dict containing the names 19 | of the modules for which the activations will be returned as 20 | the key of the dict, and the value of the dict is the name 21 | of the returned activation (which the user can specify). 22 | in_channels_list (List[int]): number of channels for each feature map 23 | that is returned, in the order they are present in the OrderedDict 24 | out_channels (int): number of channels in the FPN. 25 | Attributes: 26 | out_channels (int): the number of channels in the FPN 27 | """ 28 | def __init__(self, backbone, return_layers, in_channels_list, out_channels, extra_blocks=None): 29 | super(BackboneWithFPN, self).__init__() 30 | 31 | if extra_blocks is None: 32 | extra_blocks = LastLevelMaxPool() 33 | 34 | self.body = IntermediateLayerGetter(backbone, return_layers=return_layers) 35 | self.fpn = FeaturePyramidNetwork( 36 | in_channels_list=in_channels_list, 37 | out_channels=out_channels, 38 | extra_blocks=extra_blocks, 39 | ) 40 | self.in_channels_list = in_channels_list 41 | self.out_channels = out_channels 42 | 43 | def forward(self, x): 44 | resnet_x = self.body(x) 45 | fpn_x = self.fpn(resnet_x) 46 | return fpn_x, resnet_x 47 | 48 | 49 | def resnet_fpn_backbone( 50 | backbone_name, 51 | pretrained, 52 | norm_layer=misc_nn_ops.FrozenBatchNorm2d, 53 | trainable_layers=3, 54 | returned_layers=None, 55 | extra_blocks=None 56 | ): 57 | """ 58 | Constructs a specified ResNet backbone with FPN on top. Freezes the specified number of layers in the backbone. 59 | 60 | Examples:: 61 | 62 | >>> from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 63 | >>> backbone = resnet_fpn_backbone('resnet50', pretrained=True, trainable_layers=3) 64 | >>> # get some dummy image 65 | >>> x = torch.rand(1,3,64,64) 66 | >>> # compute the output 67 | >>> output = backbone(x) 68 | >>> print([(k, v.shape) for k, v in output.items()]) 69 | >>> # returns 70 | >>> [('0', torch.Size([1, 256, 16, 16])), 71 | >>> ('1', torch.Size([1, 256, 8, 8])), 72 | >>> ('2', torch.Size([1, 256, 4, 4])), 73 | >>> ('3', torch.Size([1, 256, 2, 2])), 74 | >>> ('pool', torch.Size([1, 256, 1, 1]))] 75 | 76 | Args: 77 | backbone_name (string): resnet architecture. Possible values are 'ResNet', 'resnet18', 'resnet34', 'resnet50', 78 | 'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2' 79 | norm_layer (torchvision.ops): it is recommended to use the default value. For details visit: 80 | (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267) 81 | pretrained (bool): If True, returns a model with backbone pre-trained on Imagenet 82 | trainable_layers (int): number of trainable (not frozen) resnet layers starting from final block. 83 | Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. 
84 | """ 85 | backbone = resnet.__dict__[backbone_name]( 86 | pretrained=pretrained, 87 | norm_layer=norm_layer) 88 | 89 | # select layers that wont be frozen 90 | assert trainable_layers <= 5 and trainable_layers >= 0 91 | layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers] 92 | # freeze layers only if pretrained backbone is used 93 | for name, parameter in backbone.named_parameters(): 94 | if all([not name.startswith(layer) for layer in layers_to_train]): 95 | parameter.requires_grad_(False) 96 | 97 | if extra_blocks is None: 98 | extra_blocks = LastLevelMaxPool() 99 | 100 | if returned_layers is None: 101 | returned_layers = [1, 2, 3, 4] 102 | assert min(returned_layers) > 0 and max(returned_layers) < 5 103 | return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)} 104 | 105 | in_channels_stage2 = backbone.inplanes // 8 106 | in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers] 107 | out_channels = 256 108 | return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks) 109 | 110 | 111 | def _validate_resnet_trainable_layers(pretrained, trainable_backbone_layers): 112 | # dont freeze any layers if pretrained model or backbone is not used 113 | if not pretrained: 114 | if trainable_backbone_layers is not None: 115 | warnings.warn( 116 | "Changing trainable_backbone_layers has not effect if " 117 | "neither pretrained nor pretrained_backbone have been set to True, " 118 | "falling back to trainable_backbone_layers=5 so that all layers are trainable") 119 | trainable_backbone_layers = 5 120 | # by default, freeze first 2 blocks following Faster R-CNN 121 | if trainable_backbone_layers is None: 122 | trainable_backbone_layers = 3 123 | assert trainable_backbone_layers <= 5 and trainable_backbone_layers >= 0 124 | return trainable_backbone_layers 125 | -------------------------------------------------------------------------------- /model/build_model.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import os 4 | import time 5 | import argparse 6 | import yaml 7 | import copy 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | import torch.optim as optim 13 | from torch.autograd import Variable 14 | from torch.optim import lr_scheduler 15 | # from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 16 | 17 | from model.backbone.resnet_fpn import resnet_fpn_backbone 18 | from model.backbone.fcn import VGGNet 19 | from model.mask_rcnn.mask_rcnn import MaskRCNN 20 | from model.superpoint.vgg_like import VggLike 21 | from model.superpoint.superpoint_public_model import SuperPointNet 22 | from model.graph_models.object_descriptor import ObjectDescriptor 23 | 24 | def build_maskrcnn(configs): 25 | ## command line config 26 | num_gpu = configs['num_gpu'] 27 | use_gpu = (len(num_gpu) > 0) and configs['use_gpu'] 28 | pretrained_model_path = configs['maskrcnn_model_path'] 29 | public_model = configs['public_model'] 30 | ## data cofig 31 | nclass = configs['data']['nclass'] 32 | ## mask_rcnn config 33 | maskrcnn_model_config = configs['model']['maskrcnn'] 34 | backbone_type = maskrcnn_model_config['backbone_type'] 35 | image_mean = maskrcnn_model_config['image_mean'] 36 | image_std = maskrcnn_model_config['image_std'] 37 | trainable_layers = maskrcnn_model_config['trainable_layers'] 38 | 39 | # model 40 | # backbone = ResNetFPN(pretrained_type) 41 | backbone = 
resnet_fpn_backbone(backbone_type, False, trainable_layers=trainable_layers) 42 | model = MaskRCNN(backbone, nclass, image_mean=image_mean, image_std=image_std) 43 | 44 | if pretrained_model_path != "" and public_model: 45 | model_dict = model.state_dict() 46 | pretrained_dict = torch.load(pretrained_model_path) 47 | remove_dict = ['roi_heads.box_predictor.cls_score.weight', 48 | 'roi_heads.box_predictor.cls_score.bias', 49 | 'roi_heads.box_predictor.bbox_pred.weight', 50 | 'roi_heads.box_predictor.bbox_pred.bias', 51 | 'roi_heads.mask_predictor.mask_fcn_logits.weight', 52 | 'roi_heads.mask_predictor.mask_fcn_logits.bias'] 53 | pretrained_dict = {k:v for k, v in pretrained_dict.items() if ((k in model_dict) and (k not in remove_dict))} 54 | model_dict.update(pretrained_dict) 55 | model.load_state_dict(model_dict) 56 | print("load model from {}".format(pretrained_model_path)) 57 | print("load parameters : {}".format(pretrained_dict.keys())) 58 | 59 | 60 | if use_gpu: 61 | model = model.cuda() 62 | model = torch.nn.DataParallel(model, device_ids=num_gpu) 63 | print("Finish cuda loading") 64 | 65 | if pretrained_model_path != "" and (not public_model): 66 | model_dict = model.state_dict() 67 | pretrained_dict = torch.load(pretrained_model_path) 68 | model_dict.update(pretrained_dict) 69 | model.load_state_dict(model_dict) 70 | print("load model from {}".format(pretrained_model_path)) 71 | 72 | return model 73 | 74 | def build_superpoint_model(configs, requires_grad=True): 75 | ## command line config 76 | num_gpu = configs['num_gpu'] 77 | use_gpu = (len(num_gpu) > 0) and configs['use_gpu'] 78 | pretrained_model_path = configs['superpoint_model_path'] 79 | 80 | vgg_model = VGGNet(requires_grad=requires_grad) 81 | model = VggLike(vgg_model) 82 | 83 | # model = SuperPointNet() 84 | # if pretrained_model_path != "": 85 | # model_dict = model.state_dict() 86 | # pretrained_dict = torch.load(pretrained_model_path) 87 | # model_dict.update(pretrained_dict) 88 | # model.load_state_dict(model_dict) 89 | # print("load model from {}".format(pretrained_model_path)) 90 | 91 | 92 | if use_gpu: 93 | model = model.cuda() 94 | model = torch.nn.DataParallel(model, device_ids=num_gpu) 95 | print("Finish cuda loading") 96 | 97 | if pretrained_model_path != "": 98 | model_dict = model.state_dict() 99 | pretrained_dict = torch.load(pretrained_model_path) 100 | model_dict.update(pretrained_dict) 101 | model.load_state_dict(model_dict) 102 | print("load model from {}".format(pretrained_model_path)) 103 | 104 | return model 105 | 106 | def build_gcn(configs): 107 | num_gpu = configs['num_gpu'] 108 | use_gpu = (len(num_gpu) > 0) 109 | gcn_config = configs['model']['gcn'] 110 | pretrained_model_path = configs['graph_model_path'] 111 | 112 | model = ObjectDescriptor(gcn_config) 113 | 114 | if use_gpu: 115 | model = model.cuda() 116 | model = torch.nn.DataParallel(model, device_ids=num_gpu) 117 | print("Finish cuda loading") 118 | 119 | if pretrained_model_path != "": 120 | if use_gpu: 121 | model.load_state_dict(torch.load(pretrained_model_path)) 122 | else: 123 | model.load_state_dict(torch.load(pretrained_model_path, map_location=torch.device('cpu'))) 124 | print("load model from {}".format(pretrained_model_path)) 125 | 126 | return model -------------------------------------------------------------------------------- /model/graph_models/attention.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import math 5 | import 
torch 6 | import torch.nn as nn 7 | 8 | class GraphAtten(nn.Module): 9 | def __init__(self, nfeat, nhid, nout, alpha=0.2, nheads=8): 10 | super(GraphAtten, self).__init__() 11 | self.attns = [Attention(nfeat, nhid, alpha) for _ in range(nheads)] 12 | for i, attention in enumerate(self.attns): 13 | self.add_module('attention_{}'.format(i), attention) 14 | 15 | self.relu = nn.ReLU() 16 | 17 | self.merge = nn.Linear(nheads*nhid, nhid) 18 | 19 | self.mlp1 = nn.Linear((nfeat+nhid), (nfeat+nhid)) 20 | self.bn1 = nn.BatchNorm1d((nfeat+nhid)) 21 | self.mlp2 = nn.Linear((nfeat+nhid), nout) 22 | self.bn2 = nn.BatchNorm1d(nout) 23 | 24 | def print_para(self, layer): 25 | model_dict = self.state_dict() 26 | para = model_dict[layer] 27 | print("layer = {}".format(para)) 28 | 29 | def forward(self, x): 30 | m = torch.cat([attn(x) for attn in self.attns], dim=1) 31 | m = self.relu(self.merge(m)) 32 | x = torch.cat([x, m], 1) 33 | x = self.relu(self.bn1(self.mlp1(x))) 34 | x = self.relu(self.bn2(self.mlp2(x))) 35 | return x 36 | 37 | 38 | class Attention(nn.Module): 39 | def __init__(self, in_features, out_features, alpha): 40 | super(Attention, self).__init__() 41 | self.tranq = nn.Linear(in_features, out_features) 42 | self.trank = nn.Linear(in_features, out_features) 43 | self.tranv = nn.Linear(in_features, out_features) 44 | self.norm = nn.Sequential(nn.Softmax(dim=1)) 45 | self.leakyrelu = nn.LeakyReLU(alpha) 46 | self.relu = nn.ReLU() 47 | 48 | def forward(self, x): 49 | q = self.relu(self.tranq(x)) # n * dim 50 | k = self.relu(self.trank(x)) # n * dim 51 | v = self.relu(self.tranv(x)) 52 | 53 | adj = torch.einsum('nd,dm->nm', q, k.t()) # n * n 54 | adj = self.leakyrelu(adj) 55 | adj = self.norm(adj) 56 | 57 | m = adj @ v 58 | return m -------------------------------------------------------------------------------- /model/graph_models/descriptor_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | class DescriptorLoss(nn.Module): 13 | ''' 14 | loss for object descriptor 15 | ''' 16 | def __init__(self, config): 17 | super().__init__() 18 | self.config = config 19 | 20 | def forward(self, descs, conns): 21 | ''' 22 | descs: N * D 23 | conns: N * N 24 | ''' 25 | similarity = torch.einsum('nd,dm->nm', descs, descs.t()) # N * N 26 | 27 | print(similarity) 28 | 29 | pos_idx = conns 30 | pos_similarity = similarity * pos_idx 31 | 32 | neg_idx0 = torch.ones_like(conns) - conns 33 | neg_idx0 = neg_idx0 - torch.eye(len(conns), device=conns.device, dtype=conns.dtype) 34 | neg_similarity0 = similarity * neg_idx0 35 | value, index = neg_similarity0.topk(1, largest=True) 36 | value = value.repeat(1, similarity.shape[1]) 37 | neg_idx1 = (neg_similarity0 == value).float() 38 | 39 | zero = torch.tensor(0.0, dtype=similarity.dtype, device=similarity.device) 40 | positive_dist = torch.max(zero, self.config['train']['positive_margin'] - similarity) 41 | negative_dist = torch.max(zero, similarity - self.config['train']['negative_margin']) 42 | 43 | ploss = torch.sum(pos_idx * positive_dist) / torch.sum(pos_idx) 44 | nloss = torch.sum(neg_idx1 * negative_dist) / torch.sum(neg_idx1) 45 | 46 | return ploss, nloss -------------------------------------------------------------------------------- /model/graph_models/object_descriptor.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import math 5 | import torch 6 | import torch.nn as nn 7 | 8 | from model.graph_models.attention import GraphAtten 9 | 10 | class ObjectDescriptor(nn.Module): 11 | def __init__(self, config): 12 | super(ObjectDescriptor, self).__init__() 13 | points_encoder_dims = config['points_encoder_dims'] 14 | descriptor_dim = config['descriptor_dim'] 15 | nhid = config['hidden_dim'] 16 | alpha = config['alpha'] 17 | nheads = config['nheads'] 18 | nout = config['nout'] 19 | nfeat = descriptor_dim + points_encoder_dims[-1] 20 | self.points_encoder = PointsEncoder(points_encoder_dims) 21 | self.gcn = GCN(nfeat, nhid, nout, alpha, nheads) 22 | 23 | def forward(self, batch_points, batch_descs): 24 | ''' 25 | inputs: 26 | batch_points: List[Tensor], normalized points, each tensor belong to a object 27 | batch_descs: List[Tensor] 28 | ''' 29 | batch_features, locations = [], [] 30 | for points, descs in zip(batch_points, batch_descs): 31 | encoded_points = self.points_encoder(points) 32 | features = torch.cat((descs, encoded_points), dim=1) 33 | features, w = self.gcn(features) 34 | batch_features.append(features) 35 | locations.append(w) 36 | batch_features = torch.stack(batch_features) 37 | batch_features = nn.functional.normalize(batch_features, p=2, dim=-1) 38 | locations = torch.cat(locations, 0) 39 | return batch_features, locations 40 | 41 | 42 | class PointsEncoder(nn.Module): 43 | def __init__(self, dims): 44 | super(PointsEncoder, self).__init__() 45 | layers = [] 46 | for i in range(len(dims)-1): 47 | layers.append(nn.Linear(dims[i], dims[i+1])) 48 | if i != len(dims)-2: 49 | layers.append(nn.BatchNorm1d((dims[i+1]))) 50 | layers.append(nn.ReLU()) 51 | 52 | self.layers = layers 53 | for i, layer in enumerate(self.layers): 54 | self.add_module('point_encoder{}'.format(i), layer) 55 | 56 | def forward(self, x): 57 | for layer in self.layers: 58 | x = layer(x) 59 | x = nn.functional.normalize(x, p=2, dim=-1) 60 | return x 61 | 62 | 63 | class GCN(nn.Module): 64 | def __init__(self, nfeat, nhid, nout, alpha=0.2, nheads=8): 65 | super(GCN, self).__init__() 66 | 67 | self.atten1 = GraphAtten(nfeat, nhid, nfeat, alpha, nheads) 68 | self.atten2 = GraphAtten(nfeat, nhid, nfeat, alpha, nheads) 69 | self.tran1 = nn.Linear(nfeat, nfeat) 70 | self.relu = nn.ReLU() 71 | self.sparsification = Sparsification(nfeat, nout) 72 | 73 | def forward(self, x): 74 | x = self.atten1(x) 75 | x = self.atten2(x) 76 | x = self.relu(self.tran1(x)) 77 | x, w = self.sparsification(x) 78 | 79 | return x, w 80 | 81 | 82 | class Sparsification(nn.Module): 83 | def __init__(self, input_dim, output_dim): 84 | super(Sparsification, self).__init__() 85 | 86 | self.relu = nn.ReLU() 87 | self.softmax = nn.Softmax(dim=-1) 88 | self.location_encoder1 = nn.Linear(input_dim, input_dim) 89 | self.location_encoder2 = nn.Linear(input_dim, output_dim) 90 | 91 | self.feature_encoder1 = nn.Linear(input_dim, input_dim) 92 | self.feature_encoder2 = nn.Linear(input_dim, output_dim) 93 | self.feature_encoder3 = nn.Linear(output_dim, output_dim) 94 | 95 | 96 | def forward(self, x): 97 | 98 | 99 | descriptor = self.relu(self.feature_encoder1(x)) 100 | descriptor = self.relu(self.feature_encoder2(descriptor)) 101 | 102 | locations = self.relu(self.location_encoder1(x)) 103 | locations = self.relu(self.location_encoder2(locations)) 104 | norm_locations = nn.functional.normalize(locations, p=2, dim=-1) 105 | 106 | 
descriptor = locations * descriptor 107 | descriptor = torch.sum(descriptor, 0) 108 | descriptor = self.feature_encoder3(descriptor) 109 | 110 | return descriptor, norm_locations -------------------------------------------------------------------------------- /model/inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import os 8 | import copy 9 | import numpy as np 10 | import cv2 11 | import torch 12 | import torch.distributed as dist 13 | from torchvision import transforms 14 | 15 | from utils.tools import tensor_to_numpy 16 | from datasets.utils.preprocess import preprocess_validation_data 17 | from datasets.utils import postprocess as post 18 | from utils.tools import tensor_to_numpy 19 | from datasets.utils.pipeline import makedir 20 | 21 | def detection_inference(maskrcnn_model, superpoint_model, batch, use_gpu, gaussian_radius, detection_threshold, 22 | data_config, save_dir=None): 23 | with torch.no_grad(): 24 | original_images = batch['image'] 25 | original_images = [tensor_to_numpy(img.clone()) for img in original_images] 26 | 27 | # preprocess 28 | images, sizes, maskrcnn_targets, superpoint_targets = preprocess_validation_data(batch, 29 | use_gpu, gaussian_radius, data_config) 30 | original_sizes = sizes['original_sizes'] 31 | new_sizes = sizes['new_sizes'] 32 | 33 | # model inference 34 | _, detections = maskrcnn_model(images, sizes) 35 | points_output = superpoint_model(images) 36 | 37 | # postprocess 38 | detections, points_output = post.postprocess(new_sizes, original_sizes, detection_threshold, 39 | detections, points_output) 40 | 41 | # save results 42 | if save_dir is not None: 43 | image_names = batch['image_name'] 44 | results = post.save_detection_results(original_images, image_names, save_dir, detections, 45 | None, points_output, True, True) 46 | 47 | return points_output, detections, maskrcnn_targets 48 | 49 | 50 | def maskrcnn_inference(model, batch, use_gpu, gaussian_radius, data_config, save_dir=None): 51 | with torch.no_grad(): 52 | original_images = batch['image'] 53 | original_images = [tensor_to_numpy(img.clone()) for img in original_images] 54 | 55 | # preprocess 56 | images, sizes, maskrcnn_targets, _ = preprocess_validation_data(batch, use_gpu, gaussian_radius, data_config) 57 | original_sizes = sizes['original_sizes'] 58 | new_sizes = sizes['new_sizes'] 59 | 60 | # model inference 61 | _, detections = model(images, sizes) 62 | 63 | # postprocess 64 | detections, _ = post.postprocess(new_sizes, original_sizes, detections=detections) 65 | 66 | # save results 67 | if save_dir is not None: 68 | image_names = batch['image_name'] 69 | results = post.save_detection_results(original_images, image_names, save_dir, detections, None, None, True, False) 70 | 71 | return detections, maskrcnn_targets 72 | 73 | 74 | def superpoint_inference(model, batch, use_gpu, gaussian_radius, data_config, detection_threshold, save_dir=None): 75 | with torch.no_grad(): 76 | original_images = batch['image'] 77 | original_images = [tensor_to_numpy(img.clone()) for img in original_images] 78 | 79 | # preprocess 80 | images, sizes, maskrcnn_targets, superpoint_targets = preprocess_validation_data(batch, use_gpu, gaussian_radius, data_config) 81 | original_sizes = sizes['original_sizes'] 82 | new_sizes = sizes['new_sizes'] 83 | 84 | # model inference 85 | points_output = model(images) 86 | 87 | # 
postprocess 88 | _, points_output = post.postprocess(new_sizes, original_sizes, detection_threshold, None, points_output) 89 | 90 | # save gt 91 | if save_dir is not None: 92 | print("save_dir = {}".format(save_dir)) 93 | save_dir_list = [os.path.join(save_dir, image_name) for image_name in batch['image_name']] 94 | for d in save_dir_list: 95 | makedir(d) 96 | images = copy.deepcopy(original_images) 97 | images = post.overlay_points(images, points_output) 98 | post.save_images(images, save_dir_list, "points") 99 | 100 | return points_output, maskrcnn_targets, superpoint_targets -------------------------------------------------------------------------------- /model/superpoint/superpoint_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import numpy as np 8 | import torch 9 | import torch.nn.functional as F 10 | 11 | from datasets.utils.homographies import warp_points_bacth 12 | 13 | 14 | def detector_loss(pred, heatmap, valid_mask): 15 | ''' 16 | Modified focal loss. Exactly the same as CornerNet. 17 | Runs faster and costs a little bit more memory 18 | inputs: 19 | pred: batch * c * h * w 20 | heatmap: batch * c * h * w 21 | valid_mask: batch * c * h * w 22 | ''' 23 | pos_inds = heatmap.eq(1).float() 24 | neg_inds = heatmap.lt(1).float() 25 | 26 | neg_weights = torch.pow(1 - heatmap, 4) 27 | 28 | loss = 0 29 | eps = 1e-7 30 | 31 | num_pos = pos_inds.float().sum() 32 | 33 | pos_loss = torch.log(pred + eps) * torch.pow(1 - pred, 2) * pos_inds * valid_mask 34 | neg_loss = torch.log(1 - pred + eps) * torch.pow(pred, 2) * neg_weights * neg_inds * valid_mask 35 | 36 | pos_loss = pos_loss.sum() 37 | neg_loss = neg_loss.sum() 38 | 39 | if num_pos == 0: 40 | loss = loss - neg_loss 41 | else: 42 | loss = loss - (pos_loss + neg_loss) / num_pos 43 | 44 | return loss 45 | 46 | 47 | def descriptor_loss(descriptors, warped_descriptors, homographies, 48 | valid_mask, warped_valid_mask, **config): 49 | # Compute the position of the center pixel of every cell in the image 50 | batch_size, Dc, Hc, Wc = descriptors.shape 51 | coord_cells = np.stack(np.meshgrid(range(Hc), range(Wc), indexing='ij'), axis=-1) 52 | coord_cells = coord_cells * config['cell'] + config['cell'] // 2 # (Hc, Wc, 2) 53 | coord_cells = coord_cells.astype(float) 54 | # coord_cells is now a grid containing the coordinates of the Hc * Wc 55 | # center pixels of the 8x8 cells of the image 56 | 57 | # Compute the position of the warped center pixels 58 | H_list = np.squeeze(homographies.cpu().numpy(), axis = 1) 59 | warped_coord_cells = warp_points_bacth(H_list, np.reshape(coord_cells, [-1, 2])) 60 | # warped_coord_cells is now a list of the warped coordinates of all the center 61 | # pixels of the 8x8 cells of the image, shape (N, Hc * Wc, 2) 62 | 63 | # Compute the pairwise distances and filter the ones less than a threshold 64 | # The distance is just the pairwise norm of the difference of the two grids 65 | # Using shape broadcasting, cell_distances has shape (N, Hc, Wc, Hc, Wc) 66 | coord_cells = np.reshape(coord_cells, [1, 1, 1, Hc, Wc, 2]) # represent warped_image coord_cells 67 | warped_coord_cells = np.reshape(warped_coord_cells, [batch_size, Hc, Wc, 1, 1, 2]) # represent oridin image coord_cells 68 | 69 | cell_distances = coord_cells - warped_coord_cells 70 | 71 | cell_distances = np.linalg.norm(cell_distances, axis=-1) 72 | 73 | # s = 
np.less_equal(cell_distances, config['cell'] - 0.5).astype(float) 74 | # s = torch.tensor(s, dtype=descriptors.dtype, device=descriptors.device) 75 | s = (cell_distances <= config['cell'] - 0.5) 76 | s = torch.tensor(s, device=descriptors.device) 77 | # s[id_batch, h, w, h', w'] == 1 if the point of coordinates (h, w) warped by the 78 | # homography is at a distance from (h', w') less than config['cell'] 79 | # and 0 otherwise 80 | 81 | # valid_mask 82 | normalization = torch.sum(warped_valid_mask).float() 83 | valid_mask = torch.nn.functional.interpolate(valid_mask.unsqueeze(1).float(), scale_factor=1.0/config['cell'], mode='bilinear') 84 | warped_valid_mask = torch.nn.functional.interpolate(warped_valid_mask.unsqueeze(1).float(), scale_factor=1.0/config['cell'], mode='bilinear') 85 | 86 | valid_mask = valid_mask.squeeze(1) > 0.5 87 | warped_valid_mask = warped_valid_mask.squeeze(1) > 0.5 88 | 89 | valid_mask = torch.reshape(valid_mask, [batch_size, Hc, Wc, 1, 1]) 90 | warped_valid_mask = torch.reshape(warped_valid_mask, [batch_size, 1, 1, Hc, Wc]) 91 | valid_mask = valid_mask * warped_valid_mask 92 | 93 | # Normalize the descriptors and 94 | # compute the pairwise dot product between descriptors: d^t * d' 95 | descriptors = descriptors.permute(0, 2, 3, 1) # B * C * H * W -> B * H * W *C 96 | descriptors = torch.reshape(descriptors, [batch_size, Hc, Wc, 1, 1, -1]) # B * Hc * Wc * 1 * 1 * 256 97 | descriptors = F.normalize(descriptors, dim=-1) 98 | 99 | warped_descriptors = warped_descriptors.permute(0, 2, 3, 1) # B * C * H * W -> B * H * W *C 100 | warped_descriptors = torch.reshape(warped_descriptors, [batch_size, 1, 1, Hc, Wc, -1]) # B * 1 * 1 * Hc * Wc * 256 101 | warped_descriptors = F.normalize(warped_descriptors, dim=-1) 102 | 103 | dot_product_desc = (warped_descriptors * descriptors).sum(dim=-1) # B * Hc * Wc * Hc * Wc 104 | dot_product_desc = F.relu(dot_product_desc) # B * Hc * Wc * Hc * Wc 105 | 106 | zero = torch.tensor(0.0, dtype=descriptors.dtype, device=descriptors.device) 107 | 108 | positive_dist = torch.max(zero, config['train']['positive_margin'] - dot_product_desc) 109 | negative_dist = torch.max(zero, dot_product_desc - config['train']['negative_margin']) 110 | 111 | loss = (config['train']['lambda_d'] * s * positive_dist + (~s) * negative_dist) * valid_mask 112 | loss = torch.sum(loss)/normalization 113 | 114 | return loss 115 | 116 | 117 | class SuperPointLoss(torch.nn.Module): 118 | ''' 119 | loss for magicpoint: detector loss 120 | ''' 121 | def __init__(self, config): 122 | super(SuperPointLoss, self).__init__() 123 | self.detector_loss = detector_loss 124 | self.descriptor_loss = descriptor_loss 125 | self.config = config 126 | 127 | def forward(self, inputs, outputs): 128 | loss = self.detector_loss(outputs['outputs']['prob'], inputs['ht'], inputs['valid_mask']) 129 | loss_dict = {'points_loss': loss} 130 | if 'warped_image' in inputs: 131 | warped_loss = self.detector_loss(outputs['warped_outputs']['prob'], inputs['warped_ht'], inputs['warped_valid_mask']) 132 | 133 | loss_dict['warped_points_loss'] = warped_loss 134 | loss = loss + warped_loss 135 | if self.config['train']['add_descriptor']: 136 | descriptor_loss = self.descriptor_loss(outputs['outputs']['desc_raw'], outputs['warped_outputs']['desc_raw'], 137 | inputs['H'], inputs['valid_mask'], inputs['warped_valid_mask'], **self.config) 138 | loss_dict['descriptor_loss'] = descriptor_loss 139 | loss = loss + self.config['train']['lambda_loss'] * descriptor_loss 140 | 141 | return loss, loss_dict 142 | 
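# --- Illustrative sketch (not part of the original source) ---
# A minimal, self-contained check of the hinge terms used by descriptor_loss above,
# assuming an identity homography so that every cell corresponds only to itself.
# The tensor shapes and margin values below are assumptions chosen for illustration.
import torch
import torch.nn.functional as F

batch_size, Hc, Wc, D = 2, 4, 4, 16
positive_margin, negative_margin, lambda_d = 1.0, 0.2, 250.0

desc = F.normalize(torch.randn(batch_size, Hc, Wc, 1, 1, D), dim=-1)
warped_desc = F.normalize(torch.randn(batch_size, 1, 1, Hc, Wc, D), dim=-1)

# pairwise cell similarities, shape (B, Hc, Wc, Hc, Wc)
dot = F.relu((desc * warped_desc).sum(dim=-1))

# correspondence mask: with an identity homography, cell (h, w) matches only (h, w)
s = torch.eye(Hc * Wc).reshape(1, Hc, Wc, Hc, Wc).expand(batch_size, -1, -1, -1, -1)

positive_dist = torch.clamp(positive_margin - dot, min=0.0)   # pull matching cells together
negative_dist = torch.clamp(dot - negative_margin, min=0.0)   # push non-matching cells apart
loss = (lambda_d * s * positive_dist + (1 - s) * negative_dist).mean()
print(loss.item())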
-------------------------------------------------------------------------------- /model/superpoint/superpoint_public_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import numpy as np 5 | 6 | class SuperPointNet(torch.nn.Module): 7 | """ Pytorch definition of SuperPoint Network. """ 8 | def __init__(self): 9 | super(SuperPointNet, self).__init__() 10 | self.relu = torch.nn.ReLU(inplace=True) 11 | self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2) 12 | c1, c2, c3, c4, c5, d1 = 64, 64, 128, 128, 256, 256 13 | # Shared Encoder. 14 | self.conv1a = torch.nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1) 15 | self.conv1b = torch.nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1) 16 | self.conv2a = torch.nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) 17 | self.conv2b = torch.nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1) 18 | self.conv3a = torch.nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1) 19 | self.conv3b = torch.nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1) 20 | self.conv4a = torch.nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1) 21 | self.conv4b = torch.nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1) 22 | # Detector Head. 23 | self.convPa = torch.nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 24 | self.convPb = torch.nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0) 25 | # Descriptor Head. 26 | self.convDa = torch.nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 27 | self.convDb = torch.nn.Conv2d(c5, d1, kernel_size=1, stride=1, padding=0) 28 | 29 | def forward(self, x): 30 | """ Forward pass that jointly computes unprocessed point and descriptor 31 | tensors. 32 | Input 33 | x: Image pytorch tensor shaped N x 1 x H x W. 34 | Output 35 | semi: Output point pytorch tensor shaped N x 65 x H/8 x W/8. 36 | desc: Output descriptor pytorch tensor shaped N x 256 x H/8 x W/8. 37 | """ 38 | # Shared Encoder. 39 | if x.shape[1] > 1: 40 | x = x[:, 0:1, :, :] 41 | 42 | x = self.relu(self.conv1a(x)) 43 | x = self.relu(self.conv1b(x)) 44 | x = self.pool(x) 45 | x = self.relu(self.conv2a(x)) 46 | x = self.relu(self.conv2b(x)) 47 | x = self.pool(x) 48 | x = self.relu(self.conv3a(x)) 49 | x = self.relu(self.conv3b(x)) 50 | x = self.pool(x) 51 | x = self.relu(self.conv4a(x)) 52 | x = self.relu(self.conv4b(x)) 53 | # Detector Head. 54 | cPa = self.relu(self.convPa(x)) 55 | semi = self.convPb(cPa) 56 | 57 | prob = torch.nn.functional.softmax(semi, dim=1) 58 | prob = prob[:, :-1, :, :] 59 | prob = torch.nn.functional.pixel_shuffle(prob, 8) 60 | 61 | # Descriptor Head. 
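# The raw (coarse) descriptor map computed below is bilinearly upsampled by the cell size (8)
# and L2-normalized along the channel dimension to give dense per-pixel descriptors.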
62 | cDa = self.relu(self.convDa(x)) 63 | desc_raw = self.convDb(cDa) 64 | 65 | desc = torch.nn.functional.interpolate(desc_raw, scale_factor=8, mode='bilinear') 66 | desc = torch.nn.functional.normalize(desc, p=2, dim=1) 67 | 68 | return {'logits': semi, 'prob':prob, 'desc_raw': desc_raw, 'desc': desc} -------------------------------------------------------------------------------- /model/superpoint/vgg_like.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | from torchvision import models 7 | 8 | class VggLike(nn.Module): 9 | 10 | def __init__(self, pretrained_net): 11 | super(VggLike, self).__init__() 12 | self.pretrained_net = pretrained_net 13 | self.relu = nn.ReLU(inplace=True) 14 | 15 | c1, c2, h1, h2 = 256, 256, 65, 256 16 | self.convPa = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) 17 | self.bnPa = nn.BatchNorm2d(c2) 18 | self.convPb = nn.Conv2d(c2, h1, kernel_size=1, stride=1, padding=0) 19 | self.bnPb = nn.BatchNorm2d(h1) 20 | 21 | self.convDa = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) 22 | self.bnDa = nn.BatchNorm2d(c2) 23 | self.convDb = nn.Conv2d(c2, h2, kernel_size=1, stride=1, padding=0) 24 | self.bnDb = nn.BatchNorm2d(h2) 25 | 26 | def forward(self, x): 27 | 28 | output = self.pretrained_net(x) 29 | x3 = output['x3'] 30 | 31 | cPa = self.bnPa(self.relu(self.convPa(x3))) 32 | semi = self.bnPb(self.convPb(cPa)) 33 | 34 | prob = nn.functional.softmax(semi, dim=1) 35 | prob = prob[:, :-1, :, :] 36 | prob = nn.functional.pixel_shuffle(prob, 8) 37 | 38 | # descriptor extraction 39 | cDa = self.bnDa(self.relu(self.convDa(x3))) 40 | desc_raw = self.bnDb(self.convDb(cDa)) 41 | desc = nn.functional.interpolate(desc_raw, scale_factor=8, mode='bilinear') 42 | desc = nn.functional.normalize(desc, p=2, dim=1) 43 | 44 | return {'logits': semi, 'prob':prob, 'desc_raw': desc_raw, 'desc': desc} -------------------------------------------------------------------------------- /structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/structures/__init__.py -------------------------------------------------------------------------------- /train_gcn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | import sys 6 | sys.path.append('.') 7 | import datetime 8 | import logging 9 | import os 10 | import time 11 | import argparse 12 | import yaml 13 | 14 | import torch 15 | import torch.distributed as dist 16 | 17 | import torch.optim as optim 18 | from torch.autograd import Variable 19 | from torch.optim import lr_scheduler 20 | from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 21 | 22 | from datasets.utils.build_data import coco_loader 23 | from datasets.utils import pipeline as pp 24 | from model.build_model import build_maskrcnn, build_gcn 25 | from datasets.utils.preprocess import warp_batch_data, match_points_clusters 26 | from model.graph_models.descriptor_loss import DescriptorLoss 27 | from model.build_model import build_superpoint_model 28 | from model.inference import superpoint_inference 29 | from model.backbone.fcn import VGGNet 30 | from model.superpoint.vgg_like import VggLike 31 | 32 | 
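# Example invocation (illustrative only; the config name and paths are assumptions):
#   python train_gcn.py -c config/train_gcn_coco.yaml -g 1 -s /path/to/save_dir \
#       -d /path/to/coco -sm /path/to/superpoint.pth -gm /path/to/gcn_init.pth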
os.environ["CUDA_VISIBLE_DEVICES"] = "1" 33 | 34 | def train(configs): 35 | # read configs 36 | ## command line config 37 | use_gpu = configs['use_gpu'] 38 | save_dir = configs['save_dir'] 39 | data_root = configs['data_root'] 40 | ## data cofig 41 | data_config = configs['data'] 42 | data_aug_config = data_config['augmentation'] 43 | # train_data_name = data_config['TRAIN'] 44 | train_data_name = data_config['VAL'] 45 | ## superpoint model config 46 | detection_threshold = configs['model']['superpoint']['detection_threshold'] 47 | ## graph model config 48 | gcn_config = configs['model']['gcn'] 49 | batch_szie = gcn_config['train']['batch_szie'] 50 | epochs = gcn_config['train']['epochs'] 51 | lr = gcn_config['train']['lr'] 52 | momentum = gcn_config['train']['momentum'] 53 | w_decay = gcn_config['train']['w_decay'] 54 | milestones = gcn_config['train']['milestones'] 55 | gamma = gcn_config['train']['gamma'] 56 | checkpoint = gcn_config['train']['checkpoint'] 57 | lambda_d = gcn_config['train']['lambda_d'] 58 | weight_lambda = gcn_config['train']['weight_lambda'] 59 | ## others 60 | configs['num_gpu'] = [0] 61 | configs['public_model'] = 0 62 | 63 | # data 64 | data_loader = coco_loader(data_root=data_root, name=train_data_name, config=data_config, 65 | batch_size=batch_szie, remove_images_without_annotations=True) 66 | 67 | # model 68 | superpoint_model = build_superpoint_model(configs, requires_grad=False) 69 | superpoint_model.eval() 70 | 71 | gcn_model = build_gcn(configs) 72 | gcn_model.train() 73 | 74 | # optimizer 75 | optimizer = optim.RMSprop(gcn_model.parameters(), lr=lr, momentum=momentum, weight_decay=w_decay) 76 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma) 77 | 78 | # loss 79 | criterion = DescriptorLoss(gcn_config) 80 | 81 | sum_iter = 0 82 | for _ in range(epochs): 83 | for _, batch in enumerate(data_loader): 84 | optimizer.zero_grad() 85 | original_images = batch['image'] 86 | original_sizes = [list(img.shape[-2:]) for img in original_images] 87 | points_output, maskrcnn_targets, _ = superpoint_inference( 88 | superpoint_model, batch, use_gpu, 1, data_config, detection_threshold, save_dir=None) 89 | 90 | warped_batch = warp_batch_data(batch, data_config) 91 | warped_points_output, warped_maskrcnn_targets, _ = superpoint_inference( 92 | superpoint_model, warped_batch, use_gpu, 1, data_config, detection_threshold, save_dir=None) 93 | 94 | masks = maskrcnn_targets['masks'] 95 | warped_masks = warped_maskrcnn_targets['masks'] 96 | if 'gcn_mask' in data_aug_config: 97 | gcn_aug = data_aug_config['gcn_mask'] 98 | if gcn_aug['enable']: 99 | masks = pp.mask_augmentation(masks, gcn_aug) 100 | masks = torch.tensor(masks) 101 | 102 | batch_points, batch_descs, connections = match_points_clusters(points_output, masks, 103 | warped_points_output, warped_masks) 104 | 105 | if len(connections) < 2: 106 | print("no object") 107 | continue 108 | 109 | batch_points = [points.cuda() for points in batch_points] 110 | batch_descs = [descs.cuda() for descs in batch_descs] 111 | batch_object_descs, locations = gcn_model(batch_points, batch_descs) 112 | connections = torch.stack(connections).cuda() 113 | 114 | # descriptor loss 115 | ploss, nloss = criterion(batch_object_descs, connections) 116 | 117 | # location loss 118 | locations_mean_loss = locations.mean() 119 | location_sum = torch.sum(locations, 0) 120 | norm_locations_sum = torch.nn.functional.normalize(location_sum, p=2, dim=-1) 121 | # locations_norm_loss = 1 - norm_locations_sum.mean() 122 | 
zero = torch.tensor(0.0, dtype=norm_locations_sum.dtype, device=norm_locations_sum.device) 123 | locations_norm_loss = torch.max(zero, 0.1 - norm_locations_sum.mean()) 124 | 125 | loss = ploss * lambda_d + nloss + locations_mean_loss * weight_lambda[0] + locations_norm_loss * weight_lambda[1] 126 | 127 | loss.backward() 128 | optimizer.step() 129 | scheduler.step() 130 | sum_iter = sum_iter + 1 131 | 132 | if sum_iter%1 == 0: 133 | print("sum_iter = {}, loss = {}".format(sum_iter, loss.item())) 134 | print("ploss = {}, nloss = {}, locations_mean_loss = {}, locations_norm_loss = {}".format( 135 | ploss.item(), nloss.item(), locations_mean_loss.item(), locations_norm_loss.item())) 136 | 137 | if sum_iter % checkpoint == 0: 138 | model_saving_path = os.path.join(save_dir, "gcn_model_{}.pth".format(sum_iter)) 139 | torch.save(gcn_model.state_dict(), model_saving_path) 140 | print("saving model to {}".format(model_saving_path)) 141 | 142 | 143 | def main(): 144 | parser = argparse.ArgumentParser(description="Training") 145 | parser.add_argument( 146 | "-c", "--config_file", 147 | dest = "config_file", 148 | type = str, 149 | default = "" 150 | ) 151 | parser.add_argument( 152 | "-g", "--gpu", 153 | dest = "gpu", 154 | type = int, 155 | default = 0 156 | ) 157 | parser.add_argument( 158 | "-s", "--save_dir", 159 | dest = "save_dir", 160 | type = str, 161 | default = "" 162 | ) 163 | parser.add_argument( 164 | "-d", "--data_root", 165 | dest = "data_root", 166 | type = str, 167 | default = "" 168 | ) 169 | parser.add_argument( 170 | "-sm", "--superpoint_model_path", 171 | dest = "superpoint_model_path", 172 | type = str, 173 | default = "" 174 | ) 175 | parser.add_argument( 176 | "-gm", "--graph_model_path", 177 | dest = "graph_model_path", 178 | type = str, 179 | default = "" 180 | ) 181 | args = parser.parse_args() 182 | 183 | config_file = args.config_file 184 | f = open(config_file, 'r', encoding='utf-8') 185 | configs = f.read() 186 | configs = yaml.load(configs) 187 | configs['use_gpu'] = args.gpu 188 | configs['save_dir'] = args.save_dir 189 | configs['data_root'] = args.data_root 190 | configs['superpoint_model_path'] = args.superpoint_model_path 191 | configs['graph_model_path'] = args.graph_model_path 192 | 193 | train(configs) 194 | 195 | if __name__ == "__main__": 196 | main() -------------------------------------------------------------------------------- /train_maskrcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
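# Example invocation (illustrative only; the config name and paths are assumptions):
#   python train_maskrcnn.py -c config/train_maskrcnn_coco.yaml -g 1 -s /path/to/save_dir \
#       -d /path/to/coco -m /path/to/pretrained_maskrcnn.pth -p 1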
2 | import datetime 3 | import logging 4 | import os 5 | import time 6 | import argparse 7 | import yaml 8 | import copy 9 | 10 | import torch 11 | import torch.distributed as dist 12 | 13 | import torch.optim as optim 14 | from torch.autograd import Variable 15 | from torch.optim import lr_scheduler 16 | 17 | from model.mask_rcnn.mask_rcnn import MaskRCNN 18 | from datasets.utils.build_data import coco_loader 19 | from datasets.utils.preprocess import preprocess_maskrcnn_train_data 20 | from model.build_model import build_maskrcnn 21 | 22 | os.environ["CUDA_VISIBLE_DEVICES"] = "2, 3" 23 | 24 | def train(configs): 25 | # read configs 26 | ## command line config 27 | use_gpu = configs['use_gpu'] 28 | model_dir = configs['model_dir'] 29 | data_root = configs['data_root'] 30 | ## data cofig 31 | data_config = configs['data'] 32 | train_data_name = data_config['TRAIN'] 33 | ## model config 34 | model_config = configs['model']['maskrcnn'] 35 | train_batch_size = model_config['batch_size'] 36 | epochs = model_config['epochs'] 37 | lr = model_config['lr'] 38 | momentum = model_config['momentum'] 39 | w_decay = model_config['w_decay'] 40 | milestones = model_config['milestones'] 41 | gamma = model_config['gamma'] 42 | checkpoint = model_config['checkpoint'] 43 | ## others 44 | configs['num_gpu'] = [0, 1] 45 | 46 | # data 47 | train_loader = coco_loader( 48 | data_root=data_root, name=train_data_name, config=data_config, batch_size=train_batch_size, 49 | remove_images_without_annotations=True) 50 | 51 | # model 52 | model = build_maskrcnn(configs) 53 | 54 | model.train() 55 | 56 | # optimizer 57 | optimizer = optim.RMSprop(model.parameters(), lr=lr, momentum=momentum, weight_decay=w_decay) 58 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma) 59 | 60 | sum_iter = 0 61 | for _ in range(epochs): 62 | for iter, batch in enumerate(train_loader): 63 | optimizer.zero_grad() 64 | images, sizes, maskrcnn_targets = preprocess_maskrcnn_train_data(batch, use_gpu, data_config) 65 | result = model(images, sizes, maskrcnn_targets) 66 | 67 | losses_dict = result[0] 68 | losses_dict_print = {} 69 | for k in losses_dict: 70 | losses_dict[k] = torch.sum(losses_dict[k]) 71 | losses_dict_print[k] = losses_dict[k].cpu().item() 72 | 73 | losses = [losses_dict[k] for k in losses_dict.keys()] 74 | losses = sum(losses) 75 | losses.backward() 76 | optimizer.step() 77 | 78 | if iter%10 == 0: 79 | print("sum_iter = {}, loss = {}".format(sum_iter, losses.item())) 80 | print("loss_dict = {}".format(losses_dict_print)) 81 | 82 | if sum_iter % checkpoint == 0: 83 | model_saving_path = os.path.join(model_dir, "maskrcnn_iter{}.pth".format(sum_iter)) 84 | torch.save(model.state_dict(), model_saving_path) 85 | print("saving model to {}".format(model_saving_path)) 86 | 87 | scheduler.step() 88 | sum_iter += 1 89 | 90 | def main(): 91 | parser = argparse.ArgumentParser(description="Training") 92 | parser.add_argument( 93 | "-c", "--config_file", 94 | dest = "config_file", 95 | type = str, 96 | default = "" 97 | ) 98 | parser.add_argument( 99 | "-g", "--gpu", 100 | dest = "gpu", 101 | type = int, 102 | default = 0 103 | ) 104 | parser.add_argument( 105 | "-s", "--save_dir", 106 | dest = "save_dir", 107 | type = str, 108 | default = "" 109 | ) 110 | parser.add_argument( 111 | "-d", "--data_root", 112 | dest = "data_root", 113 | type = str, 114 | default = "" 115 | ) 116 | parser.add_argument( 117 | "-m", "--model_path", 118 | dest = "pretrained_model_path", 119 | type = str, 120 | default = "" 121 | ) 
122 | parser.add_argument( 123 | "-p", "--public_model", 124 | dest = "public_model", 125 | type = int, 126 | default = 0 127 | ) 128 | args = parser.parse_args() 129 | 130 | config_file = args.config_file 131 | f = open(config_file, 'r', encoding='utf-8') 132 | configs = f.read() 133 | configs = yaml.load(configs) 134 | configs['use_gpu'] = args.gpu 135 | configs['model_dir'] = args.save_dir 136 | configs['data_root'] = args.data_root 137 | configs['maskrcnn_model_path'] = args.pretrained_model_path 138 | configs['public_model'] = args.public_model 139 | 140 | train(configs) 141 | 142 | if __name__ == "__main__": 143 | main() 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /train_superpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import datetime 3 | import logging 4 | import os 5 | import time 6 | import argparse 7 | import yaml 8 | import copy 9 | 10 | import torch 11 | import torch.distributed as dist 12 | 13 | import torch.optim as optim 14 | from torch.autograd import Variable 15 | from torch.optim import lr_scheduler 16 | from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 17 | from torch.utils.data import DataLoader 18 | 19 | from model.build_model import build_superpoint_model 20 | from model.superpoint.superpoint_loss import SuperPointLoss 21 | from datasets.utils.build_data import coco_loader 22 | from datasets.synthetic.synthetic import SyntheticDataset 23 | from datasets.utils.batch_collator import BatchCollator 24 | from datasets.utils.preprocess import preprocess_superpoint_train_data 25 | 26 | 27 | os.environ["CUDA_VISIBLE_DEVICES"] = "2, 3" 28 | 29 | def update_gaussian_radius(gaussian_radius, iter, gaussian_gamma, gaussian_milestones): 30 | r = gaussian_radius 31 | if r < 0: 32 | return 1, gaussian_radius 33 | 34 | for i in range(len(gaussian_milestones)): 35 | if iter > gaussian_milestones[i]: 36 | r = r * gaussian_gamma 37 | else: 38 | break 39 | 40 | r = int(r) 41 | if r < 2: 42 | gaussian_radius = -1 43 | return r, gaussian_radius 44 | 45 | def train(configs): 46 | # read configs 47 | ## command line config 48 | use_gpu = configs['use_gpu'] 49 | model_dir = configs['model_dir'] 50 | data_root = configs['data_root'] 51 | ## data cofig 52 | data_config = configs['data'] 53 | dataset_name = data_config['name'] 54 | ## superpoint model config 55 | superpoint_model_config = configs['model']['superpoint'] 56 | train_batch_size = superpoint_model_config['train']['batch_size'] 57 | epochs = superpoint_model_config['train']['epochs'] 58 | lr = superpoint_model_config['train']['lr'] 59 | momentum = superpoint_model_config['train']['momentum'] 60 | w_decay = superpoint_model_config['train']['w_decay'] 61 | milestones = superpoint_model_config['train']['milestones'] 62 | gamma = superpoint_model_config['train']['gamma'] 63 | gaussian_region = superpoint_model_config['train']['gaussian_region'] 64 | gaussian_radius = gaussian_region['radius'] 65 | gaussian_gamma = gaussian_region['gamma'] 66 | gaussian_milestones = gaussian_region['milestones'] 67 | train_batch_size = superpoint_model_config['train']['batch_size'] 68 | checkpoint = superpoint_model_config['train']['checkpoint'] 69 | ## others 70 | configs['num_gpu'] = [0, 1] 71 | 72 | # data 73 | if 'coco' in dataset_name: 74 | train_data_name = data_config['TRAIN'] 75 | train_loader = coco_loader( 76 | data_root=data_root, 
name=train_data_name, config=data_config, batch_size=train_batch_size, 77 | remove_images_without_annotations=True) 78 | elif 'synthetic' in dataset_name: 79 | train_dataset = SyntheticDataset(data_root=data_root, use_for='training') 80 | sampler = torch.utils.data.sampler.RandomSampler(train_dataset) 81 | batch_sampler = torch.utils.data.sampler.BatchSampler(sampler=sampler, batch_size=train_batch_size, drop_last=True) 82 | collator = BatchCollator() 83 | train_loader = DataLoader(train_dataset, batch_sampler=batch_sampler, collate_fn=collator, num_workers=8) 84 | 85 | # model 86 | model = build_superpoint_model(configs) 87 | model.train() 88 | 89 | # optimizer 90 | optimizer = optim.RMSprop(model.parameters(), lr=lr, momentum=momentum, weight_decay=w_decay) 91 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma) 92 | 93 | # loss 94 | criterion = SuperPointLoss(config=superpoint_model_config) 95 | 96 | sum_iter = 0 97 | r = gaussian_radius 98 | for _ in range(epochs): 99 | for iter, batch in enumerate(train_loader): 100 | optimizer.zero_grad() 101 | batch = preprocess_superpoint_train_data(batch, use_gpu, r, data_config) 102 | 103 | if use_gpu: 104 | for key in batch: 105 | if key == 'image_name': 106 | continue 107 | batch[key] = batch[key].cuda() 108 | 109 | outputs = model(batch['image']) 110 | batch_outputs = {'outputs': outputs} 111 | if 'warped_image' in batch: 112 | warped_outputs = model(batch['warped_image']) 113 | batch_outputs['warped_outputs'] = warped_outputs 114 | 115 | loss, loss_dict = criterion(batch, batch_outputs) 116 | loss = loss / train_batch_size 117 | 118 | for k in loss_dict: 119 | loss_dict[k] = loss_dict[k].cpu().item() / train_batch_size 120 | 121 | loss.backward() 122 | optimizer.step() 123 | 124 | if iter%10 == 0: 125 | print("sum_iter = {}, gaussian_radius={}, loss = {}".format(sum_iter, r, loss.item())) 126 | 127 | sum_iter += 1 128 | r, gaussian_radius = update_gaussian_radius(gaussian_radius, sum_iter, gaussian_gamma, gaussian_milestones) 129 | scheduler.step() 130 | 131 | if sum_iter % checkpoint == 0: 132 | model_saving_path = os.path.join(model_dir, "superpoint_iter{}.pth".format(sum_iter)) 133 | torch.save(model.state_dict(), model_saving_path) 134 | print("saving model to {}".format(model_saving_path)) 135 | 136 | 137 | def main(): 138 | parser = argparse.ArgumentParser(description="Training") 139 | parser.add_argument( 140 | "-c", "--config_file", 141 | dest = "config_file", 142 | type = str, 143 | default = "" 144 | ) 145 | parser.add_argument( 146 | "-g", "--gpu", 147 | dest = "gpu", 148 | type = int, 149 | default = 0 150 | ) 151 | parser.add_argument( 152 | "-s", "--save_dir", 153 | dest = "save_dir", 154 | type = str, 155 | default = "" 156 | ) 157 | parser.add_argument( 158 | "-d", "--data_root", 159 | dest = "data_root", 160 | type = str, 161 | default = "" 162 | ) 163 | parser.add_argument( 164 | "-m", "--model_path", 165 | dest = "pretrained_model_path", 166 | type = str, 167 | default = "" 168 | ) 169 | args = parser.parse_args() 170 | 171 | config_file = args.config_file 172 | f = open(config_file, 'r', encoding='utf-8') 173 | configs = f.read() 174 | configs = yaml.load(configs) 175 | configs['use_gpu'] = args.gpu 176 | configs['model_dir'] = args.save_dir 177 | configs['data_root'] = args.data_root 178 | configs['superpoint_model_path'] = args.pretrained_model_path 179 | 180 | train(configs) 181 | 182 | if __name__ == "__main__": 183 | main() 184 | 185 | 186 | 187 | 
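# --- Illustrative sketch (not part of the original source) ---
# A quick trace of update_gaussian_radius (defined earlier in this file): the radius is
# shrunk by `gamma` once for every milestone already passed, and as soon as the integer
# radius drops below 2 the schedule is disabled (its state becomes -1 and 1 is returned).
# The radius, gamma and milestone values below are assumptions chosen for illustration.
gaussian_radius, gaussian_gamma, gaussian_milestones = 8, 0.5, [1000, 2000, 3000]
for it in [0, 500, 1500, 2500, 3500, 4000]:
    r, gaussian_radius = update_gaussian_radius(gaussian_radius, it, gaussian_gamma, gaussian_milestones)
    print(it, r, gaussian_radius)
# expected radii: 8, 8, 4, 2, then 1 once the schedule is disabled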
-------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sair-lab/AirCode/6f7aaeafa3b6f8c762170431447568855601c684/utils/__init__.py -------------------------------------------------------------------------------- /utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | 5 | import torch 6 | 7 | from maskrcnn_benchmark.utils.model_serialization import load_state_dict 8 | from maskrcnn_benchmark.utils.c2_model_loading import load_c2_format 9 | from maskrcnn_benchmark.utils.imports import import_file 10 | from maskrcnn_benchmark.utils.model_zoo import cache_url 11 | 12 | 13 | class Checkpointer(object): 14 | def __init__( 15 | self, 16 | model, 17 | optimizer=None, 18 | scheduler=None, 19 | save_dir="", 20 | save_to_disk=None, 21 | logger=None, 22 | ): 23 | self.model = model 24 | self.optimizer = optimizer 25 | self.scheduler = scheduler 26 | self.save_dir = save_dir 27 | self.save_to_disk = save_to_disk 28 | if logger is None: 29 | logger = logging.getLogger(__name__) 30 | self.logger = logger 31 | 32 | def save(self, name, **kwargs): 33 | if not self.save_dir: 34 | return 35 | 36 | if not self.save_to_disk: 37 | return 38 | 39 | data = {} 40 | data["model"] = self.model.state_dict() 41 | if self.optimizer is not None: 42 | data["optimizer"] = self.optimizer.state_dict() 43 | if self.scheduler is not None: 44 | data["scheduler"] = self.scheduler.state_dict() 45 | data.update(kwargs) 46 | 47 | save_file = os.path.join(self.save_dir, "{}.pth".format(name)) 48 | self.logger.info("Saving checkpoint to {}".format(save_file)) 49 | torch.save(data, save_file) 50 | self.tag_last_checkpoint(save_file) 51 | 52 | def load(self, f=None, use_latest=True): 53 | if self.has_checkpoint() and use_latest: 54 | # override argument with existing checkpoint 55 | f = self.get_checkpoint_file() 56 | if not f: 57 | # no checkpoint could be found 58 | self.logger.info("No checkpoint found. 
Initializing model from scratch") 59 | return {} 60 | self.logger.info("Loading checkpoint from {}".format(f)) 61 | checkpoint = self._load_file(f) 62 | self._load_model(checkpoint) 63 | if "optimizer" in checkpoint and self.optimizer: 64 | self.logger.info("Loading optimizer from {}".format(f)) 65 | self.optimizer.load_state_dict(checkpoint.pop("optimizer")) 66 | if "scheduler" in checkpoint and self.scheduler: 67 | self.logger.info("Loading scheduler from {}".format(f)) 68 | self.scheduler.load_state_dict(checkpoint.pop("scheduler")) 69 | 70 | # return any further checkpoint data 71 | return checkpoint 72 | 73 | def has_checkpoint(self): 74 | save_file = os.path.join(self.save_dir, "last_checkpoint") 75 | return os.path.exists(save_file) 76 | 77 | def get_checkpoint_file(self): 78 | save_file = os.path.join(self.save_dir, "last_checkpoint") 79 | try: 80 | with open(save_file, "r") as f: 81 | last_saved = f.read() 82 | last_saved = last_saved.strip() 83 | except IOError: 84 | # if file doesn't exist, maybe because it has just been 85 | # deleted by a separate process 86 | last_saved = "" 87 | return last_saved 88 | 89 | def tag_last_checkpoint(self, last_filename): 90 | save_file = os.path.join(self.save_dir, "last_checkpoint") 91 | with open(save_file, "w") as f: 92 | f.write(last_filename) 93 | 94 | def _load_file(self, f): 95 | return torch.load(f, map_location=torch.device("cpu")) 96 | 97 | def _load_model(self, checkpoint): 98 | load_state_dict(self.model, checkpoint.pop("model")) 99 | 100 | 101 | class DetectronCheckpointer(Checkpointer): 102 | def __init__( 103 | self, 104 | cfg, 105 | model, 106 | optimizer=None, 107 | scheduler=None, 108 | save_dir="", 109 | save_to_disk=None, 110 | logger=None, 111 | ): 112 | super(DetectronCheckpointer, self).__init__( 113 | model, optimizer, scheduler, save_dir, save_to_disk, logger 114 | ) 115 | self.cfg = cfg.clone() 116 | 117 | def _load_file(self, f): 118 | # catalog lookup 119 | if f.startswith("catalog://"): 120 | paths_catalog = import_file( 121 | "maskrcnn_benchmark.config.paths_catalog", self.cfg.PATHS_CATALOG, True 122 | ) 123 | catalog_f = paths_catalog.ModelCatalog.get(f[len("catalog://") :]) 124 | self.logger.info("{} points to {}".format(f, catalog_f)) 125 | f = catalog_f 126 | # download url files 127 | if f.startswith("http"): 128 | # if the file is a url path, download it and cache it 129 | cached_f = cache_url(f) 130 | self.logger.info("url {} cached in {}".format(f, cached_f)) 131 | f = cached_f 132 | # convert Caffe2 checkpoint from pkl 133 | if f.endswith(".pkl"): 134 | return load_c2_format(self.cfg, f) 135 | # load native detectron.pytorch checkpoint 136 | loaded = super(DetectronCheckpointer, self)._load_file(f) 137 | if "model" not in loaded: 138 | loaded = dict(model=loaded) 139 | return loaded 140 | -------------------------------------------------------------------------------- /utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for cv2 utility functions and maintaining version compatibility 3 | between 3.x and 4.x 4 | """ 5 | import cv2 6 | 7 | 8 | def findContours(*args, **kwargs): 9 | """ 10 | Wraps cv2.findContours to maintain compatiblity between versions 11 | 3 and 4 12 | 13 | Returns: 14 | contours, hierarchy 15 | """ 16 | if cv2.__version__.startswith('4'): 17 | contours, hierarchy = cv2.findContours(*args, **kwargs) 18 | elif cv2.__version__.startswith('3'): 19 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 20 | else: 21 | 
raise AssertionError( 22 | 'cv2 must be either version 3 or 4 to call this method') 23 | 24 | return contours, hierarchy 25 | -------------------------------------------------------------------------------- /utils/imports.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | if torch._six.PY3: 4 | import importlib 5 | import importlib.util 6 | import sys 7 | 8 | def import_file(module_name, file_path, make_importable=False): 9 | spec = importlib.util.spec_from_file_location(module_name, file_path) 10 | module = importlib.util.module_from_spec(spec) 11 | spec.loader.exec_module(module) 12 | if make_importable: 13 | sys.modules[module_name] = module 14 | return module 15 | else: 16 | import imp 17 | 18 | def import_file(module_name, file_path, make_importable=None): 19 | module = imp.load_source(module_name, file_path) 20 | return module 21 | -------------------------------------------------------------------------------- /utils/tools.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import numpy as np 3 | import importlib 4 | import cv2 5 | 6 | def get_module(path, name): 7 | if path == '': 8 | mod = importlib.import_module(name) 9 | else: 10 | mod = importlib.import_module('{}.{}'.format(path, name)) 11 | return getattr(mod, name) 12 | 13 | def tensor_to_numpy(image): 14 | img = image.data.cpu().numpy() 15 | img = img.transpose(1, 2, 0) 16 | img = (img * 255.0 + 0.5).astype(np.uint8) 17 | img = np.clip(img, 0, 255) 18 | if img.shape[2] == 1: 19 | img = cv2.merge([img, img, img]) 20 | else: 21 | img = img.copy() 22 | return img 23 | 24 | 25 | def dict_update(d, u): 26 | """Improved update for nested dictionaries. 27 | 28 | Arguments: 29 | d: The dictionary to be updated. 30 | u: The update dictionary. 31 | 32 | Returns: 33 | The updated dictionary. 34 | """ 35 | for k, v in u.items(): 36 | if isinstance(v, collections.Mapping): 37 | d[k] = dict_update(d.get(k, {}), v) 38 | else: 39 | d[k] = v 40 | return d -------------------------------------------------------------------------------- /validate_detection.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
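# Example invocation (illustrative only; the config name and paths are assumptions):
#   python validate_detection.py -c config/validate_detection.yaml -g 1 -s /path/to/results \
#       -d /path/to/coco -mm /path/to/maskrcnn.pth -sm /path/to/superpoint.pth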
2 | import datetime 3 | import logging 4 | import os 5 | import time 6 | import argparse 7 | import yaml 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | import torch.optim as optim 13 | from torch.autograd import Variable 14 | from torch.optim import lr_scheduler 15 | 16 | from model.mask_rcnn.mask_rcnn import MaskRCNN 17 | from datasets.utils.build_data import coco_loader 18 | from model.build_model import build_maskrcnn, build_superpoint_model 19 | from model.inference import detection_inference 20 | 21 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 22 | 23 | def validate(configs): 24 | # read configs 25 | ## command line config 26 | use_gpu = configs['use_gpu'] 27 | save_dir = configs['save_dir'] 28 | data_root = configs['data_root'] 29 | ## data cofig 30 | data_config = configs['data'] 31 | val_data_name = data_config['VAL'] 32 | ## superpoint model config 33 | superpoint_model_config = configs['model']['superpoint'] 34 | detection_threshold = superpoint_model_config['detection_threshold'] 35 | val_batch_size = superpoint_model_config['batch_size'] 36 | gaussian_radius = 2 37 | ## others 38 | configs['num_gpu'] = [0] 39 | configs['public_model'] = 0 40 | 41 | # data 42 | val_loader = coco_loader(data_root=data_root, name=val_data_name, config=data_config, 43 | batch_size=val_batch_size, remove_images_without_annotations=True) 44 | 45 | # model 46 | maskrcnn_model = build_maskrcnn(configs) 47 | superpoint_model = build_superpoint_model(configs) 48 | 49 | with torch.no_grad(): 50 | maskrcnn_model.eval() 51 | for iter, batch in enumerate(val_loader): 52 | result = detection_inference(maskrcnn_model, superpoint_model, batch, use_gpu, gaussian_radius, 53 | detection_threshold, data_config, save_dir) 54 | 55 | def main(): 56 | parser = argparse.ArgumentParser(description="Training") 57 | parser.add_argument( 58 | "-c", "--config_file", 59 | dest = "config_file", 60 | type = str, 61 | default = "" 62 | ) 63 | parser.add_argument( 64 | "-g", "--gpu", 65 | dest = "gpu", 66 | type = int, 67 | default = 0 68 | ) 69 | parser.add_argument( 70 | "-s", "--save_dir", 71 | dest = "save_dir", 72 | type = str, 73 | default = "" 74 | ) 75 | parser.add_argument( 76 | "-d", "--data_root", 77 | dest = "data_root", 78 | type = str, 79 | default = "" 80 | ) 81 | parser.add_argument( 82 | "-mm", "--maskrcnn_model_path", 83 | dest = "maskrcnn_model_path", 84 | type = str, 85 | default = "" 86 | ) 87 | parser.add_argument( 88 | "-sm", "--superpoint_model_path", 89 | dest = "superpoint_model_path", 90 | type = str, 91 | default = "" 92 | ) 93 | args = parser.parse_args() 94 | 95 | config_file = args.config_file 96 | f = open(config_file, 'r', encoding='utf-8') 97 | configs = f.read() 98 | configs = yaml.load(configs) 99 | configs['use_gpu'] = args.gpu 100 | configs['save_dir'] = args.save_dir 101 | configs['data_root'] = args.data_root 102 | configs['maskrcnn_model_path'] = args.maskrcnn_model_path 103 | configs['superpoint_model_path'] = args.superpoint_model_path 104 | 105 | validate(configs) 106 | 107 | if __name__ == "__main__": 108 | main() 109 | 110 | 111 | -------------------------------------------------------------------------------- /validate_gcn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
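# Example invocation (illustrative only; the config name and paths are assumptions):
#   python validate_gcn.py -c config/validate_gcn.yaml -g 1 -s /path/to/results \
#       -d /path/to/coco -mm /path/to/maskrcnn.pth -gm /path/to/gcn.pth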
2 | import datetime 3 | import logging 4 | import os 5 | import time 6 | import argparse 7 | import yaml 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 13 | 14 | from datasets.utils.build_data import coco_loader 15 | from model.build_model import build_maskrcnn, build_gcn 16 | from datasets.utils.preprocess import warp_batch_data, match_points_clusters 17 | from validate import maskrcnn_inference 18 | from model.graph_models.descriptor_loss import DescriptorLoss 19 | 20 | 21 | def calculate_f1(maskrcnn_model, gcn_model, loader, configs): 22 | with torch.no_grad(): 23 | maskrcnn_model.eval() 24 | gcn_model.eval() 25 | 26 | ## data cofig 27 | data_config = configs['data'] 28 | ## superpoint model config 29 | superpoint_model_config = configs['model']['superpoint'] 30 | detection_threshold = superpoint_model_config['eval']['detection_threshold'] 31 | 32 | precisions, recalls, weights = [], [], [] 33 | 34 | for iter, batch in enumerate(loader): 35 | optimizer.zero_grad() 36 | original_images = batch['image'] 37 | original_sizes = [list(img.shape[-2:]) for img in original_images] 38 | _, points_output, maskrcnn_targets, _ = maskrcnn_inference( 39 | maskrcnn_model, batch, use_gpu, 1, data_config, detection_threshold) 40 | 41 | warped_batch = warp_batch_data(batch, data_config) 42 | _, warped_points_output, warped_maskrcnn_targets, _ = maskrcnn_inference( 43 | maskrcnn_model, warped_batch, use_gpu, 1, data_config, detection_threshold) 44 | 45 | batch_points, batch_descs, connections = match_points_clusters(points_output, maskrcnn_targets['masks'], 46 | warped_points_output, warped_maskrcnn_targets['masks']) 47 | 48 | if len(connections) < 2: 49 | print("no object") 50 | continue 51 | 52 | batch_points = [points.cuda() for points in batch_points] 53 | batch_descs = [descs.cuda() for descs in batch_descs] 54 | batch_object_descs = gcn_model(batch_points, batch_descs) 55 | connections = torch.stack(connections).cuda() 56 | 57 | distances = torch.einsum('nd,dm->nm', descs, descs.t()) # N * N 58 | good_matchs = (distances > dist_thr).float() 59 | num_correct_matches = torch.sum(good_matchs * connections) 60 | num_connections = torch.sum(connections) 61 | 62 | recall = num_correct_matches / num_connections 63 | precision = num_correct_matches / torch.sum(good_matchs) 64 | 65 | recalls.append(recall) 66 | precisions.append(precision) 67 | weights.append(num_connections) 68 | 69 | if(len(weights) == 0): 70 | return 0., 0., 0. 71 | 72 | recalls = torch.tensor(recalls) 73 | precisions = torch.tensor(precisions) 74 | weights = torch.tensor(weights) 75 | 76 | total_number = torch.sum(weights) 77 | aver_recall = torch.sum(recalls * weights) / total_number 78 | aver_precision = torch.sum(precisions * weights) / total_number 79 | aver_f1 = 0. 
if (aver_recall + aver_precision) == 0 else 2 * aver_recall * aver_precision / (aver_recall + aver_precision) 80 | 81 | return aver_recall, aver_precision, aver_f1 82 | 83 | 84 | def validate(configs): 85 | # read configs 86 | ## command line config 87 | use_gpu = configs['use_gpu'] 88 | save_dir = configs['save_dir'] 89 | data_root = configs['data_root'] 90 | ## data config 91 | data_config = configs['data'] 92 | validation_data_name = data_config['VAL'] 93 | ## superpoint model config 94 | superpoint_model_config = configs['model']['superpoint'] 95 | detection_threshold = superpoint_model_config['eval']['detection_threshold'] 96 | ## graph model config 97 | gcn_config = configs['model']['gcn'] 98 | batch_szie = gcn_config['train']['batch_szie'] 99 | ## others 100 | configs['num_gpu'] = [0] 101 | configs['public_model'] = 0 102 | 103 | # data 104 | data_loader = coco_loader(data_root=data_root, name=validation_data_name, config=data_config, 105 | batch_size=batch_szie, remove_images_without_annotations=True) 106 | 107 | # model 108 | maskrcnn_model = build_maskrcnn(configs) 109 | gcn_model = build_gcn(configs) 110 | 111 | recall, precision, f1 = calculate_f1(maskrcnn_model, gcn_model, data_loader, configs) 112 | print("recall = {}, precision = {}, f1 = {}".format(recall, precision, f1)) 113 | 114 | 115 | def main(): 116 | parser = argparse.ArgumentParser(description="Validation") 117 | parser.add_argument( 118 | "-c", "--config_file", 119 | dest = "config_file", 120 | type = str, 121 | default = "" 122 | ) 123 | parser.add_argument( 124 | "-g", "--gpu", 125 | dest = "gpu", 126 | type = int, 127 | default = 0 128 | ) 129 | parser.add_argument( 130 | "-s", "--save_dir", 131 | dest = "save_dir", 132 | type = str, 133 | default = "" 134 | ) 135 | parser.add_argument( 136 | "-d", "--data_root", 137 | dest = "data_root", 138 | type = str, 139 | default = "" 140 | ) 141 | parser.add_argument( 142 | "-mm", "--maskrcnn_model_path", 143 | dest = "maskrcnn_model_path", 144 | type = str, 145 | default = "" 146 | ) 147 | parser.add_argument( 148 | "-gm", "--graph_model_path", 149 | dest = "graph_model_path", 150 | type = str, 151 | default = "" 152 | ) 153 | args = parser.parse_args() 154 | 155 | config_file = args.config_file 156 | f = open(config_file, 'r', encoding='utf-8') 157 | configs = f.read() 158 | configs = yaml.load(configs) 159 | configs['use_gpu'] = args.gpu 160 | configs['save_dir'] = args.save_dir 161 | configs['data_root'] = args.data_root 162 | configs['pretrained_model_path'] = args.maskrcnn_model_path 163 | configs['graph_model_path'] = args.graph_model_path 164 | 165 | validate(configs) 166 | 167 | if __name__ == "__main__": 168 | main() 169 | --------------------------------------------------------------------------------
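# --- Illustrative sketch (not part of the repository) ---
# How the weighted precision / recall / F1 aggregation in calculate_f1 above behaves on
# toy per-batch statistics; all numbers below are assumptions chosen for illustration.
import torch

precisions = torch.tensor([0.8, 0.6])
recalls = torch.tensor([0.7, 0.5])
weights = torch.tensor([100.0, 50.0])   # number of ground-truth object connections per batch

total = weights.sum()
aver_precision = (precisions * weights).sum() / total
aver_recall = (recalls * weights).sum() / total
aver_f1 = 2 * aver_precision * aver_recall / (aver_precision + aver_recall)
print(aver_precision.item(), aver_recall.item(), aver_f1.item())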