├── rfvision ├── data │ ├── __init__.py │ ├── alfred │ │ ├── __init__.py │ │ ├── README.md │ │ ├── batch_load.sh │ │ └── load_alfred_data.py │ ├── scannet │ │ ├── __init__.py │ │ ├── meta_data │ │ │ ├── scannet_means.npz │ │ │ └── scannetv2_test.txt │ │ └── README.md │ └── sunrgbd │ │ └── matlab │ │ ├── parsave.m │ │ ├── extract_split.m │ │ └── extract_rgbd_data_v1.m ├── datasets │ ├── custom_dataset │ │ └── __init__.py │ ├── pipelines │ │ ├── pose │ │ │ └── __init__.py │ │ ├── transform_custom.py │ │ ├── keypointnet_pipeline.py │ │ ├── loading_custom.py │ │ ├── loading_pose.py │ │ └── compose.py │ ├── api_wrappers │ │ ├── __init__.py │ │ └── coco_api.py │ ├── pose_dataset │ │ ├── body │ │ │ ├── __init__.py │ │ │ └── body3d_base_dataset.py │ │ ├── hand │ │ │ └── __init__.py │ │ ├── base │ │ │ └── __init__.py │ │ ├── mesh │ │ │ ├── __init__.py │ │ │ └── mesh_adv_dataset.py │ │ └── __init__.py │ ├── samplers │ │ ├── __init__.py │ │ └── distributed_sampler.py │ ├── __init__.py │ └── shapenet_v2.py ├── tools │ ├── dataset_converters │ │ ├── __init__.py │ │ └── create_data.sh │ ├── __init__.py │ ├── model_converters │ │ └── publish_model.py │ └── misc │ │ └── print_config.py ├── models │ ├── detectors3d │ │ ├── category_ppf │ │ │ ├── utils │ │ │ │ └── __init__.py │ │ │ ├── __init__.py │ │ │ ├── train.py │ │ │ ├── cfg.py │ │ │ ├── nocs.py │ │ │ └── utils.py │ │ ├── 3d_touch_and_vision │ │ │ └── __init__.py │ │ └── __init__.py │ ├── human_analyzers │ │ ├── misc │ │ │ └── __init__.py │ │ ├── utils │ │ │ └── __init__.py │ │ └── __init__.py │ ├── pose_estimators │ │ ├── articulation │ │ │ ├── optimization │ │ │ │ └── __init__.py │ │ │ ├── __init__.py │ │ │ ├── models │ │ │ │ └── __init__.py │ │ │ └── datasets │ │ │ │ ├── __init__.py │ │ │ │ └── utils.py │ │ └── __init__.py │ ├── detectors │ │ ├── htc.py │ │ ├── __init__.py │ │ ├── fcos.py │ │ ├── mask_rcnn.py │ │ ├── yolo.py │ │ ├── solo.py │ │ └── cascade_rcnn.py │ └── __init__.py ├── core │ ├── evaluation3d │ │ └── __init__.py │ ├── voxel │ │ ├── __init__.py │ │ └── builder.py │ ├── visualizer3d │ │ └── __init__.py │ ├── visualizer_pose │ │ └── __init__.py │ ├── camera │ │ ├── __init__.py │ │ └── camera_base.py │ ├── data_structures │ │ └── __init__.py │ ├── bbox │ │ ├── iou_calculators │ │ │ ├── __init__.py │ │ │ └── builder.py │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ └── builder.py │ │ ├── coder │ │ │ ├── __init__.py │ │ │ ├── base_bbox_coder.py │ │ │ └── pseudo_bbox_coder.py │ │ ├── assigners │ │ │ ├── base_assigner.py │ │ │ └── __init__.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── combined_sampler.py │ │ │ └── pseudo_sampler.py │ │ ├── builder.py │ │ ├── demodata.py │ │ └── __init__.py │ ├── visualizer │ │ └── __init__.py │ ├── utils_pose │ │ ├── __init__.py │ │ └── dist_utils.py │ ├── post_processing3d │ │ └── __init__.py │ ├── mask │ │ └── __init__.py │ ├── bbox3d │ │ ├── coders │ │ │ └── __init__.py │ │ ├── iou_calculators │ │ │ └── __init__.py │ │ ├── structures │ │ │ └── __init__.py │ │ └── __init__.py │ ├── post_processing │ │ └── __init__.py │ ├── post_processing_pose │ │ └── __init__.py │ ├── hook │ │ ├── __init__.py │ │ ├── checkloss_hook.py │ │ └── sync_norm_hook.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── class_names.py │ │ └── bbox_overlaps.py │ ├── utils │ │ └── __init__.py │ ├── anchor │ │ ├── builder.py │ │ └── __init__.py │ ├── evaluation_pose │ │ └── __init__.py │ ├── points │ │ └── __init__.py │ └── __init__.py ├── components │ ├── roi_heads │ │ ├── shared_heads │ │ │ └── __init__.py │ │ ├── roi_extractors │ 
│ │ └── __init__.py │ │ ├── mask_heads │ │ │ ├── __init__.py │ │ │ └── htc_mask_head.py │ │ ├── bbox_heads │ │ │ └── __init__.py │ │ ├── __init__.py │ │ └── double_roi_head.py │ ├── losses_pose │ │ ├── __init__.py │ │ ├── regression_loss.py │ │ └── classfication_loss.py │ ├── necks │ │ ├── __init__.py │ │ └── gap_neck.py │ ├── fusion_layers │ │ └── __init__.py │ ├── __init__.py │ ├── keypoint_head │ │ └── __init__.py │ ├── backbones │ │ ├── base_pointnet.py │ │ └── __init__.py │ ├── dense_heads │ │ └── __init__.py │ ├── utils │ │ ├── knn.py │ │ ├── ops.py │ │ ├── __init__.py │ │ ├── builder.py │ │ └── mlp.py │ └── losses │ │ ├── cosine_simlarity_loss.py │ │ └── __init__.py ├── __init__.py ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── logger.py │ ├── export.py │ ├── util_random.py │ └── profiling.py ├── apis │ └── __init__.py └── version.py ├── flows ├── task_flow │ └── README.md ├── detectors │ ├── yolo │ │ ├── yolov3_d53_fp16_mstrain-608_273e_coco.py │ │ ├── .yolov3_d53_mstrain-608_273e_coco.py.swp │ │ ├── yolov3_d53_320_273e_coco.py │ │ └── yolov3_d53_mstrain-416_273e_coco.py │ ├── mask_rcnn │ │ ├── mask_rcnn_r101_fpn_1x_coco.py │ │ ├── mask_rcnn_r101_fpn_2x_coco.py │ │ ├── mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py │ │ ├── mask_rcnn_r101_caffe_fpn_1x_coco.py │ │ ├── mask_rcnn_r50_fpn_1x_coco.py │ │ ├── mask_rcnn_r50_fpn_2x_coco.py │ │ ├── mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py │ │ ├── mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py │ │ ├── mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py │ │ ├── mask_rcnn_x101_32x4d_fpn_1x_coco.py │ │ ├── mask_rcnn_x101_32x4d_fpn_2x_coco.py │ │ ├── mask_rcnn_x101_64x4d_fpn_1x_coco.py │ │ ├── mask_rcnn_x101_64x4d_fpn_2x_coco.py │ │ ├── mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py │ │ ├── mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py │ │ ├── mask_rcnn_r50_fpn_poly_1x_coco.py │ │ ├── mask_rcnn_r50_caffe_fpn_1x_coco.py │ │ ├── mask_rcnn_r50_caffe_c4_1x_coco.py │ │ ├── mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py │ │ ├── mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py │ │ ├── mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py │ │ └── mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py │ ├── cascade_rcnn │ │ ├── cascade_mask_rcnn_r101_fpn_1x_coco.py │ │ ├── cascade_mask_rcnn_r101_fpn_20e_coco.py │ │ ├── cascade_mask_rcnn_r50_fpn_1x_coco.py │ │ ├── cascade_mask_rcnn_r50_fpn_20e_coco.py │ │ ├── cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py │ │ ├── cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py │ │ ├── cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py │ │ └── cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py │ ├── fcos │ │ ├── fcos_center_r50_caffe_fpn_gn-head_1x_coco.py │ │ ├── fcos_r101_caffe_fpn_gn-head_1x_coco.py │ │ ├── fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py │ │ ├── fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py │ │ ├── fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py │ │ └── fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py │ ├── htc │ │ ├── htc_r50_fpn_20e_coco.py │ │ ├── htc_r101_fpn_20e_coco.py │ │ ├── htc_x101_32x4d_fpn_16x1_20e_coco.py │ │ ├── htc_x101_64x4d_fpn_16x1_20e_coco.py │ │ └── htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py │ ├── yolox │ │ ├── yolox_m_8x8_300e_coco.py │ │ ├── yolox_l_8x8_300e_coco.py │ │ ├── yolox_x_8x8_300e_coco.py │ │ ├── yolox_nano_8x8_300e_coco.py │ │ └── yolox_tiny_8x8_300e_coco.py │ ├── _base_ │ │ ├── schedules │ │ │ ├── schedule_1x.py │ │ │ ├── schedule_2x.py │ │ │ └── schedule_20e.py │ │ ├── default_runtime.py │ │ └── datasets │ │ │ ├── coco_detection.py │ │ │ └── 
coco_instance.py │ └── solo │ │ └── solo_r50_fpn_1x_coco.py ├── README.md ├── train_pipeline │ ├── dist_train.sh │ └── dist_test.sh ├── detectors3d │ ├── votenet │ │ ├── votenet_iouloss_8x8_scannet-3d-18class.py │ │ ├── votenet_16x8_sunrgbd-3d-10class.py │ │ └── votenet_8x8_scannet-3d-18class.py │ └── _base_ │ │ ├── schedules │ │ └── schedule_3x.py │ │ └── default_runtime.py ├── human_analyzers │ ├── _base_ │ │ └── default_runtime.py │ ├── body │ │ ├── 3d_mesh_sview_rgb_img │ │ │ └── hmr │ │ │ │ ├── README.md │ │ │ │ └── mixed │ │ │ │ └── resnet_mixed.yml │ │ └── 3d_kpt_sview_rgb_img │ │ │ ├── README.md │ │ │ └── pose_lift │ │ │ ├── h36m │ │ │ ├── simplebaseline3d_h36m.yml │ │ │ └── simplebaseline3d_h36m.md │ │ │ └── README.md │ └── hand │ │ ├── rhd │ │ └── resnet_rhd2d.yml │ │ ├── others │ │ └── iknet.py │ │ └── interhand3d │ │ └── internet_interhand3d.yml └── pose_estimators │ └── articulation │ └── articulation_estimator.py ├── requirements ├── readthedocs.txt ├── 3d.txt ├── docs.txt ├── build.txt ├── optional.txt ├── runtime.txt └── tests.txt ├── tests ├── data │ ├── VOCdevkit │ │ ├── VOC2007 │ │ │ ├── ImageSets │ │ │ │ └── Main │ │ │ │ │ ├── test.txt │ │ │ │ │ └── trainval.txt │ │ │ └── JPEGImages │ │ │ │ └── 000001.jpg │ │ └── VOC2012 │ │ │ ├── ImageSets │ │ │ └── Main │ │ │ │ ├── test.txt │ │ │ │ └── trainval.txt │ │ │ └── JPEGImages │ │ │ └── 000001.jpg │ ├── gray.jpg │ ├── color.jpg │ └── coco_sample.json ├── test_onnx │ ├── __init__.py │ ├── data │ │ ├── yolov3_neck.pkl │ │ ├── ssd_head_get_bboxes.pkl │ │ ├── fsaf_head_get_bboxes.pkl │ │ ├── retina_head_get_bboxes.pkl │ │ └── yolov3_head_get_bboxes.pkl │ └── utils.py ├── test_models │ ├── test_roi_heads │ │ ├── __init__.py │ │ └── utils.py │ ├── test_backbones │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── test_hourglass.py │ │ └── test_resnest.py │ ├── test_loss.py │ ├── test_utils │ │ ├── test_se_layer.py │ │ └── test_position_encoding.py │ └── test_dense_heads │ │ └── test_dense_heads_attr.py ├── test_data │ ├── test_datasets │ │ ├── test_xml_dataset.py │ │ └── test_coco_dataset.py │ └── test_pipelines │ │ └── test_formatting.py └── test_utils │ └── test_version.py ├── requirements.txt ├── setup.cfg ├── .gitignore └── docs └── get_started.md /rfvision/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rfvision/data/alfred/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rfvision/data/scannet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rfvision/datasets/custom_dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rfvision/datasets/pipelines/pose/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rfvision/datasets/pipelines/transform_custom.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rfvision/tools/dataset_converters/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flows/task_flow/README.md: -------------------------------------------------------------------------------- 1 | # Task Flow 2 | 3 | ## Tutorial -------------------------------------------------------------------------------- /rfvision/models/detectors3d/category_ppf/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements/readthedocs.txt: -------------------------------------------------------------------------------- 1 | mmcv 2 | torch 3 | torchvision 4 | -------------------------------------------------------------------------------- /rfvision/models/detectors3d/3d_touch_and_vision/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/data/VOCdevkit/VOC2007/ImageSets/Main/test.txt: -------------------------------------------------------------------------------- 1 | 000001 2 | -------------------------------------------------------------------------------- /tests/data/VOCdevkit/VOC2012/ImageSets/Main/test.txt: -------------------------------------------------------------------------------- 1 | 000001 2 | -------------------------------------------------------------------------------- /tests/data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt: -------------------------------------------------------------------------------- 1 | 000001 2 | -------------------------------------------------------------------------------- /tests/data/VOCdevkit/VOC2012/ImageSets/Main/trainval.txt: -------------------------------------------------------------------------------- 1 | 000001 2 | -------------------------------------------------------------------------------- /requirements/3d.txt: -------------------------------------------------------------------------------- 1 | open3d 2 | trimesh 3 | h5py 4 | numba 5 | opencv-python -------------------------------------------------------------------------------- /requirements/docs.txt: -------------------------------------------------------------------------------- 1 | recommonmark 2 | sphinx 3 | sphinx_markdown_tables 4 | sphinx_rtd_theme 5 | -------------------------------------------------------------------------------- /requirements/build.txt: -------------------------------------------------------------------------------- 1 | # These must be installed before building rfvision 2 | 3 | cython 4 | numpy 5 | -------------------------------------------------------------------------------- /rfvision/models/human_analyzers/misc/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | -------------------------------------------------------------------------------- /tests/data/gray.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robotflow-initiative/rfvision/HEAD/tests/data/gray.jpg -------------------------------------------------------------------------------- /tests/test_onnx/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import ort_validate 2 | 3 | __all__ = ['ort_validate'] 4 | -------------------------------------------------------------------------------- /rfvision/models/pose_estimators/articulation/optimization/__init__.py: -------------------------------------------------------------------------------- 1 | from .optimizer import optimize_pose -------------------------------------------------------------------------------- /tests/data/color.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robotflow-initiative/rfvision/HEAD/tests/data/color.jpg -------------------------------------------------------------------------------- /rfvision/core/evaluation3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .indoor_eval import indoor_eval 2 | 3 | __all__ = ['indoor_eval', ] 4 | -------------------------------------------------------------------------------- /rfvision/data/sunrgbd/matlab/parsave.m: -------------------------------------------------------------------------------- 1 | function parsave(filename, instance) 2 | save(filename, 'instance'); 3 | end -------------------------------------------------------------------------------- /rfvision/models/human_analyzers/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .mano_layers import ManoLayer 2 | 3 | __all__ = ['ManoLayer'] -------------------------------------------------------------------------------- /requirements/optional.txt: -------------------------------------------------------------------------------- 1 | albumentations>=0.3.2 2 | cityscapesscripts 3 | imagecorruptions 4 | scipy 5 | scikit-learn 6 | -------------------------------------------------------------------------------- /rfvision/components/roi_heads/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /rfvision/datasets/api_wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_api import COCO, COCOeval 2 | 3 | __all__ = ['COCO', 'COCOeval'] 4 | -------------------------------------------------------------------------------- /tests/test_models/test_roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import _dummy_bbox_sampling 2 | 3 | __all__ = ['_dummy_bbox_sampling'] 4 | -------------------------------------------------------------------------------- /tests/test_onnx/data/yolov3_neck.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robotflow-initiative/rfvision/HEAD/tests/test_onnx/data/yolov3_neck.pkl -------------------------------------------------------------------------------- /rfvision/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | from .tools import * 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /rfvision/models/pose_estimators/articulation/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import ArticulationEstimator 2 | from .datasets import ArticulationDataset 3 | 4 | -------------------------------------------------------------------------------- /tests/test_onnx/data/ssd_head_get_bboxes.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robotflow-initiative/rfvision/HEAD/tests/test_onnx/data/ssd_head_get_bboxes.pkl -------------------------------------------------------------------------------- /tests/test_onnx/data/fsaf_head_get_bboxes.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robotflow-initiative/rfvision/HEAD/tests/test_onnx/data/fsaf_head_get_bboxes.pkl -------------------------------------------------------------------------------- /rfvision/data/scannet/meta_data/scannet_means.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robotflow-initiative/rfvision/HEAD/rfvision/data/scannet/meta_data/scannet_means.npz -------------------------------------------------------------------------------- /rfvision/models/detectors3d/category_ppf/__init__.py: -------------------------------------------------------------------------------- 1 | from .category_ppf import CategoryPPF 2 | from .category_ppf_dataset import NOCSForPPF, ShapeNetDatasetForPPF -------------------------------------------------------------------------------- /rfvision/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .collect_env import collect_env 2 | from .logger import get_root_logger 3 | 4 | __all__ = ['get_root_logger', 'collect_env'] 5 | -------------------------------------------------------------------------------- /tests/test_onnx/data/retina_head_get_bboxes.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robotflow-initiative/rfvision/HEAD/tests/test_onnx/data/retina_head_get_bboxes.pkl -------------------------------------------------------------------------------- /tests/test_onnx/data/yolov3_head_get_bboxes.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robotflow-initiative/rfvision/HEAD/tests/test_onnx/data/yolov3_head_get_bboxes.pkl -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/build.txt 2 | -r requirements/optional.txt 3 | -r requirements/runtime.txt 4 | -r requirements/tests.txt 5 | -r requirements/3d.txt 6 | -------------------------------------------------------------------------------- /rfvision/models/pose_estimators/articulation/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .articulation_estimator import ArticulationEstimator 2 | 3 | __all__ = ['ArticulationEstimator'] 
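# Usage sketch (hedged): in this mmdet-style codebase the estimator is normally
# built from a config such as flows/pose_estimators/articulation/articulation_estimator.py
# through a registry builder rather than instantiated directly. The builder
# helper below is an assumption for illustration, not a confirmed rfvision API:
#   cfg = rflib.Config.fromfile('flows/pose_estimators/articulation/articulation_estimator.py')
#   model = build_detector(cfg.model)  # hypothetical builder helper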
-------------------------------------------------------------------------------- /tests/data/VOCdevkit/VOC2007/JPEGImages/000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robotflow-initiative/rfvision/HEAD/tests/data/VOCdevkit/VOC2007/JPEGImages/000001.jpg -------------------------------------------------------------------------------- /tests/data/VOCdevkit/VOC2012/JPEGImages/000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robotflow-initiative/rfvision/HEAD/tests/data/VOCdevkit/VOC2012/JPEGImages/000001.jpg -------------------------------------------------------------------------------- /tests/test_models/test_backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import check_norm_state, is_block, is_norm 2 | 3 | __all__ = ['is_block', 'is_norm', 'check_norm_state'] 4 | -------------------------------------------------------------------------------- /flows/detectors/yolo/yolov3_d53_fp16_mstrain-608_273e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './yolov3_d53_mstrain-608_273e_coco.py' 2 | # fp16 settings 3 | fp16 = dict(loss_scale='dynamic') 4 | -------------------------------------------------------------------------------- /rfvision/components/losses_pose/__init__.py: -------------------------------------------------------------------------------- 1 | from .regression_loss import L1LossPose 2 | from .classfication_loss import BCELoss 3 | 4 | 5 | __all__ = ['L1LossPose', 'BCELoss'] -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_fpn_1x_coco.py' 2 | model = dict(init_cfg='torchvision://resnet101', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_fpn_2x_coco.py' 2 | model = dict(init_cfg='torchvision://resnet101', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /flows/detectors/yolo/.yolov3_d53_mstrain-608_273e_coco.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robotflow-initiative/rfvision/HEAD/flows/detectors/yolo/.yolov3_d53_mstrain-608_273e_coco.py.swp -------------------------------------------------------------------------------- /rfvision/models/pose_estimators/__init__.py: -------------------------------------------------------------------------------- 1 | from .articulation import ArticulationEstimator, ArticulationDataset 2 | __all__ = [ 3 | 'ArticulationDataset', 'ArticulationEstimator' 4 | ] -------------------------------------------------------------------------------- /flows/README.md: -------------------------------------------------------------------------------- 1 | # Vision Flow 2 | 3 | ## Features 4 | 1. Configurable vision task model/component flow 5 | 2. 
The train/eval state of each task model/component can be specified individually -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain-poly_3x_coco_instance.py', 3 | '../_base_/models/mask_rcnn_r50_fpn.py' 4 | ] 5 | -------------------------------------------------------------------------------- /rfvision/core/voxel/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_voxel_generator 2 | from .voxel_generator import VoxelGenerator 3 | 4 | __all__ = ['build_voxel_generator', 'VoxelGenerator'] 5 | -------------------------------------------------------------------------------- /rfvision/models/pose_estimators/articulation/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .articulation_dataset import ArticulationDataset 2 | from .pipelines_train import * 3 | from .pipelines_test import * 4 | -------------------------------------------------------------------------------- /flows/detectors/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py' 2 | model = dict(init_cfg='torchvision://resnet101', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /flows/detectors/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py' 2 | model = dict(init_cfg='torchvision://resnet101', backbone=dict(depth=101)) 3 | -------------------------------------------------------------------------------- /flows/detectors/fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py' 2 | model = dict(bbox_head=dict(center_sampling=True, center_sample_radius=1.5)) 3 | -------------------------------------------------------------------------------- /flows/detectors/htc/htc_r50_fpn_20e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './htc_r50_fpn_1x_coco.py' 2 | # learning policy 3 | lr_config = dict(step=[16, 19]) 4 | runner = dict(type='EpochBasedRunner', max_epochs=20) 5 | -------------------------------------------------------------------------------- /rfvision/core/visualizer3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .show_result import show_result, show_multi_modality_result, show_seg_result 2 | 3 | __all__ = ['show_result', 'show_multi_modality_result', 'show_seg_result'] 4 | -------------------------------------------------------------------------------- /rfvision/core/visualizer_pose/__init__.py: -------------------------------------------------------------------------------- 1 | from .image import imshow_bboxes, imshow_keypoints, imshow_keypoints_3d, imshow_mesh_3d 2 | 3 | __all__ = ['imshow_bboxes', 'imshow_keypoints', 'imshow_keypoints_3d', 'imshow_mesh_3d'] -------------------------------------------------------------------------------- /rfvision/core/camera/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .camera_base import CAMERAS 3 | from .single_camera import SimpleCamera 4 | 5 | __all__ = ['CAMERAS', 'SimpleCamera'] 6 | -------------------------------------------------------------------------------- /rfvision/datasets/pose_dataset/body/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .body3d_h36m_dataset import Body3DH36MDataset 3 | 4 | __all__ = [ 5 | 'Body3DH36MDataset', 6 | ] 7 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy 3 | pycocotools; platform_system == "Linux" 4 | pycocotools-windows; platform_system == "Windows" 5 | six 6 | terminaltables 7 | h5py 8 | jax 9 | numba 10 | json_tricks -------------------------------------------------------------------------------- /flows/detectors/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://detectron/resnet101_caffe', 4 | backbone=dict(depth=101)) 5 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_caffe_fpn_1x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://detectron2/resnet101_caffe', 4 | backbone=dict(depth=101)) 5 | -------------------------------------------------------------------------------- /rfvision/core/data_structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
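# GeneralData is a flexible key/value container for model inputs and outputs,
# and InstanceData is its per-instance variant in which fields such as
# bboxes/labels/scores share the same first dimension; these semantics mirror
# the upstream OpenMMLab data structures this module is based on.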
2 | from .general_data import GeneralData 3 | from .instance_data import InstanceData 4 | 5 | __all__ = ['GeneralData', 'InstanceData'] 6 | -------------------------------------------------------------------------------- /rfvision/core/bbox/iou_calculators/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_iou_calculator 2 | from .iou2d_calculator import BboxOverlaps2D, bbox_overlaps 3 | 4 | __all__ = ['build_iou_calculator', 'BboxOverlaps2D', 'bbox_overlaps'] 5 | -------------------------------------------------------------------------------- /rfvision/core/visualizer/__init__.py: -------------------------------------------------------------------------------- 1 | from .image import (color_val_matplotlib, imshow_det_bboxes, 2 | imshow_gt_det_bboxes) 3 | 4 | __all__ = ['imshow_det_bboxes', 'imshow_gt_det_bboxes', 'color_val_matplotlib'] 5 | -------------------------------------------------------------------------------- /flows/detectors/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py: -------------------------------------------------------------------------------- 1 | # TODO: Remove this config after benchmarking all related configs 2 | _base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py' 3 | 4 | data = dict(samples_per_gpu=4, workers_per_gpu=4) 5 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/mask_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/mask_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /rfvision/core/utils_pose/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .dist_utils import allreduce_grads 3 | from .regularizations import WeightNormClipHook 4 | 5 | __all__ = ['allreduce_grads', 'WeightNormClipHook'] 6 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | interrogate 5 | isort==4.3.21 6 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 
7 | kwarray 8 | pytest 9 | ubelt 10 | xdoctest>=0.10.0 11 | yapf 12 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py' 2 | # learning policy 3 | lr_config = dict(step=[16, 23]) 4 | runner = dict(type='EpochBasedRunner', max_epochs=24) 5 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py' 2 | # learning policy 3 | lr_config = dict(step=[28, 34]) 4 | runner = dict(type='EpochBasedRunner', max_epochs=36) 5 | -------------------------------------------------------------------------------- /flows/detectors/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/cascade_mask_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /rfvision/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed_sampler import DistributedSampler 2 | from .group_sampler import DistributedGroupSampler, GroupSampler 3 | 4 | __all__ = ['DistributedSampler', 'DistributedGroupSampler', 'GroupSampler', 5 | ] 6 | -------------------------------------------------------------------------------- /flows/detectors/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/cascade_mask_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_20e.py', '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain-poly_3x_coco_instance.py', 3 | '../_base_/models/mask_rcnn_r50_fpn.py' 4 | ] 5 | 6 | model = dict(init_cfg='torchvision://resnet101', backbone=dict(depth=101)) 7 | -------------------------------------------------------------------------------- /rfvision/datasets/pose_dataset/hand/__init__.py: -------------------------------------------------------------------------------- 1 | from .interhand3d_dataset import InterHand3DDataset 2 | from .rhd2d_dataset import Rhd2DDataset 3 | from .rhd3d_dataset import Rhd3DDataset 4 | __all__ = ['InterHand3DDataset', 5 | 'Rhd2DDataset', 'Rhd3DDataset'] 6 | -------------------------------------------------------------------------------- /rfvision/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .debug_tools import (show_tensor_img, count_paras, format_single_data, 2 | init_constant_for_all, debug_model, debug_children, 3 | debug_dataset, draw_bbox_xyxy 4 | ) -------------------------------------------------------------------------------- /flows/detectors/htc/htc_r101_fpn_20e_coco.py: 
-------------------------------------------------------------------------------- 1 | _base_ = './htc_r50_fpn_1x_coco.py' 2 | model = dict(init_cfg='torchvision://resnet101', backbone=dict(depth=101)) 3 | # learning policy 4 | lr_config = dict(step=[16, 19]) 5 | runner = dict(type='EpochBasedRunner', max_epochs=20) 6 | -------------------------------------------------------------------------------- /rfvision/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_match_cost 2 | from .match_cost import BBoxL1Cost, ClassificationCost, FocalLossCost, IoUCost 3 | 4 | __all__ = [ 5 | 'build_match_cost', 'ClassificationCost', 'BBoxL1Cost', 'IoUCost', 6 | 'FocalLossCost' 7 | ] 8 | -------------------------------------------------------------------------------- /rfvision/core/post_processing3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .box3d_nms import aligned_3d_nms, box3d_multiclass_nms, circle_nms 2 | from .merge_augs import merge_aug_bboxes_3d 3 | 4 | __all__ = ['box3d_multiclass_nms', 'aligned_3d_nms', 5 | 'merge_aug_bboxes_3d', 'circle_nms'] 6 | 7 | -------------------------------------------------------------------------------- /rfvision/core/bbox/match_costs/builder.py: -------------------------------------------------------------------------------- 1 | from rflib.utils import Registry, build_from_cfg 2 | 3 | MATCH_COST = Registry('Match Cost') 4 | 5 | 6 | def build_match_cost(cfg, default_args=None): 7 | """Builder of match cost.""" 8 | return build_from_cfg(cfg, MATCH_COST, default_args) 9 | -------------------------------------------------------------------------------- /rfvision/components/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | from .pafpn import PAFPN 3 | from .yolo_neck import YOLOV3Neck 4 | from .gap_neck import GlobalAveragePooling 5 | from .yolox_pafpn import YOLOXPAFPN 6 | __all__ = [ 7 | 'FPN', 'PAFPN', 'YOLOV3Neck', 8 | 'GlobalAveragePooling', 'YOLOXPAFPN' 9 | ] 10 | -------------------------------------------------------------------------------- /rfvision/components/roi_heads/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_roi_extractor import BaseRoIExtractor 2 | from .generic_roi_extractor import GenericRoIExtractor 3 | from .single_level_roi_extractor import SingleRoIExtractor 4 | 5 | __all__ = ['BaseRoIExtractor', 'SingleRoIExtractor', 'GenericRoIExtractor'] 6 | -------------------------------------------------------------------------------- /rfvision/core/bbox/iou_calculators/builder.py: -------------------------------------------------------------------------------- 1 | from rflib.utils import Registry, build_from_cfg 2 | 3 | IOU_CALCULATORS = Registry('IoU calculator') 4 | 5 | 6 | def build_iou_calculator(cfg, default_args=None): 7 | """Builder of IoU calculator.""" 8 | return build_from_cfg(cfg, IOU_CALCULATORS, default_args) 9 | -------------------------------------------------------------------------------- /flows/train_pipeline/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py 
$CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /flows/detectors3d/votenet/votenet_iouloss_8x8_scannet-3d-18class.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./votenet_8x8_scannet-3d-18class.py'] 2 | 3 | # model settings, add iou loss 4 | model = dict( 5 | bbox_head=dict( 6 | iou_loss=dict( 7 | type='AxisAlignedIoULoss', reduction='sum', loss_weight=10.0 / 8 | 3.0))) 9 | -------------------------------------------------------------------------------- /rfvision/components/fusion_layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .coord_transform import (apply_3d_transformation, bbox_2d_transform, 2 | coord_2d_transform) 3 | from .vote_fusion import VoteFusion 4 | 5 | __all__ = [ 6 | 'VoteFusion', 'apply_3d_transformation', 7 | 'bbox_2d_transform', 'coord_2d_transform' 8 | ] 9 | -------------------------------------------------------------------------------- /rfvision/components/roi_heads/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from .fused_semantic_head import FusedSemanticHead 3 | from .htc_mask_head import HTCMaskHead 4 | from .dct_mask_head import MaskRCNNDCTHead 5 | __all__ = [ 6 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'MaskRCNNDCTHead' 7 | ] 8 | -------------------------------------------------------------------------------- /flows/detectors/yolox/yolox_m_8x8_300e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './yolox_s_8x8_300e_coco.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict(deepen_factor=0.67, widen_factor=0.75), 6 | neck=dict(in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2), 7 | bbox_head=dict(in_channels=192, feat_channels=192), 8 | ) 9 | -------------------------------------------------------------------------------- /flows/detectors/yolox/yolox_l_8x8_300e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './yolox_s_8x8_300e_coco.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict(deepen_factor=1.0, widen_factor=1.0), 6 | neck=dict( 7 | in_channels=[256, 512, 1024], out_channels=256, num_csp_blocks=3), 8 | bbox_head=dict(in_channels=256, feat_channels=256)) 9 | -------------------------------------------------------------------------------- /flows/detectors/yolox/yolox_x_8x8_300e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './yolox_s_8x8_300e_coco.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict(deepen_factor=1.33, widen_factor=1.25), 6 | neck=dict( 7 | in_channels=[320, 640, 1280], out_channels=320, num_csp_blocks=4), 8 | bbox_head=dict(in_channels=320, feat_channels=320)) 9 | -------------------------------------------------------------------------------- /flows/train_pipeline/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | 
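# Example invocation (the config path exists in this repo; the checkpoint path
# and the --eval flag are illustrative and assume the mmdet-style test.py that
# this script wraps):
#   bash flows/train_pipeline/dist_test.sh \
#       flows/detectors/yolo/yolov3_d53_320_273e_coco.py \
#       work_dirs/yolov3/latest.pth 8 --eval bbox
# PORT can be overridden to run two jobs on one machine, e.g. PORT=29501 bash ...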
-------------------------------------------------------------------------------- /rfvision/components/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .dense_heads import * # noqa: F401,F403 3 | from .losses import * # noqa: F401,F403 4 | from .losses_pose import * 5 | from .necks import * # noqa: F401,F403 6 | from .roi_heads import * # noqa: F401,F403 7 | from .fusion_layers import * 8 | from .keypoint_head import * -------------------------------------------------------------------------------- /rfvision/core/bbox/coder/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_bbox_coder import BaseBBoxCoder 2 | from .delta_xywh_bbox_coder import DeltaXYWHBBoxCoder 3 | from .pseudo_bbox_coder import PseudoBBoxCoder 4 | from .yolo_bbox_coder import YOLOBBoxCoder 5 | 6 | __all__ = [ 7 | 'BaseBBoxCoder', 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder', 8 | 'YOLOBBoxCoder' 9 | ] 10 | -------------------------------------------------------------------------------- /rfvision/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .mask_target import mask_target 2 | from .structures import BaseInstanceMasks, BitmapMasks, PolygonMasks 3 | from .utils import encode_mask_results, split_combined_polys 4 | 5 | __all__ = [ 6 | 'split_combined_polys', 'mask_target', 'BaseInstanceMasks', 'BitmapMasks', 7 | 'PolygonMasks', 'encode_mask_results' 8 | ] 9 | -------------------------------------------------------------------------------- /rfvision/models/human_analyzers/__init__.py: -------------------------------------------------------------------------------- 1 | # from .handtailor import HandTailor 2 | from .pose_lifter import PoseLifter 3 | from .top_down import TopDown 4 | from .interhand_3d import Interhand3D 5 | from .base import BasePose 6 | from .iknet import IKNet 7 | from .handtailor import * 8 | __all__ = ['TopDown', 'Interhand3D', 'BasePose', 9 | 'IKNet', 10 | 'PoseLifter'] -------------------------------------------------------------------------------- /rfvision/datasets/pose_dataset/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .kpt_3d_sview_rgb_img_top_down_dataset import Kpt3dSviewRgbImgTopDownDataset 2 | from .kpt_2d_sview_rgb_img_top_down_dataset import Kpt2dSviewRgbImgTopDownDataset 3 | from .kpt_3d_sview_kpt_2d_dataset import Kpt3dSviewKpt2dDataset 4 | __all__ = ['Kpt3dSviewRgbImgTopDownDataset', 'Kpt2dSviewRgbImgTopDownDataset', 5 | 'Kpt3dSviewKpt2dDataset'] -------------------------------------------------------------------------------- /rfvision/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | """Base assigner that assigns boxes to ground truth boxes.""" 6 | 7 | @abstractmethod 8 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 9 | """Assign boxes to either a ground truth boxes or a negative boxes.""" 10 | -------------------------------------------------------------------------------- /flows/detectors/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = 
dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /flows/detectors/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=24) 12 | -------------------------------------------------------------------------------- /flows/detectors/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=20) 12 | -------------------------------------------------------------------------------- /rfvision/components/roi_heads/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import (ConvFCBBoxHead, Shared2FCBBoxHead, 3 | Shared4Conv1FCBBoxHead) 4 | from .double_bbox_head import DoubleConvFCBBoxHead 5 | 6 | __all__ = [ 7 | 'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead', 8 | 'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead' 9 | ] 10 | -------------------------------------------------------------------------------- /rfvision/datasets/pose_dataset/mesh/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .mesh_adv_dataset import MeshAdversarialDataset 3 | from .mesh_h36m_dataset import MeshH36MDataset 4 | from .mesh_mix_dataset import MeshMixDataset 5 | from .mosh_dataset import MoshDataset 6 | 7 | __all__ = [ 8 | 'MeshH36MDataset', 'MoshDataset', 'MeshMixDataset', 9 | 'MeshAdversarialDataset' 10 | ] 11 | -------------------------------------------------------------------------------- /flows/human_analyzers/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | log_config = dict( 3 | interval=50, 4 | hooks=[ 5 | dict(type='TextLoggerHook'), 6 | dict(type='TensorboardLoggerHook') 7 | ]) 8 | # yapf:enable 9 | dist_params = dict(backend='nccl') 10 | log_level = 'INFO' 11 | work_dir = None 12 | load_from = None 13 | resume_from = None 14 | workflow = [('train', 1)] 15 | -------------------------------------------------------------------------------- /flows/detectors3d/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor dataset, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | total_epochs = 36 10 | -------------------------------------------------------------------------------- /flows/detectors/yolox/yolox_nano_8x8_300e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './yolox_tiny_8x8_300e_coco.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict(deepen_factor=0.33, widen_factor=0.25, use_depthwise=True), 6 | neck=dict( 7 | in_channels=[64, 128, 256], 8 | out_channels=64, 9 | num_csp_blocks=1, 10 | use_depthwise=True), 11 | bbox_head=dict(in_channels=64, feat_channels=64, use_depthwise=True)) 12 | -------------------------------------------------------------------------------- /rfvision/models/detectors3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Base3DDetector 2 | from .single_stage import SingleStage3DDetector 3 | from .votenet import VoteNet 4 | from .imvotenet import ImVoteNet 5 | from .skeleton_merger import SkeletonMerger 6 | from .category_ppf import CategoryPPF 7 | __all__ = ['Base3DDetector', 'SingleStage3DDetector', 'VoteNet', 8 | 'ImVoteNet', 'SkeletonMerger', 9 | 'CategoryPPF' 10 | ] 11 | -------------------------------------------------------------------------------- /rfvision/core/bbox3d/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_free_bbox_coder import AnchorFreeBBoxCoder 2 | from .centerpoint_bbox_coders import CenterPointBBoxCoder 3 | from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder 4 | from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder 5 | 6 | __all__ = [ 7 | 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder', 8 | 'CenterPointBBoxCoder', 'AnchorFreeBBoxCoder' 9 | ] 10 | -------------------------------------------------------------------------------- /flows/detectors/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | 
log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | -------------------------------------------------------------------------------- /rfvision/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms, multiclass_nms_with_coef 2 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks, 3 | merge_aug_proposals, merge_aug_scores) 4 | from .matrix_nms import mask_matrix_nms 5 | 6 | __all__ = [ 7 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 8 | 'merge_aug_scores', 'merge_aug_masks', 'multiclass_nms_with_coef', 9 | 'mask_matrix_nms' 10 | ] 11 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r101_fpn_1x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://resnext101_32x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=32, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch')) 14 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r101_fpn_2x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://resnext101_32x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=32, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch')) 14 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_x101_32x4d_fpn_1x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://resnext101_64x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=64, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch')) 14 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_x101_32x4d_fpn_2x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://resnext101_64x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=64, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch')) 14 | -------------------------------------------------------------------------------- /flows/detectors/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 
'./cascade_mask_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://resnext101_32x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=32, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch')) 14 | -------------------------------------------------------------------------------- /flows/detectors/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://resnext101_64x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=64, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch')) 14 | -------------------------------------------------------------------------------- /flows/detectors/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://resnext101_32x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=32, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch')) 14 | -------------------------------------------------------------------------------- /flows/detectors/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://resnext101_64x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=64, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch')) 14 | -------------------------------------------------------------------------------- /rfvision/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import (async_inference_detector, inference_detector, 2 | init_detector, show_result_pyplot) 3 | from .test import multi_gpu_test, single_gpu_test 4 | from .train import get_root_logger, set_random_seed, train_detector 5 | __all__ = [ 6 | 'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector', 7 | 'async_inference_detector', 'inference_detector', 'show_result_pyplot', 8 | 'multi_gpu_test', 'single_gpu_test' 9 | ] 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 79 3 | multi_line_output = 0 4 | known_standard_library = setuptools 5 | known_first_party = rfvision 6 | known_third_party = PIL,asynctest,cityscapesscripts,cv2,gather_models,matplotlib,rflib,numpy,pycocotools,pytest,seaborn,six,terminaltables,torch,ts,yaml 7 | no_lines_before = STDLIB,LOCALFOLDER 8 | default_section = THIRDPARTY 9 | 10 | [yapf] 11 | BASED_ON_STYLE = pep8 12 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 13 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 14 | -------------------------------------------------------------------------------- 
/rfvision/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | from rflib.utils import collect_env as collect_base_env 2 | from rflib.utils import get_git_hash 3 | 4 | import rfvision 5 | 6 | 7 | def collect_env(): 8 | """Collect the information of the running environments.""" 9 | env_info = collect_base_env() 10 | env_info['RFVision'] = rfvision.__version__ + '+' + get_git_hash()[:7] 11 | return env_info 12 | 13 | 14 | if __name__ == '__main__': 15 | for name, val in collect_env().items(): 16 | print(f'{name}: {val}') 17 | -------------------------------------------------------------------------------- /rfvision/core/bbox3d/iou_calculators/__init__.py: -------------------------------------------------------------------------------- 1 | from .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 2 | BboxOverlapsNearest3D, 3 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 4 | bbox_overlaps_nearest_3d) 5 | 6 | __all__ = [ 7 | 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d', 8 | 'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D', 9 | 'axis_aligned_bbox_overlaps_3d' 10 | ] 11 | -------------------------------------------------------------------------------- /rfvision/core/voxel/builder.py: -------------------------------------------------------------------------------- 1 | import rflib 2 | 3 | from . import voxel_generator 4 | 5 | 6 | def build_voxel_generator(cfg, **kwargs): 7 | """Builder of voxel generator.""" 8 | if isinstance(cfg, voxel_generator.VoxelGenerator): 9 | return cfg 10 | elif isinstance(cfg, dict): 11 | return rflib.runner.obj_from_dict( 12 | cfg, voxel_generator, default_args=kwargs) 13 | else: 14 | raise TypeError('Invalid type {} for building a voxel generator'.format( 15 | type(cfg))) 16 | -------------------------------------------------------------------------------- /rfvision/models/detectors/htc.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .cascade_rcnn import CascadeRCNN 3 | 4 | 5 | @DETECTORS.register_module() 6 | class HybridTaskCascade(CascadeRCNN): 7 | """Implementation of `HTC <https://arxiv.org/abs/1901.07518>`_""" 8 | 9 | def __init__(self, **kwargs): 10 | super(HybridTaskCascade, self).__init__(**kwargs) 11 | 12 | @property 13 | def with_semantic(self): 14 | """bool: whether the detector has a semantic head""" 15 | return self.roi_head.with_semantic 16 | -------------------------------------------------------------------------------- /rfvision/models/pose_estimators/articulation/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def point_3d_offset_joint(joint, point): 4 | """ 5 | joint: [[x, y, z], [rx, ry, rz]], i.e. a point on the joint axis and the axis direction 6 | point: N * 3 7 | """ 8 | if len(joint) == 2: 9 | P0 = np.array(joint[0]) 10 | P = np.array(point) 11 | l = np.array(joint[1]).reshape(1, 3) 12 | P0P = P - P0 13 | # offset from each point to its perpendicular foot on the joint axis 14 | PP = np.dot(P0P, l.T) * l / np.linalg.norm(l)**2 - P0P 15 | return PP 16 | -------------------------------------------------------------------------------- /rfvision/core/bbox/coder/base_bbox_coder.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseBBoxCoder(metaclass=ABCMeta): 5 | """Base bounding box coder.""" 6 | 7 | def __init__(self, **kwargs): 8 | pass 9 | 10 | @abstractmethod 11 | def encode(self, bboxes, gt_bboxes): 12 |
"""Encode deltas between bboxes and ground truth boxes.""" 13 | 14 | @abstractmethod 15 | def decode(self, bboxes, bboxes_pred): 16 | """Decode the predicted bboxes according to prediction and base 17 | boxes.""" 18 | -------------------------------------------------------------------------------- /rfvision/core/post_processing_pose/__init__.py: -------------------------------------------------------------------------------- 1 | from .post_transforms import (affine_transform, flip_back, fliplr_joints, 2 | fliplr_regression, get_affine_transform, 3 | get_warp_matrix, rotate_point, transform_preds, 4 | warp_affine_joints) 5 | 6 | __all__ = [ 7 | 'affine_transform', 'flip_back', 'fliplr_joints', 8 | 'fliplr_regression', 'get_affine_transform', 9 | 'get_warp_matrix', 'rotate_point', 'transform_preds', 10 | 'warp_affine_joints' 11 | ] 12 | -------------------------------------------------------------------------------- /tests/test_models/test_loss.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | from rfvision.components.losses import (BoundedIoULoss, CIoULoss, DIoULoss, GIoULoss, 5 | IoULoss) 6 | 7 | 8 | @pytest.mark.parametrize( 9 | 'loss_class', [IoULoss, BoundedIoULoss, GIoULoss, DIoULoss, CIoULoss]) 10 | def test_iou_type_loss_zeros_weight(loss_class): 11 | pred = torch.rand((10, 4)) 12 | target = torch.rand((10, 4)) 13 | weight = torch.zeros(10) 14 | 15 | loss = loss_class()(pred, target, weight) 16 | assert loss == 0. 17 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain-poly_3x_coco_instance.py', 3 | '../_base_/models/mask_rcnn_r50_fpn.py' 4 | ] 5 | 6 | model = dict( 7 | init_cfg='open-mmlab://resnext101_32x4d', 8 | backbone=dict( 9 | type='ResNeXt', 10 | depth=101, 11 | groups=32, 12 | base_width=4, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | frozen_stages=1, 16 | norm_cfg=dict(type='BN', requires_grad=True), 17 | style='pytorch')) 18 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain-poly_3x_coco_instance.py', 3 | '../_base_/models/mask_rcnn_r50_fpn.py' 4 | ] 5 | 6 | model = dict( 7 | init_cfg='open-mmlab://resnext101_64x4d', 8 | backbone=dict( 9 | type='ResNeXt', 10 | depth=101, 11 | groups=64, 12 | base_width=4, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | frozen_stages=1, 16 | norm_cfg=dict(type='BN', requires_grad=True), 17 | style='pytorch')) 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | weights/ 2 | work_dirs/ 3 | __pycache__/ 4 | .DS_Store 5 | *.mp4 6 | *.png 7 | *.xyz 8 | *.mtl 9 | *.xml 10 | *.so 11 | build/ 12 | devel/ 13 | .catkin_workspace 14 | git.sh 15 | pddlstream/ 16 | *.pyc 17 | FastDownward/ 18 | checkpoint/ 19 | tmp/ 20 | temp/ 21 | *.stl 22 | *.dae 23 | *.obj 24 | .vscode/ 25 | .idea/ 26 | # Distribution / packaging 27 | .Python 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | *.egg-info/ 40 
| *.egg-info 41 | .installed.cfg 42 | *.egg 43 | MANIFEST 44 | debug/ -------------------------------------------------------------------------------- /flows/detectors3d/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | -------------------------------------------------------------------------------- /rfvision/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from rflib.utils import get_logger 4 | 5 | 6 | def get_root_logger(log_file=None, log_level=logging.INFO): 7 | """Get root logger. 8 | 9 | Args: 10 | log_file (str, optional): File path of log. Defaults to None. 11 | log_level (int, optional): The level of logger. 12 | Defaults to logging.INFO. 13 | 14 | Returns: 15 | :obj:`logging.Logger`: The obtained logger 16 | """ 17 | logger = get_logger(name='rfvision', log_file=log_file, log_level=log_level) 18 | logger.propagate = False 19 | return logger 20 | -------------------------------------------------------------------------------- /rfvision/core/hook/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .checkloss_hook import CheckInvalidLossHook 3 | from .ema import ExpMomentumEMAHook, LinearMomentumEMAHook 4 | from .sync_norm_hook import SyncNormHook 5 | from .sync_random_size_hook import SyncRandomSizeHook 6 | from .yolox_lrupdater_hook import YOLOXLrUpdaterHook 7 | from .yolox_mode_switch_hook import YOLOXModeSwitchHook 8 | 9 | __all__ = [ 10 | 'SyncRandomSizeHook', 'YOLOXModeSwitchHook', 'SyncNormHook', 11 | 'ExpMomentumEMAHook', 'LinearMomentumEMAHook', 'YOLOXLrUpdaterHook', 12 | 'CheckInvalidLossHook' 13 | ] 14 | -------------------------------------------------------------------------------- /rfvision/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) RobotFlow. All rights reserved. 2 | 3 | __version__ = '0.0.1' 4 | short_version = __version__ 5 | 6 | 7 | def parse_version_info(version_str): 8 | version_info = [] 9 | for x in version_str.split('.'): 10 | if x.isdigit(): 11 | version_info.append(int(x)) 12 | elif x.find('rc') != -1: 13 | patch_version = x.split('rc') 14 | version_info.append(int(patch_version[0])) 15 | version_info.append(f'rc{patch_version[1]}') 16 | return tuple(version_info) 17 | 18 | 19 | version_info = parse_version_info(__version__) 20 | -------------------------------------------------------------------------------- /rfvision/datasets/pose_dataset/body/body3d_base_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from abc import ABCMeta 3 | 4 | from torch.utils.data import Dataset 5 | 6 | 7 | class Body3DBaseDataset(Dataset, metaclass=ABCMeta): 8 | """This class has been deprecated and replaced by 9 | Kpt3dSviewKpt2dDataset.""" 10 | 11 | def __init__(self, *args, **kwargs): 12 | raise ImportError( 13 | 'Body3DBaseDataset has been replaced by ' 14 | 'Kpt3dSviewKpt2dDataset; ' 15 | 'check https://github.com/open-mmlab/mmpose/pull/663 for details.') 16 | -------------------------------------------------------------------------------- /rfvision/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .cascade_rcnn import CascadeRCNN 3 | from .fcos import FCOS 4 | from .htc import HybridTaskCascade 5 | from .mask_rcnn import MaskRCNN 6 | from .single_stage import SingleStageDetector 7 | from .two_stage import TwoStageDetector 8 | from .yolo import YOLOV3 9 | from .solo import SOLO 10 | from .solov2 import SOLOv2 11 | from .yolox import YOLOX 12 | 13 | __all__ = [ 14 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 15 | 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade', 'FCOS', 16 | 'YOLOV3', 'SOLO', 'SOLOv2', 'YOLOX' 17 | ] 18 | -------------------------------------------------------------------------------- /rfvision/components/keypoint_head/__init__.py: -------------------------------------------------------------------------------- 1 | from .topdown_heatmap_simple_head import TopdownHeatmapSimpleHead 2 | from .pose_head import Interhand3DHead, Heatmap3DHead, Heatmap1DHead, MultilabelClassificationHead 3 | from .topdown_heatmap_simple_head import TopdownHeatmapBaseHead 4 | from .topdown_heatmap_simple_head_3d import Topdown3DHeatmapSimpleHead 5 | from .temporal_regression_head import TemporalRegressionHead 6 | __all__ = ['TopdownHeatmapSimpleHead', 'Heatmap3DHead', 'Heatmap1DHead', 7 | 'Interhand3DHead', 'TopdownHeatmapBaseHead', 'Topdown3DHeatmapSimpleHead', 'MultilabelClassificationHead', 8 | 'TemporalRegressionHead'] -------------------------------------------------------------------------------- /rfvision/datasets/pipelines/keypointnet_pipeline.py: -------------------------------------------------------------------------------- 1 | from rfvision.datasets import PIPELINES 2 | from rfvision.components.utils import normalize_point_cloud 3 | 4 | @PIPELINES.register_module() 5 | class NormalizePoints: 6 | def __call__(self, results): 7 | points = results['points'] 8 | pc_normalized, centroid, m = normalize_point_cloud(points) 9 | 10 | if 'keypoints_xyz' in results: 11 | keypoints_xyz = results['keypoints_xyz'] 12 | keypoints_xyz_normalized = (keypoints_xyz - centroid) / m 13 | results['keypoints_xyz'] = keypoints_xyz_normalized 14 | return results 15 | -------------------------------------------------------------------------------- /rfvision/models/detectors/fcos.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class FCOS(SingleStageDetector): 7 | """Implementation of `FCOS <https://arxiv.org/abs/1904.01355>`_""" 8 | 9 | def __init__(self, 10 | backbone, 11 | neck, 12 | bbox_head, 13 | train_cfg=None, 14 | test_cfg=None, 15 | init_cfg=None): 16 | super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg, 17 | test_cfg, init_cfg) 18 | --------------------------------------------------------------------------------
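To make the thin wrapper above concrete, here is a hedged sketch of how such a detector is assembled from a config dict via the model registry. The backbone/neck/head settings are illustrative placeholders, not the configs shipped under `flows/`.

```python
# Illustrative only: building the FCOS wrapper above through the model
# registry. The sub-module settings are made-up placeholders.
from rfvision.models import build_detector

cfg = dict(
    type='FCOS',
    backbone=dict(type='ResNet', depth=50, num_stages=4,
                  out_indices=(0, 1, 2, 3), frozen_stages=1,
                  norm_cfg=dict(type='BN', requires_grad=True),
                  style='pytorch'),
    neck=dict(type='FPN', in_channels=[256, 512, 1024, 2048],
              out_channels=256, start_level=1, num_outs=5),
    bbox_head=dict(type='FCOSHead', num_classes=80, in_channels=256,
                   strides=[8, 16, 32, 64, 128]))

# the 'type' key dispatches to the class registered via
# @DETECTORS.register_module() above
detector = build_detector(cfg)
```

--------------------------------------------------------------------------------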
/rfvision/core/bbox/coder/pseudo_bbox_coder.py: -------------------------------------------------------------------------------- 1 | from ..builder import BBOX_CODERS 2 | from .base_bbox_coder import BaseBBoxCoder 3 | 4 | 5 | @BBOX_CODERS.register_module() 6 | class PseudoBBoxCoder(BaseBBoxCoder): 7 | """Pseudo bounding box coder.""" 8 | 9 | def __init__(self, **kwargs): 10 | super(BaseBBoxCoder, self).__init__(**kwargs) 11 | 12 | def encode(self, bboxes, gt_bboxes): 13 | """torch.Tensor: return the given ``bboxes``""" 14 | return gt_bboxes 15 | 16 | def decode(self, bboxes, pred_bboxes): 17 | """torch.Tensor: return the given ``pred_bboxes``""" 18 | return pred_bboxes 19 | -------------------------------------------------------------------------------- /rfvision/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .combined_sampler import CombinedSampler 3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 5 | from .ohem_sampler import OHEMSampler 6 | from .pseudo_sampler import PseudoSampler 7 | from .random_sampler import RandomSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- /rfvision/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (coco_classes, dataset_aliases, 2 | get_classes) 3 | from .eval_hooks import DistEvalHook, EvalHook 4 | from .mean_ap import average_precision, eval_map, print_map_summary 5 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 6 | print_recall_summary) 7 | 8 | __all__ = [ 9 | 'coco_classes', 'dataset_aliases', 'get_classes', 10 | 'DistEvalHook', 'EvalHook', 'average_precision', 'eval_map', 11 | 'print_map_summary', 'eval_recalls', 'print_recall_summary', 12 | 'plot_num_recall', 'plot_iou_recall', 13 | 14 | ] 15 | -------------------------------------------------------------------------------- /flows/detectors/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './htc_r50_fpn_1x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://resnext101_32x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=32, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | norm_eval=True, 14 | style='pytorch')) 15 | data = dict(samples_per_gpu=1, workers_per_gpu=1) 16 | # learning policy 17 | lr_config = dict(step=[16, 19]) 18 | runner = dict(type='EpochBasedRunner', max_epochs=20) 19 | -------------------------------------------------------------------------------- /flows/detectors/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './htc_r50_fpn_1x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://resnext101_64x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=64, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', 
requires_grad=True), 13 | norm_eval=True, 14 | style='pytorch')) 15 | data = dict(samples_per_gpu=1, workers_per_gpu=1) 16 | # learning policy 17 | lr_config = dict(step=[16, 19]) 18 | runner = dict(type='EpochBasedRunner', max_epochs=20) 19 | -------------------------------------------------------------------------------- /rfvision/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .dist_utils import (DistOptimizerHook, all_reduce_dict, allreduce_grads, 3 | reduce_mean) 4 | from .misc import (center_of_mass, filter_scores_and_topk, flip_tensor, 5 | generate_coordinate, mask2ndarray, multi_apply, 6 | select_single_mlvl, unmap) 7 | 8 | __all__ = [ 9 | 'allreduce_grads', 'DistOptimizerHook', 'reduce_mean', 'multi_apply', 10 | 'unmap', 'mask2ndarray', 'flip_tensor', 'all_reduce_dict', 11 | 'center_of_mass', 'generate_coordinate', 'select_single_mlvl', 12 | 'filter_scores_and_topk' 13 | ] 14 | -------------------------------------------------------------------------------- /rfvision/core/anchor/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import warnings 3 | 4 | from rflib.utils import Registry, build_from_cfg 5 | 6 | PRIOR_GENERATORS = Registry('Generator for anchors and points') 7 | 8 | ANCHOR_GENERATORS = PRIOR_GENERATORS 9 | 10 | 11 | def build_prior_generator(cfg, default_args=None): 12 | return build_from_cfg(cfg, PRIOR_GENERATORS, default_args) 13 | 14 | 15 | def build_anchor_generator(cfg, default_args=None): 16 | warnings.warn( 17 | '``build_anchor_generator`` would be deprecated soon, please use ' 18 | '``build_prior_generator`` ') 19 | return build_prior_generator(cfg, default_args=default_args) 20 | -------------------------------------------------------------------------------- /rfvision/core/bbox/builder.py: -------------------------------------------------------------------------------- 1 | from rflib.utils import Registry, build_from_cfg 2 | 3 | BBOX_ASSIGNERS = Registry('bbox_assigner') 4 | BBOX_SAMPLERS = Registry('bbox_sampler') 5 | BBOX_CODERS = Registry('bbox_coder') 6 | 7 | 8 | def build_assigner(cfg, **default_args): 9 | """Builder of box assigner.""" 10 | return build_from_cfg(cfg, BBOX_ASSIGNERS, default_args) 11 | 12 | 13 | def build_sampler(cfg, **default_args): 14 | """Builder of box sampler.""" 15 | return build_from_cfg(cfg, BBOX_SAMPLERS, default_args) 16 | 17 | 18 | def build_bbox_coder(cfg, **default_args): 19 | """Builder of box coder.""" 20 | return build_from_cfg(cfg, BBOX_CODERS, default_args) 21 | -------------------------------------------------------------------------------- /rfvision/datasets/pose_dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .hand import InterHand3DDataset, Rhd2DDataset, Rhd3DDataset 2 | from .base import Kpt2dSviewRgbImgTopDownDataset, Kpt3dSviewRgbImgTopDownDataset 3 | from .dataset_info import DatasetInfo 4 | from .mesh import (MeshAdversarialDataset, MeshH36MDataset, MeshMixDataset, 5 | MoshDataset) 6 | from .body import Body3DH36MDataset 7 | __all__ = ['InterHand3DDataset', 'Kpt2dSviewRgbImgTopDownDataset', 8 | 'Kpt3dSviewRgbImgTopDownDataset', 'DatasetInfo', 'Rhd2DDataset', 'Rhd3DDataset', 9 | 'MeshH36MDataset', 'MoshDataset', 'MeshMixDataset', 10 | 'MeshAdversarialDataset', 'Body3DH36MDataset' 11 | ] 
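The `build_*` helpers in `rfvision/core/bbox/builder.py` above all funnel through rflib's registry machinery. A minimal sketch of that pattern, assuming rflib's `Registry`/`build_from_cfg` behave like their mmcv counterparts; the toy sampler class is hypothetical.

```python
# Hypothetical toy example of the registry pattern used by build_assigner,
# build_sampler and build_bbox_coder above; assumes rflib mirrors mmcv.
from rflib.utils import Registry, build_from_cfg

TOY_SAMPLERS = Registry('toy_sampler')

@TOY_SAMPLERS.register_module()
class ToyRandomSampler:
    def __init__(self, num=256, pos_fraction=0.5):
        self.num = num
        self.pos_fraction = pos_fraction

# the config's 'type' key selects the registered class; the remaining
# keys become constructor arguments
cfg = dict(type='ToyRandomSampler', num=128)
sampler = build_from_cfg(cfg, TOY_SAMPLERS)
assert sampler.num == 128 and sampler.pos_fraction == 0.5
```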
-------------------------------------------------------------------------------- /rfvision/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 2 | from .assign_result import AssignResult 3 | from .base_assigner import BaseAssigner 4 | from .center_region_assigner import CenterRegionAssigner 5 | from .max_iou_assigner import MaxIoUAssigner 6 | from .region_assigner import RegionAssigner 7 | from .uniform_assigner import UniformAssigner 8 | from .grid_assigner import GridAssigner 9 | from .sim_ota_assigner import SimOTAAssigner 10 | __all__ = [ 11 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult', 12 | 'CenterRegionAssigner', 'RegionAssigner', 'UniformAssigner', 'GridAssigner', 13 | 'SimOTAAssigner' 14 | ] 15 | -------------------------------------------------------------------------------- /rfvision/core/bbox3d/structures/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import (get_box_type, limit_period, points_cam2img, 2 | rotation_3d_in_axis, xywhr2xyxyr) 3 | from .depth_box3d import DepthInstance3DBoxes 4 | from .base_box3d import BaseInstance3DBoxes 5 | from .box_3d_mode import Box3DMode 6 | from .cam_box3d import CameraInstance3DBoxes 7 | from .lidar_box3d import LiDARInstance3DBoxes 8 | from .coord_3d_mode import Coord3DMode 9 | 10 | __all__ = ['DepthInstance3DBoxes', 'BaseInstance3DBoxes', 'points_cam2img', 11 | 'xywhr2xyxyr', 'get_box_type', 'rotation_3d_in_axis', 'limit_period', 12 | 'Box3DMode', 'CameraInstance3DBoxes', 'LiDARInstance3DBoxes', 'Coord3DMode'] -------------------------------------------------------------------------------- /rfvision/datasets/pipelines/loading_custom.py: -------------------------------------------------------------------------------- 1 | from .loading3d import LoadPointsFromFile 2 | from rfvision.datasets.builder import PIPELINES 3 | import numpy as np 4 | import rflib 5 | @PIPELINES.register_module() 6 | class LoadPointsFromFilePointFormer(LoadPointsFromFile): 7 | def _load_points(self, pts_filename): 8 | # pts_filename = './data/alfred/alfred_instance_data/' \ 9 | # + pts_filename.split('/')[-1][:-4] + '_vert.npy' 10 | rflib.check_file_exist(pts_filename) 11 | if pts_filename.endswith('.npy'): 12 | points = np.load(pts_filename) 13 | else: 14 | points = np.fromfile(pts_filename, dtype=np.float32) 15 | return points -------------------------------------------------------------------------------- /rfvision/tools/dataset_converters/create_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | export PYTHONPATH=`pwd`:$PYTHONPATH 5 | 6 | PARTITION=$1 7 | JOB_NAME=$2 8 | CONFIG=$3 9 | WORK_DIR=$4 10 | GPUS=${GPUS:-1} 11 | GPUS_PER_NODE=${GPUS_PER_NODE:-1} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | JOB_NAME=create_data 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --kill-on-bad-exit=1 \ 21 | ${SRUN_ARGS} \ 22 | python -u tools/create_data.py kitti \ 23 | --root-path ./data/kitti \ 24 | --out-dir ./data/kitti \ 25 | --extra-tag kitti 26 | -------------------------------------------------------------------------------- /tests/test_data/test_datasets/test_xml_dataset.py: -------------------------------------------------------------------------------- 1 | 
import pytest 2 | 3 | from rfvision.datasets import DATASETS 4 | 5 | 6 | def test_xml_dataset(): 7 | dataconfig = { 8 | 'ann_file': 'data/VOCdevkit/VOC2007/ImageSets/Main/test.txt', 9 | 'img_prefix': 'data/VOCdevkit/VOC2007/', 10 | 'pipeline': [{ 11 | 'type': 'LoadImageFromFile' 12 | }] 13 | } 14 | XMLDataset = DATASETS.get('XMLDataset') 15 | 16 | class XMLDatasetSubClass(XMLDataset): 17 | CLASSES = None 18 | 19 | # get_ann_info and _filter_imgs of XMLDataset use self.CLASSES; 20 | # the subclass sets CLASSES to None, so instantiation must fail 21 | with pytest.raises(AssertionError): 22 | XMLDatasetSubClass(**dataconfig) 23 | -------------------------------------------------------------------------------- /rfvision/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .detectors import * # noqa: F401,F403 2 | from .detectors3d import * 3 | from .human_analyzers import * 4 | from .pose_estimators import * 5 | from .builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, HUMAN_ANALYZERS, 6 | ROI_EXTRACTORS, SHARED_HEADS, build_backbone, 7 | build_detector, build_head, build_loss, build_neck, 8 | build_roi_extractor, build_shared_head) 9 | 10 | __all__ = [ 11 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 'HUMAN_ANALYZERS', 12 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 13 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 14 | ] 15 | -------------------------------------------------------------------------------- /tests/test_models/test_utils/test_se_layer.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | from rfvision.models.utils import SELayer 5 | 6 | 7 | def test_se_layer(): 8 | with pytest.raises(AssertionError): 9 | # act_cfg sequence length must equal to 2 10 | SELayer(channels=32, act_cfg=(dict(type='ReLU'), )) 11 | 12 | with pytest.raises(AssertionError): 13 | # act_cfg sequence must be a tuple of dict 14 | SELayer(channels=32, act_cfg=[dict(type='ReLU'), dict(type='ReLU')]) 15 | 16 | # Test SELayer forward 17 | layer = SELayer(channels=32) 18 | layer.init_weights() 19 | layer.train() 20 | 21 | x = torch.randn((1, 32, 10, 10)) 22 | x_out = layer(x) 23 | assert x_out.shape == torch.Size((1, 32, 10, 10)) 24 | -------------------------------------------------------------------------------- /rfvision/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .anchor_generator import (AnchorGenerator, LegacyAnchorGenerator, 3 | YOLOAnchorGenerator) 4 | from .builder import (ANCHOR_GENERATORS, PRIOR_GENERATORS, 5 | build_anchor_generator, build_prior_generator) 6 | from .point_generator import MlvlPointGenerator, PointGenerator 7 | from .utils import anchor_inside_flags, calc_region, images_to_levels 8 | 9 | 10 | __all__ = [ 11 | 'AnchorGenerator', 'LegacyAnchorGenerator', 'anchor_inside_flags', 12 | 'PointGenerator', 'images_to_levels', 'calc_region', 13 | 'build_anchor_generator', 'ANCHOR_GENERATORS', 'YOLOAnchorGenerator', 14 | 'build_prior_generator', 'PRIOR_GENERATORS', 'MlvlPointGenerator' 15 | ] 16 | -------------------------------------------------------------------------------- /flows/human_analyzers/body/3d_mesh_sview_rgb_img/hmr/README.md: -------------------------------------------------------------------------------- 1 | # End-to-end Recovery of Human Shape and Pose 2 | 3 | ## Introduction 4 | 5 | 6 | 7 |
8 | HMR (CVPR'2018) 9 | 10 | ```bibtex 11 | @inProceedings{kanazawaHMR18, 12 | title={End-to-end Recovery of Human Shape and Pose}, 13 | author = {Angjoo Kanazawa 14 | and Michael J. Black 15 | and David W. Jacobs 16 | and Jitendra Malik}, 17 | booktitle={Computer Vision and Pattern Recognition (CVPR)}, 18 | year={2018} 19 | } 20 | ``` 21 | 22 |
23 | 24 | HMR is an end-to-end framework for reconstructing a full 3D mesh of a human body from a single RGB image. 25 | -------------------------------------------------------------------------------- /rfvision/core/hook/checkloss_hook.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from rflib.runner.hooks import HOOKS, Hook 4 | 5 | 6 | @HOOKS.register_module() 7 | class CheckInvalidLossHook(Hook): 8 | """Check invalid loss hook. 9 | 10 | This hook will regularly check whether the loss is valid 11 | during training. 12 | 13 | Args: 14 | interval (int): Checking interval (every k iterations). 15 | Default: 50. 16 | """ 17 | 18 | def __init__(self, interval=50): 19 | self.interval = interval 20 | 21 | def after_train_iter(self, runner): 22 | if self.every_n_iters(runner, self.interval): 23 | assert torch.isfinite(runner.outputs['loss']), \ 24 | 'loss becomes infinite or NaN!' 25 | -------------------------------------------------------------------------------- /rfvision/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from ..builder import BBOX_SAMPLERS, build_sampler 2 | from .base_sampler import BaseSampler 3 | 4 | 5 | @BBOX_SAMPLERS.register_module() 6 | class CombinedSampler(BaseSampler): 7 | """A sampler that combines positive sampler and negative sampler.""" 8 | 9 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 10 | super(CombinedSampler, self).__init__(**kwargs) 11 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 12 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 13 | 14 | def _sample_pos(self, **kwargs): 15 | """Sample positive samples.""" 16 | raise NotImplementedError 17 | 18 | def _sample_neg(self, **kwargs): 19 | """Sample negative samples.""" 20 | raise NotImplementedError 21 | -------------------------------------------------------------------------------- /rfvision/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class MaskRCNN(TwoStageDetector): 7 | """Implementation of `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_""" 8 | 9 | def __init__(self, 10 | backbone, 11 | rpn_head, 12 | roi_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | init_cfg=None): 17 | super(MaskRCNN, self).__init__( 18 | backbone=backbone, 19 | neck=neck, 20 | rpn_head=rpn_head, 21 | roi_head=roi_head, 22 | train_cfg=train_cfg, 23 | test_cfg=test_cfg, 24 | init_cfg=init_cfg) 25 | -------------------------------------------------------------------------------- /tests/test_utils/test_version.py: -------------------------------------------------------------------------------- 1 | from rfvision import digit_version 2 | 3 | 4 | def test_version_check(): 5 | assert digit_version('1.0.5') > digit_version('1.0.5rc0') 6 | assert digit_version('1.0.5') > digit_version('1.0.4rc0') 7 | assert digit_version('1.0.5') > digit_version('1.0rc0') 8 | assert digit_version('1.0.0') > digit_version('0.6.2') 9 | assert digit_version('1.0.0') > digit_version('0.2.16') 10 | assert digit_version('1.0.5rc0') > digit_version('1.0.0rc0') 11 | assert digit_version('1.0.0rc1') > digit_version('1.0.0rc0') 12 | assert digit_version('1.0.0rc2') > digit_version('1.0.0rc0') 13 | assert digit_version('1.0.0rc2') > digit_version('1.0.0rc1') 14 | assert
digit_version('1.0.1rc1') > digit_version('1.0.0rc1') 15 | assert digit_version('1.0.0') > digit_version('1.0.0rc1') 16 | -------------------------------------------------------------------------------- /rfvision/models/detectors/yolo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 Western Digital Corporation or its affiliates. 2 | 3 | from ..builder import DETECTORS 4 | from .single_stage import SingleStageDetector 5 | from rflib.cnn import kaiming_init, constant_init 6 | from rflib.runner import load_checkpoint 7 | import logging 8 | import torch.nn as nn 9 | from torch.nn.modules.batchnorm import _BatchNorm 10 | 11 | @DETECTORS.register_module() 12 | class YOLOV3(SingleStageDetector): 13 | 14 | def __init__(self, 15 | backbone, 16 | neck, 17 | bbox_head, 18 | train_cfg=None, 19 | test_cfg=None, 20 | init_cfg=None): 21 | super(YOLOV3, self).__init__(backbone, neck, bbox_head, train_cfg, 22 | test_cfg, init_cfg) 23 | -------------------------------------------------------------------------------- /flows/human_analyzers/body/3d_kpt_sview_rgb_img/README.md: -------------------------------------------------------------------------------- 1 | # Single-view 3D Human Body Pose Estimation 2 | 3 | 3D pose estimation is the detection and analysis of X, Y, Z coordinates of human body joints from an RGB image. 4 | For single-person 3D pose estimation from a monocular camera, existing works can be classified into three categories: 5 | (1) from 2D poses to 3D poses (2D-to-3D pose lifting) 6 | (2) jointly learning 2D and 3D poses, and 7 | (3) directly regressing 3D poses from images. 8 | 9 | ## Data preparation 10 | 11 | Please follow [DATA Preparation](/docs/tasks/3d_body_keypoint.md) to prepare data. 12 | 13 | ## Demo 14 | 15 | Please follow [Demo](/demo/docs/3d_human_pose_demo.md) to run demos. 16 | 17 |
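As a rough illustration of category (1) above, a 2D-to-3D lifting model is simply a function from K image-plane joints to K camera-space joints. The toy network below is a schematic sketch of that interface, not any of the shipped models.

```python
# Toy sketch of 2D-to-3D pose lifting (category 1): a small MLP maps
# K detected 2D joints to K 3D joints. Schematic only.
import torch
import torch.nn as nn

class ToyLifter(nn.Module):
    def __init__(self, num_joints=17, hidden=1024):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(num_joints * 2, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, num_joints * 3))

    def forward(self, kpts_2d):             # (B, K, 2)
        b, k, _ = kpts_2d.shape
        out = self.net(kpts_2d.flatten(1))  # (B, K * 3)
        return out.view(b, k, 3)            # (B, K, 3)

lifter = ToyLifter()
xyz = lifter(torch.randn(4, 17, 2))
assert xyz.shape == (4, 17, 3)
```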
18 | -------------------------------------------------------------------------------- /tests/test_data/test_pipelines/test_formatting.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from rflib.utils import build_from_cfg 4 | 5 | from rfvision.datasets.builder import PIPELINES 6 | 7 | 8 | def test_default_format_bundle(): 9 | results = dict( 10 | img_prefix=osp.join(osp.dirname(__file__), '../../data'), 11 | img_info=dict(filename='color.jpg')) 12 | load = dict(type='LoadImageFromFile') 13 | load = build_from_cfg(load, PIPELINES) 14 | bundle = dict(type='DefaultFormatBundle') 15 | bundle = build_from_cfg(bundle, PIPELINES) 16 | results = load(results) 17 | assert 'pad_shape' not in results 18 | assert 'scale_factor' not in results 19 | assert 'img_norm_cfg' not in results 20 | results = bundle(results) 21 | assert 'pad_shape' in results 22 | assert 'scale_factor' in results 23 | assert 'img_norm_cfg' in results 24 | -------------------------------------------------------------------------------- /rfvision/components/backbones/base_pointnet.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta 2 | from rflib.runner import BaseModule 3 | 4 | 5 | class BasePointNet(BaseModule, metaclass=ABCMeta): 6 | """Base class for PointNet.""" 7 | 8 | @staticmethod 9 | def _split_point_feats(points): 10 | """Split coordinates and features of input points. 11 | 12 | Args: 13 | points (torch.Tensor): Point coordinates with features, 14 | with shape (B, N, 3 + input_feature_dim). 15 | 16 | Returns: 17 | torch.Tensor: Coordinates of input points. 18 | torch.Tensor: Features of input points. 19 | """ 20 | xyz = points[..., 0:3].contiguous() 21 | if points.size(-1) > 3: 22 | features = points[..., 3:].transpose(1, 2).contiguous() 23 | else: 24 | features = None 25 | 26 | return xyz, features 27 | -------------------------------------------------------------------------------- /rfvision/utils/export.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def get_k_for_topk(k, size): 4 | """Get k of TopK for onnx exporting. 5 | 6 | The K of TopK in TensorRT should not be a Tensor, while in ONNX Runtime 7 | it could be a Tensor. Due to the dynamic shape feature, we have to decide 8 | whether to do TopK and what K it should be while exporting to ONNX. 9 | If returned K is less than zero, it means we do not have to do 10 | TopK operation. 11 | 12 | Args: 13 | k (int or Tensor): The set k value for nms from config file. 14 | size (Tensor or torch.Size): The number of elements of \ 15 | TopK's input tensor 16 | Returns: 17 | int or Tensor: The final K for TopK.
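Example (illustrative, matching the logic below)::

    >>> get_k_for_topk(10, 50)   # k < size: do TopK with k=10
    10
    >>> get_k_for_topk(100, 50)  # k >= size: skip TopK
    -1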
18 | """ 19 | ret_k = -1 20 | if k <= 0 or size <= 0: 21 | return ret_k 22 | if k < size: 23 | ret_k = k 24 | else: 25 | # ret_k is -1 26 | pass 27 | return ret_k -------------------------------------------------------------------------------- /rfvision/core/evaluation_pose/__init__.py: -------------------------------------------------------------------------------- 1 | from .top_down_eval import (keypoint_auc, keypoint_epe, keypoint_pck_accuracy, 2 | keypoints_from_heatmaps, keypoints_from_heatmaps3d, 3 | keypoints_from_regression, 4 | multilabel_classification_accuracy, 5 | pose_pck_accuracy, post_dark_udp) 6 | 7 | from .mesh_eval import compute_similarity_transform 8 | from .pose3d_eval import keypoint_mpjpe, keypoint_3d_pck, keypoint_3d_auc 9 | 10 | 11 | __all__ = ['keypoint_auc', 'keypoint_epe', 'keypoint_pck_accuracy', 12 | 'keypoints_from_heatmaps', 'keypoints_from_heatmaps3d', 13 | 'keypoints_from_regression', 14 | 'multilabel_classification_accuracy', 15 | 'pose_pck_accuracy', 'post_dark_udp', 16 | 'compute_similarity_transform', 17 | 'keypoint_mpjpe', 'keypoint_3d_pck', 'keypoint_3d_auc'] -------------------------------------------------------------------------------- /rfvision/models/detectors3d/category_ppf/train.py: -------------------------------------------------------------------------------- 1 | from rfvision.apis import train_detector 2 | import rflib 3 | from rfvision.models.builder import build_detector 4 | from rfvision.datasets.builder import build_dataset 5 | 6 | if __name__ == '__main__': 7 | for category in range(1, 7): 8 | print(f'Training category{str(category)}') 9 | cfg = './cfg.py' 10 | cfg = rflib.Config.fromfile(cfg) 11 | cfg.model.category = category 12 | cfg.work_dir = f'/home/hanyang/rfvision/work_dir/category_ppf/category{str(category)}' 13 | cfg.data.train.category = category 14 | 15 | model = build_detector(cfg.model) 16 | model.init_weights() 17 | dataset = build_dataset(cfg.data.train) 18 | train_detector(model=model, 19 | dataset=dataset, 20 | cfg=cfg, 21 | distributed=False, 22 | validate=False) -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r50_fpn_poly_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/mask_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict( 12 | type='LoadAnnotations', 13 | with_bbox=True, 14 | with_mask=True, 15 | poly2mask=False), 16 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 22 | ] 23 | data = dict(train=dict(pipeline=train_pipeline)) 24 | -------------------------------------------------------------------------------- /rfvision/models/detectors/solo.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .single_stage_instance_seg import SingleStageInstanceSegmentor 3 | 4 | 5 | @DETECTORS.register_module() 6 | class SOLO(SingleStageInstanceSegmentor): 7 | """`SOLO: Segmenting 
Objects by Locations 8 | <https://arxiv.org/abs/1912.04488>`_ 9 | 10 | """ 11 | 12 | def __init__(self, 13 | backbone, 14 | neck=None, 15 | bbox_head=None, 16 | mask_head=None, 17 | train_cfg=None, 18 | test_cfg=None, 19 | init_cfg=None, 20 | pretrained=None): 21 | super().__init__( 22 | backbone=backbone, 23 | neck=neck, 24 | bbox_head=bbox_head, 25 | mask_head=mask_head, 26 | train_cfg=train_cfg, 27 | test_cfg=test_cfg, 28 | init_cfg=init_cfg, 29 | pretrained=pretrained) 30 | -------------------------------------------------------------------------------- /flows/human_analyzers/body/3d_mesh_sview_rgb_img/hmr/mixed/resnet_mixed.yml: -------------------------------------------------------------------------------- 1 | Collections: 2 | - Metadata: 3 | Architecture: 4 | - HMR 5 | - ResNet 6 | Name: Body 3D Mesh hmr mixed 7 | Paper: 8 | Title: End-to-end Recovery of Human Shape and Pose 9 | URL: http://openaccess.thecvf.com/content_cvpr_2018/html/Kanazawa_End-to-End_Recovery_of_CVPR_2018_paper.html 10 | README: configs/body/3d_mesh_sview_rgb_img/hmr/mixed/resnet_mixed.md 11 | Models: 12 | - Config: configs/body/3d_mesh_sview_rgb_img/hmr/mixed/res50_mixed_224x224.py 13 | In Collection: Body 3D Mesh hmr mixed 14 | Metadata: 15 | Training Data: Human3.6M 16 | Name: hmr_res50_mixed_224x224 17 | Results: 18 | - Dataset: Human3.6M 19 | Metrics: 20 | MPJPE (P1): 80.75 21 | MPJPE (P2): 80.35 22 | MPJPE-PA (P1): 55.08 23 | MPJPE-PA (P2): 52.6 24 | Task: Body 3D Mesh 25 | Weights: https://download.openmmlab.com/mmpose/mesh/hmr/hmr_mesh_224x224-c21e8229_20201015.pth 26 | -------------------------------------------------------------------------------- /flows/human_analyzers/hand/rhd/resnet_rhd2d.yml: -------------------------------------------------------------------------------- 1 | Collections: 2 | - Metadata: 3 | Architecture: 4 | - SimpleBaseline2D 5 | - ResNet 6 | Name: Hand 2D Keypoint topdown_heatmap rhd2d 7 | Paper: 8 | Title: Simple baselines for human pose estimation and tracking 9 | URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html 10 | README: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/resnet_rhd2d.md 11 | Models: 12 | - Config: configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/rhd2d/res50_rhd2d_256x256.py 13 | In Collection: Hand 2D Keypoint topdown_heatmap rhd2d 14 | Metadata: 15 | Training Data: RHD 16 | Name: topdown_heatmap_res50_rhd2d_256x256 17 | Results: 18 | - Dataset: RHD 19 | Metrics: 20 | AUC: 0.898 21 | EPE: 2.33 22 | PCK@0.2: 0.991 23 | Task: Hand 2D Keypoint 24 | Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_rhd2d_256x256-5dc7e4cc_20210330.pth 25 | -------------------------------------------------------------------------------- /flows/human_analyzers/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.yml: -------------------------------------------------------------------------------- 1 | Collections: 2 | - Metadata: 3 | Architecture: 4 | - SimpleBaseline3D 5 | Name: Body 3D Keypoint pose_lift h36m 6 | Paper: 7 | Title: A simple yet effective baseline for 3d human pose estimation 8 | URL: http://openaccess.thecvf.com/content_iccv_2017/html/Martinez_A_Simple_yet_ICCV_2017_paper.html 9 | README: configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.md 10 | Models: 11 | - Config: configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.py 12 | In Collection: Body 3D Keypoint pose_lift h36m 13 | Metadata: 14 | Training Data: Human3.6M 15 | Name: pose_lift_simplebaseline3d_h36m 16 |
Results: 17 | - Dataset: Human3.6M 18 | Metrics: 19 | MPJPE: 43.4 20 | P-MPJPE: 34.3 21 | Task: Body 3D Keypoint 22 | Weights: https://download.openmmlab.com/mmpose/body3d/simple_baseline/simple3Dbaseline_h36m-f0ad73a4_20210419.pth 23 | -------------------------------------------------------------------------------- /flows/detectors3d/votenet/votenet_16x8_sunrgbd-3d-10class.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/sunrgbd-3d-10class.py', '../_base_/models/votenet.py', 3 | '../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py' 4 | ] 5 | # model settings 6 | model = dict( 7 | bbox_head=dict( 8 | num_classes=10, 9 | bbox_coder=dict( 10 | type='PartialBinBasedBBoxCoder', 11 | num_sizes=10, 12 | num_dir_bins=12, 13 | with_rot=True, 14 | mean_sizes=[ 15 | [2.114256, 1.620300, 0.927272], [0.791118, 1.279516, 0.718182], 16 | [0.923508, 1.867419, 0.845495], [0.591958, 0.552978, 0.827272], 17 | [0.699104, 0.454178, 0.75625], [0.69519, 1.346299, 0.736364], 18 | [0.528526, 1.002642, 1.172878], [0.500618, 0.632163, 0.683424], 19 | [0.404671, 1.071108, 1.688889], [0.76584, 1.398258, 0.472728] 20 | ]), 21 | )) 22 | 23 | checkpoint_config = dict(interval=10) -------------------------------------------------------------------------------- /rfvision/core/points/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_points import BasePoints 2 | from .cam_points import CameraPoints 3 | from .depth_points import DepthPoints 4 | from .lidar_points import LiDARPoints 5 | 6 | __all__ = ['BasePoints', 'CameraPoints', 'DepthPoints', 'LiDARPoints'] 7 | 8 | 9 | def get_points_type(points_type): 10 | """Get the class of points according to coordinate type. 11 | 12 | Args: 13 | points_type (str): The type of points coordinate. 14 | The valid values are "CAMERA", "LIDAR", or "DEPTH". 15 | 16 | Returns: 17 | class: Points type.
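Example (illustrative)::

    >>> get_points_type('LIDAR').__name__
    'LiDARPoints'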
18 | """ 19 | if points_type == 'CAMERA': 20 | points_cls = CameraPoints 21 | elif points_type == 'LIDAR': 22 | points_cls = LiDARPoints 23 | elif points_type == 'DEPTH': 24 | points_cls = DepthPoints 25 | else: 26 | raise ValueError('Only "points_type" of "CAMERA", "LIDAR", or "DEPTH"' 27 | f' are supported, got {points_type}') 28 | 29 | return points_cls 30 | -------------------------------------------------------------------------------- /rfvision/components/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .darknet import Darknet 2 | from .hourglass import HourglassNet 3 | from .resnet import ResNet, ResNetV1d 4 | from .resnext import ResNeXt 5 | from rfvision.models.pose_estimators.articulation.models.articulation_backbone import PointNet2ForArticulation 6 | from .pointnet2_sa_ssg import PointNet2SASSG 7 | from .pointnet2_sa_msg import PointNet2SAMSG 8 | from .skeleton_merger_backbone import PointNet2ForSkeletonMerger 9 | from .handtailor_backbone import HandTailor3DBackbone, HandTailor2DBackbone, ManoNetBackbone 10 | from .base_pointnet import BasePointNet 11 | from .tcn import TCN 12 | from .ptr_base import Pointformer 13 | from .csp_darknet import CSPDarknet 14 | 15 | __all__ = [ 16 | 'ResNet', 'ResNetV1d', 'ResNeXt', 17 | 'HourglassNet','Darknet', 18 | 'PointNet2ForArticulation', 'PointNet2SASSG', 19 | 'PointNet2SAMSG', 'PointNet2ForSkeletonMerger', 20 | 'BasePointNet', 'HandTailor3DBackbone', 'HandTailor2DBackbone', 'TCN', 21 | 'Pointformer', 'CSPDarknet' 22 | ] 23 | -------------------------------------------------------------------------------- /flows/human_analyzers/body/3d_kpt_sview_rgb_img/pose_lift/README.md: -------------------------------------------------------------------------------- 1 | # A simple yet effective baseline for 3d human pose estimation 2 | 3 | 4 | 5 |
6 | SimpleBaseline3D (ICCV'2017) 7 | 8 | ```bibtex 9 | @inproceedings{martinez_2017_3dbaseline, 10 | title={A simple yet effective baseline for 3d human pose estimation}, 11 | author={Martinez, Julieta and Hossain, Rayat and Romero, Javier and Little, James J.}, 12 | booktitle={ICCV}, 13 | year={2017} 14 | } 15 | ``` 16 | 17 |
18 | 19 | Simple 3D baseline proposes to break down the task of 3d human pose estimation into 2 stages: (1) Image → 2D pose 20 | (2) 2D pose → 3D pose. 21 | 22 | The authors find that “lifting” ground truth 2D joint locations to 3D space is a task that can be solved with a low error rate. 23 | Based on the success of 2d human pose estimation, it directly "lifts" 2d joint locations to 3d space. 24 | -------------------------------------------------------------------------------- /rfvision/components/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_free_head import AnchorFreeHead 2 | from .anchor_head import AnchorHead 3 | from .cascade_rpn_head import CascadeRPNHead, StageCascadeRPNHead 4 | from .fcos_head import FCOSHead 5 | from .rpn_head import RPNHead 6 | from .yolo_head import YOLOV3Head 7 | from .usd_head import USDSegYOLOV3Head, USDSegFCOSHead 8 | from .base_conv_bbox_head import BaseConvBboxHead 9 | from .vote_head import VoteHead 10 | from .skeleton_merger_head import SkeletonMergerHead 11 | from .ancsh_head import ANCSHHead 12 | from .solo_head import SOLOHead 13 | from .solov2_head import SOLOv2Head 14 | from .mask_feat_head import MaskFeatHead 15 | from .yolox_head import YOLOXHead 16 | __all__ = [ 17 | 'AnchorFreeHead', 'AnchorHead', 18 | 'RPNHead', 'FCOSHead', 'YOLOV3Head', 'StageCascadeRPNHead', 19 | 'CascadeRPNHead', 'SOLOHead', 20 | 'USDSegYOLOV3Head', 'USDSegFCOSHead', 21 | 'VoteHead', 'BaseConvBboxHead', 'SkeletonMergerHead', 22 | 'ANCSHHead', 23 | 'SOLOv2Head', 'MaskFeatHead', 24 | 'YOLOXHead', 25 | ] 26 | -------------------------------------------------------------------------------- /docs/get_started.md: -------------------------------------------------------------------------------- 1 | ## Requirements 2 | > Lower versions may also work; we just have not tested them yet. 3 | > Some ops rely on torchvision.ops, so please make sure the PyTorch version is compatible. 4 | + Python >= 3.6 5 | + Ubuntu >= 18.04 6 | + CUDA >= 10.2 7 | + Pytorch >= 1.6 8 | 9 | ## Installation 10 | 1. `install CUDA`. If you install CUDA 10.2 and encounter the gcc version error, add `--override` at the end of `sudo sh xxx.run`. 11 | 2. `install pytorch`. Install PyTorch built for the correct CUDA version. Please follow the [official guidance](https://pytorch.org/get-started/previous-versions/). Since RFLib is compatible with different CUDA versions, it largely relieves the burden of internal compatibility. 12 | 3. `install requirements`.
13 | + if you want only 2D/tactile-related perception algorithms 14 | ``` 15 | pip install -r requirements/2d.txt 16 | ``` 17 | + if you want only 2.5D/3D-related perception 18 | ``` 19 | pip install -r requirements/3d.txt 20 | ``` 21 | + if you want a full install 22 | ``` 23 | pip install -r requirements/full.txt 24 | ``` 25 | -------------------------------------------------------------------------------- /rfvision/models/detectors3d/category_ppf/cfg.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=100) 2 | log_config = dict( 3 | interval=20, 4 | hooks=[ 5 | dict(type='TextLoggerHook'), 6 | dict(type='TensorboardLoggerHook') 7 | ]) 8 | 9 | dist_params = dict(backend='nccl') 10 | log_level = 'INFO' 11 | work_dir = None 12 | load_from = None 13 | resume_from = None 14 | workflow = [('train', 1)] 15 | 16 | lr = 1e-3 17 | optimizer = dict(type='Adam', lr=lr, weight_decay=0) 18 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 19 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 20 | # runtime settings 21 | total_epochs = 200 22 | 23 | model = dict(type='CategoryPPF', 24 | category=2) 25 | train_dataset = 'ShapeNetDatasetForPPF' 26 | 27 | data = dict( 28 | samples_per_gpu=1, 29 | workers_per_gpu=0, 30 | train=dict(type=train_dataset, 31 | category=2, 32 | data_root='/hdd0/data/shapenet_v2/ShapeNetCore.v2', 33 | ann_file='/hdd0/data/ppf_dataset/shapenet_train.txt'), 34 | ) 35 | gpu_ids = [0] 36 | seed = 0 37 | -------------------------------------------------------------------------------- /rfvision/datasets/pipelines/loading_pose.py: -------------------------------------------------------------------------------- 1 | import rflib 2 | from rfvision.datasets import PIPELINES 3 | 4 | 5 | @PIPELINES.register_module() 6 | class LoadImageFromFileSimple: 7 | """Loading image from file. 8 | 9 | Args: 10 | color_type (str): Flags specifying the color type of a loaded image, 11 | candidates are 'color', 'grayscale' and 'unchanged'. 12 | channel_order (str): Order of channel, candidates are 'bgr' and 'rgb'. 13 | """ 14 | 15 | def __init__(self, 16 | to_float32=False, 17 | color_type='color', 18 | channel_order='rgb'): 19 | self.to_float32 = to_float32 20 | self.color_type = color_type 21 | self.channel_order = channel_order 22 | 23 | def __call__(self, results): 24 | """Loading image from file.""" 25 | image_file = results['image_file'] 26 | img = rflib.imread(image_file, self.color_type, self.channel_order) 27 | 28 | if img is None: 29 | raise ValueError('Failed to read {}'.format(image_file)) 30 | 31 | results['img'] = img 32 | return results -------------------------------------------------------------------------------- /rfvision/utils/util_random.py: -------------------------------------------------------------------------------- 1 | """Helpers for random number generators.""" 2 | import numpy as np 3 | 4 | 5 | def ensure_rng(rng=None): 6 | """Coerces input into a random number generator. 7 | 8 | If the input is None, then a global random state is returned. 9 | 10 | If the input is a numeric value, then that is used as a seed to construct a 11 | random state. Otherwise the input is returned as-is. 12 | 13 | Adapted from [1]_. 14 | 15 | Args: 16 | rng (int | numpy.random.RandomState | None): 17 | if None, then defaults to the global rng.
Otherwise this can be an 18 | integer or a RandomState class 19 | Returns: 20 | (numpy.random.RandomState) : rng - 21 | a numpy random number generator 22 | 23 | References: 24 | .. [1] https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270 # noqa: E501 25 | """ 26 | 27 | if rng is None: 28 | rng = np.random.mtrand._rand 29 | elif isinstance(rng, int): 30 | rng = np.random.RandomState(rng) 31 | else: 32 | rng = rng 33 | return rng 34 | -------------------------------------------------------------------------------- /flows/human_analyzers/hand/others/iknet.py: -------------------------------------------------------------------------------- 1 | _base_ = ['../../_base_/default_runtime.py',] 2 | 3 | ############################# 4 | # Due to the fully connected network of IKNet, a lower 'lr' and 'samples_per_gpu' are needed to keep the loss low 5 | ############################# 6 | optimizer = dict(type='SGD', lr=0.00001, momentum=0.9, weight_decay=0.0001) 7 | optimizer_config = dict(grad_clip=None) 8 | lr_config = dict( 9 | policy='step', 10 | warmup=None, 11 | step=[35, 45]) 12 | 13 | total_epochs = 50 14 | checkpoint_config = dict(interval=10) 15 | runner = dict(type='EpochBasedRunner', max_epochs=total_epochs) 16 | # model settings 17 | model = dict(type='IKNet') 18 | work_dir = '/home/hanyang/work_dir/iknet' 19 | data_root = '/home/hanyang/ikdata/' 20 | data = dict( 21 | samples_per_gpu=8, 22 | workers_per_gpu=0, 23 | train=dict( 24 | type='IKDataset', 25 | data_root=data_root, 26 | split='all'), 27 | val=dict( 28 | type='IKDataset', 29 | data_root=data_root, 30 | split='test'), 31 | test=dict( 32 | type='IKDataset', 33 | data_root=data_root, 34 | split='test')) -------------------------------------------------------------------------------- /rfvision/components/utils/knn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def euclidean_dist(x, y): 4 | """ 5 | Args: 6 | x: pytorch Variable, with shape [m, d] 7 | y: pytorch Variable, with shape [n, d] 8 | Returns: 9 | dist: pytorch Variable, with shape [n, m] 10 | """ 11 | assert len(x.shape) == len(y.shape) == 2 12 | m, n = x.size(0), y.size(0) 13 | xx = (x**2).sum(1, keepdim=True).expand(m, n) 14 | yy = (y**2).sum(1, keepdim=True).expand(n, m).T 15 | dist_mat = xx + yy - 2 * x.matmul(y.T) 16 | return dist_mat.T 17 | 18 | def knn_search(x, y, k=1): 19 | assert k > 0, 'k must be greater than 0' 20 | dist_mat = euclidean_dist(x, y) 21 | index = dist_mat.argsort(dim=-1)[:, :k] 22 | return index 23 | 24 | if __name__ == '__main__': 25 | k = 1 26 | x = torch.rand((5, 3)) 27 | y = torch.rand((2, 3)) 28 | 29 | # knn_search is cuda-supported 30 | index = knn_search(x, y, k) 31 | 32 | # KNNSearch is not cuda-supported 33 | # from open3d.ml.torch.layers import KNNSearch 34 | # nsearch = KNNSearch() 35 | # index_o3d = nsearch(x, y, k).neighbors_index.reshape(y.shape[0], k) 36 | -------------------------------------------------------------------------------- /tests/test_models/test_backbones/utils.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules import GroupNorm 2 | from torch.nn.modules.batchnorm import _BatchNorm 3 | 4 | from rfvision.models.backbones.res2net import Bottle2neck 5 | from rfvision.models.backbones.resnet import BasicBlock, Bottleneck 6 | from rfvision.models.backbones.resnext import Bottleneck as BottleneckX 7 | from rfvision.models.utils import SimplifiedBasicBlock 8 | 9 | 10 | def
is_block(modules):
11 |     """Check if it is a ResNet building block."""
12 |     if isinstance(modules, (BasicBlock, Bottleneck, BottleneckX, Bottle2neck,
13 |                             SimplifiedBasicBlock)):
14 |         return True
15 |     return False
16 | 
17 | 
18 | def is_norm(modules):
19 |     """Check if it is one of the norm layers."""
20 |     if isinstance(modules, (GroupNorm, _BatchNorm)):
21 |         return True
22 |     return False
23 | 
24 | 
25 | def check_norm_state(modules, train_state):
26 |     """Check if norm layer is in correct train state."""
27 |     for mod in modules:
28 |         if isinstance(mod, _BatchNorm):
29 |             if mod.training != train_state:
30 |                 return False
31 |     return True
32 | 
-------------------------------------------------------------------------------- /tests/test_onnx/utils.py: --------------------------------------------------------------------------------
 1 | import os
 2 | import os.path as osp
 3 | import warnings
 4 | 
 5 | import numpy as np
 6 | import torch
 7 | import torch.nn as nn
 8 | 
 9 | 
10 | class WrapFunction(nn.Module):
11 |     """Wrap the function to be tested for torch.onnx.export tracing."""
12 | 
13 |     def __init__(self, wrapped_function):
14 |         super(WrapFunction, self).__init__()
15 |         self.wrapped_function = wrapped_function
16 | 
17 |     def forward(self, *args, **kwargs):
18 |         return self.wrapped_function(*args, **kwargs)
19 | 
20 | 
21 | def convert_result_list(outputs):
22 |     """Convert the torch forward outputs containing tuple or list to a list
23 |     only containing torch.Tensor.
24 | 
25 |     Args:
26 |         outputs (list(Tensor) | tuple(Tensor) | ...): the outputs
27 |             in the torch env, possibly containing nested structures such as
28 |             lists or tuples.
29 | 
30 |     Returns:
31 |         list(Tensor): a list only containing torch.Tensor
32 |     """
33 |     # recursive end condition
34 |     if isinstance(outputs, torch.Tensor):
35 |         return [outputs]
36 | 
37 |     ret = []
38 |     for sub in outputs:
39 |         ret += convert_result_list(sub)
40 |     return ret
41 | 
-------------------------------------------------------------------------------- /rfvision/core/bbox3d/__init__.py: --------------------------------------------------------------------------------
 1 | from .structures import (DepthInstance3DBoxes, BaseInstance3DBoxes, points_cam2img,
 2 |                          xywhr2xyxyr, get_box_type, rotation_3d_in_axis, limit_period,
 3 |                          Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes, Coord3DMode)
 4 | from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back
 5 | 
 6 | from .iou_calculators import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D,
 7 |                               BboxOverlapsNearest3D,
 8 |                               axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d,
 9 |                               bbox_overlaps_nearest_3d)
10 | from .coders import DeltaXYZWLHRBBoxCoder
11 | 
12 | __all__ = ['DepthInstance3DBoxes', 'BaseInstance3DBoxes', 'points_cam2img',
13 |            'xywhr2xyxyr', 'get_box_type', 'rotation_3d_in_axis', 'limit_period',
14 |            'bbox3d2roi', 'bbox3d2result', 'bbox3d_mapping_back',
15 |            'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
16 |            'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D',
17 |            'axis_aligned_bbox_overlaps_3d', 'Box3DMode', 'CameraInstance3DBoxes',
18 |            'LiDARInstance3DBoxes', 'Coord3DMode', 'DeltaXYZWLHRBBoxCoder'
19 |            ]
-------------------------------------------------------------------------------- /rfvision/components/utils/ops.py: --------------------------------------------------------------------------------
 1 | import warnings
 2 | 
 3 | import torch
 4 | import torch.nn.functional as F
 5 | 
 6 | 
 7 | def resize(input,
 8 |            size=None,
 9 |            scale_factor=None,
10 |            mode='nearest',
11 |            align_corners=None,
12 |            warning=True):
13 |     if warning:
14 |         if size is not None and align_corners:
15 |             input_h, input_w = tuple(int(x) for x in input.shape[2:])
16 |             output_h, output_w = tuple(int(x) for x in size)
17 |             if output_h > input_h or output_w > input_w:
18 |                 if ((output_h > 1 and output_w > 1 and input_h > 1
19 |                      and input_w > 1) and (output_h - 1) % (input_h - 1)
20 |                         and (output_w - 1) % (input_w - 1)):
21 |                     warnings.warn(
22 |                         f'When align_corners={align_corners}, '
23 |                         'the output would be more aligned if '
24 |                         f'input size {(input_h, input_w)} is `x+1` and '
25 |                         f'out size {(output_h, output_w)} is `nx+1`')
26 |     if isinstance(size, torch.Size):
27 |         size = tuple(int(x) for x in size)
28 |     return F.interpolate(input, size, scale_factor, mode, align_corners)
29 | 
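30 | # A minimal usage sketch (illustrative): with align_corners=True, output sizes
31 | # of the form n * (input_size - 1) + 1 avoid the warning above, e.g. 33x33 -> 65x65:
32 | # x = torch.rand(1, 8, 33, 33)
33 | # y = resize(x, size=(65, 65), mode='bilinear', align_corners=True)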
-------------------------------------------------------------------------------- /rfvision/components/roi_heads/__init__.py: --------------------------------------------------------------------------------
 1 | from .base_roi_head import BaseRoIHead
 2 | from .bbox_heads import (BBoxHead, ConvFCBBoxHead,
 3 |                          DoubleConvFCBBoxHead,
 4 |                          Shared2FCBBoxHead, Shared4Conv1FCBBoxHead)
 5 | from .cascade_roi_head import CascadeRoIHead
 6 | from .double_roi_head import DoubleHeadRoIHead
 7 | from .dynamic_roi_head import DynamicRoIHead
 8 | from .htc_roi_head import HybridTaskCascadeRoIHead
 9 | from .mask_heads import (FCNMaskHead,
10 |                          FusedSemanticHead,
11 |                          HTCMaskHead)
12 | from .roi_extractors import (BaseRoIExtractor, GenericRoIExtractor,
13 |                              SingleRoIExtractor)
14 | from .shared_heads import ResLayer
15 | from .standard_roi_head import StandardRoIHead
16 | from .dct_roi_head import DCTRoIHead
17 | __all__ = [
18 |     'BaseRoIHead', 'CascadeRoIHead', 'DoubleHeadRoIHead',
19 |     'HybridTaskCascadeRoIHead', 'ResLayer', 'BBoxHead',
20 |     'ConvFCBBoxHead', 'Shared2FCBBoxHead',
21 |     'StandardRoIHead', 'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead',
22 |     'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead',
23 |     'BaseRoIExtractor', 'GenericRoIExtractor',
24 |     'SingleRoIExtractor', 'DynamicRoIHead', 'DCTRoIHead'
25 | ]
26 | 
-------------------------------------------------------------------------------- /rfvision/components/losses_pose/regression_loss.py: --------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.nn.functional as F
 3 | from rfvision.models.builder import LOSSES
 4 | 
 5 | @LOSSES.register_module()
 6 | class L1LossPose(nn.Module):
 7 |     """L1 loss."""
 8 | 
 9 |     def __init__(self, use_target_weight=False, loss_weight=1.):
10 |         super().__init__()
11 |         self.criterion = F.l1_loss
12 |         self.use_target_weight = use_target_weight
13 |         self.loss_weight = loss_weight
14 | 
15 |     def forward(self, output, target, target_weight=None):
16 |         """Forward function.
17 | 
18 |         Note:
19 |             batch_size: N
20 |             num_keypoints: K
21 | 
22 |         Args:
23 |             output (torch.Tensor[N, K, 2]): Output regression.
24 |             target (torch.Tensor[N, K, 2]): Target regression.
25 |             target_weight (torch.Tensor[N, K, 2]):
26 |                 Weights across different joint types.
27 |         """
28 |         if self.use_target_weight:
29 |             assert target_weight is not None
30 |             loss = self.criterion(output * target_weight,
31 |                                   target * target_weight)
32 |         else:
33 |             loss = self.criterion(output, target)
34 | 
35 |         return loss * self.loss_weight
-------------------------------------------------------------------------------- /rfvision/core/bbox/demodata.py: --------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from rfvision.utils.util_random import ensure_rng
 5 | 
 6 | 
 7 | def random_boxes(num=1, scale=1, rng=None):
 8 |     """Simple version of ``kwimage.Boxes.random``
 9 | 
10 |     Returns:
11 |         Tensor: shape (n, 4) in x1, y1, x2, y2 format.
12 | 
13 |     References:
14 |         https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390
15 | 
16 |     Example:
17 |         >>> num = 3
18 |         >>> scale = 512
19 |         >>> rng = 0
20 |         >>> boxes = random_boxes(num, scale, rng)
21 |         >>> print(boxes)
22 |         tensor([[280.9925, 278.9802, 308.6148, 366.1769],
23 |                 [216.9113, 330.6978, 224.0446, 456.5878],
24 |                 [405.3632, 196.3221, 493.3953, 270.7942]])
25 |     """
26 |     rng = ensure_rng(rng)
27 | 
28 |     tlbr = rng.rand(num, 4).astype(np.float32)
29 | 
30 |     tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2])
31 |     tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3])
32 |     br_x = np.maximum(tlbr[:, 0], tlbr[:, 2])
33 |     br_y = np.maximum(tlbr[:, 1], tlbr[:, 3])
34 | 
35 |     tlbr[:, 0] = tl_x * scale
36 |     tlbr[:, 1] = tl_y * scale
37 |     tlbr[:, 2] = br_x * scale
38 |     tlbr[:, 3] = br_y * scale
39 | 
40 |     boxes = torch.from_numpy(tlbr)
41 |     return boxes
42 | 
-------------------------------------------------------------------------------- /rfvision/data/scannet/README.md: --------------------------------------------------------------------------------
 1 | ### Prepare ScanNet Data
 2 | We follow the procedure in [votenet](https://github.com/facebookresearch/votenet/).
 3 | 
 4 | 1. Download ScanNet v2 data [HERE](https://github.com/ScanNet/ScanNet). Link or move the 'scans' folder into this directory.
 5 | 
 6 | 2. In this directory, extract point clouds and annotations by running `python batch_load_scannet_data.py`.
 7 | 
 8 | 3. Enter the project root directory and generate training data by running
 9 | ```bash
10 | python tools/create_data.py scannet --root-path ./data/scannet --out-dir ./data/scannet --extra-tag scannet
11 | ```
12 | 
13 | The overall process can be achieved with the following script
14 | ```bash
15 | python batch_load_scannet_data.py
16 | cd ../..
17 | python tools/create_data.py scannet --root-path ./data/scannet --out-dir ./data/scannet --extra-tag scannet
18 | ```
19 | 
20 | The directory structure after pre-processing should be as follows
21 | ```
22 | scannet
23 | ├── scannet_utils.py
24 | ├── batch_load_scannet_data.py
25 | ├── load_scannet_data.py
26 | ├── README.md
27 | ├── scans
28 | ├── scannet_train_instance_data
29 | ├── points
30 | │   ├── xxxxx.bin
31 | ├── instance_mask
32 | │   ├── xxxxx.bin
33 | ├── semantic_mask
34 | │   ├── xxxxx.bin
35 | ├── scannet_infos_train.pkl
36 | ├── scannet_infos_val.pkl
37 | 
38 | ```
39 | 
-------------------------------------------------------------------------------- /rfvision/components/utils/__init__.py: --------------------------------------------------------------------------------
 1 | from .builder import build_linear_layer
 2 | from .normed_predictor import NormedConv2d, NormedLinear
 3 | from .res_layer import ResLayer
 4 | from .vote_module import VoteModule
 5 | from .knn import knn_search
 6 | from .mlp import MLP
 7 | from .top_down_utils import (batch_argmax, batch_uv2xyz, heatmap_to_uv, generate_heatmap_2d,
 8 |                              get_K, xyz2uv, uv2xyz, affine_transform,
 9 |                              normalize_point_cloud, normalize_quaternion, quaternion_to_angle_axis,
10 |                              quaternion_mul, quaternion_inv
11 |                              )
12 | 
13 | from .dct_utils import (dct1, idct1, dct, idct, dct_2d, idct_2d, dct_3d,
14 |                         idct_3d)
15 | from .ops import resize
16 | from .csp_layer import CSPLayer
17 | 
18 | 
19 | __all__ = [
20 |     'ResLayer', 'MLP',
21 |     'build_linear_layer',
22 |     'NormedLinear', 'NormedConv2d',
23 |     'VoteModule', 'knn_search',
24 |     'batch_argmax', 'heatmap_to_uv', 'generate_heatmap_2d',
25 |     'get_K', 'xyz2uv', 'uv2xyz', 'affine_transform', 'batch_uv2xyz',
26 |     'normalize_point_cloud', 'normalize_quaternion', 'quaternion_to_angle_axis',
27 |     'quaternion_mul', 'quaternion_inv',
28 |     'dct1', 'idct1', 'dct', 'idct', 'dct_2d', 'idct_2d', 'dct_3d', 'idct_3d',
29 |     'resize', 'CSPLayer'
30 | ]
31 | 
-------------------------------------------------------------------------------- /rfvision/datasets/api_wrappers/coco_api.py: --------------------------------------------------------------------------------
 1 | # This file adds snake-case aliases for the COCO API
 2 | 
 3 | from pycocotools.coco import COCO as _COCO
 4 | from pycocotools.cocoeval import COCOeval as _COCOeval
 5 | 
 6 | 
 7 | class COCO(_COCO):
 8 |     """This class is almost the same as the official pycocotools package.
 9 | 
10 |     It implements some snake-case function aliases so that the COCO class has
11 |     the same interface as the LVIS class.
12 | """ 13 | 14 | def __init__(self, annotation_file=None): 15 | super().__init__(annotation_file=annotation_file) 16 | self.img_ann_map = self.imgToAnns 17 | self.cat_img_map = self.catToImgs 18 | 19 | def get_ann_ids(self, img_ids=[], cat_ids=[], area_rng=[], iscrowd=None): 20 | return self.getAnnIds(img_ids, cat_ids, area_rng, iscrowd) 21 | 22 | def get_cat_ids(self, cat_names=[], sup_names=[], cat_ids=[]): 23 | return self.getCatIds(cat_names, sup_names, cat_ids) 24 | 25 | def get_img_ids(self, img_ids=[], cat_ids=[]): 26 | return self.getImgIds(img_ids, cat_ids) 27 | 28 | def load_anns(self, ids): 29 | return self.loadAnns(ids) 30 | 31 | def load_cats(self, ids): 32 | return self.loadCats(ids) 33 | 34 | def load_imgs(self, ids): 35 | return self.loadImgs(ids) 36 | 37 | 38 | # just for the ease of import 39 | COCOeval = _COCOeval 40 | -------------------------------------------------------------------------------- /rfvision/tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 23 | torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False) 24 | 25 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 26 | if out_file.endswith('.pth'): 27 | out_file_name = out_file[:-4] 28 | else: 29 | out_file_name = out_file 30 | final_file = out_file_name + f'-{sha[:8]}.pth' 31 | subprocess.Popen(['mv', out_file, final_file]) 32 | 33 | 34 | def main(): 35 | args = parse_args() 36 | process_checkpoint(args.in_file, args.out_file) 37 | 38 | 39 | if __name__ == '__main__': 40 | main() 41 | -------------------------------------------------------------------------------- /rfvision/components/roi_heads/double_roi_head.py: -------------------------------------------------------------------------------- 1 | from rfvision.models.builder import HEADS 2 | from .standard_roi_head import StandardRoIHead 3 | 4 | 5 | @HEADS.register_module() 6 | class DoubleHeadRoIHead(StandardRoIHead): 7 | """RoI head for Double Head RCNN. 
 8 | 
 9 |     https://arxiv.org/abs/1904.06493
10 |     """
11 | 
12 |     def __init__(self, reg_roi_scale_factor, **kwargs):
13 |         super(DoubleHeadRoIHead, self).__init__(**kwargs)
14 |         self.reg_roi_scale_factor = reg_roi_scale_factor
15 | 
16 |     def _bbox_forward(self, x, rois):
17 |         """Box head forward function used in both training and testing."""
18 |         bbox_cls_feats = self.bbox_roi_extractor(
19 |             x[:self.bbox_roi_extractor.num_inputs], rois)
20 |         bbox_reg_feats = self.bbox_roi_extractor(
21 |             x[:self.bbox_roi_extractor.num_inputs],
22 |             rois,
23 |             roi_scale_factor=self.reg_roi_scale_factor)
24 |         if self.with_shared_head:
25 |             bbox_cls_feats = self.shared_head(bbox_cls_feats)
26 |             bbox_reg_feats = self.shared_head(bbox_reg_feats)
27 |         cls_score, bbox_pred = self.bbox_head(bbox_cls_feats, bbox_reg_feats)
28 | 
29 |         bbox_results = dict(
30 |             cls_score=cls_score,
31 |             bbox_pred=bbox_pred,
32 |             bbox_feats=bbox_cls_feats)
33 |         return bbox_results
34 | 
-------------------------------------------------------------------------------- /tests/test_models/test_roi_heads/utils.py: --------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from rfvision.core import build_assigner, build_sampler
 4 | 
 5 | 
 6 | def _dummy_bbox_sampling(proposal_list, gt_bboxes, gt_labels):
 7 |     """Create sample results that can be passed to BBoxHead.get_targets."""
 8 |     num_imgs = 1
 9 |     feat = torch.rand(1, 1, 3, 3)
10 |     assign_config = dict(
11 |         type='MaxIoUAssigner',
12 |         pos_iou_thr=0.5,
13 |         neg_iou_thr=0.5,
14 |         min_pos_iou=0.5,
15 |         ignore_iof_thr=-1)
16 |     sampler_config = dict(
17 |         type='RandomSampler',
18 |         num=512,
19 |         pos_fraction=0.25,
20 |         neg_pos_ub=-1,
21 |         add_gt_as_proposals=True)
22 |     bbox_assigner = build_assigner(assign_config)
23 |     bbox_sampler = build_sampler(sampler_config)
24 |     gt_bboxes_ignore = [None for _ in range(num_imgs)]
25 |     sampling_results = []
26 |     for i in range(num_imgs):
27 |         assign_result = bbox_assigner.assign(proposal_list[i], gt_bboxes[i],
28 |                                              gt_bboxes_ignore[i], gt_labels[i])
29 |         sampling_result = bbox_sampler.sample(
30 |             assign_result,
31 |             proposal_list[i],
32 |             gt_bboxes[i],
33 |             gt_labels[i],
34 |             feats=feat)
35 |         sampling_results.append(sampling_result)
36 | 
37 |     return sampling_results
38 | 
-------------------------------------------------------------------------------- /rfvision/core/__init__.py: --------------------------------------------------------------------------------
 1 | '''
 2 | Message:
 3 | INFO - 2021-11-01 18:01:05,396 - acceleratesupport - OpenGL_accelerate module loaded
 4 | INFO - 2021-11-01 18:01:05,401 - arraydatatype - Using accelerated ArrayDatatype
 5 | 
 6 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 7 | When this message appears, rfvision runs noticeably slower during training and testing.
 8 | !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 9 | 
10 | The message results from the import order of 'open3d' and 'pyrender'.
11 | 
12 | If:
13 |     import open3d
14 |     import pyrender
15 | The message occurs!
16 | 
17 | If:
18 |     import pyrender
19 |     import open3d
20 | The message does not occur.
21 | 
22 | '''
23 | 
24 | 
25 | from .evaluation_pose import *
26 | from .visualizer_pose import *
27 | from .post_processing_pose import *
28 | from .utils_pose import *
29 | 
30 | from .anchor import *  # noqa: F401, F403
31 | from .bbox import *  # noqa: F401, F403
32 | from .evaluation import *  # noqa: F401, F403
33 | from .mask import *  # noqa: F401, F403
34 | from .post_processing import *  # noqa: F401, F403
35 | from .utils import *  # noqa: F401, F403
36 | from .visualizer import *
37 | from .data_structures import *
38 | 
39 | from .bbox3d import *
40 | from .evaluation3d import *
41 | from .visualizer3d import *
42 | from .points import *
43 | from .voxel import *
44 | from .post_processing3d import *
45 | 
46 | 
47 | from .hook import *
48 | 
49 | 
50 | 
-------------------------------------------------------------------------------- /rfvision/components/necks/gap_neck.py: --------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import torch
 3 | import torch.nn as nn
 4 | 
 5 | from rfvision.models.builder import NECKS
 6 | 
 7 | 
 8 | @NECKS.register_module()
 9 | class GlobalAveragePooling(nn.Module):
10 |     """Global Average Pooling neck.
11 | 
12 |     Note that we use `view` to remove extra channel after pooling. We do not
13 |     use `squeeze` as it will also remove the batch dimension when the tensor
14 |     has a batch dimension of size 1, which can lead to unexpected errors.
15 |     """
16 | 
17 |     def __init__(self):
18 |         super().__init__()
19 |         self.gap = nn.AdaptiveAvgPool2d((1, 1))
20 | 
21 |     def init_weights(self):
22 |         pass
23 | 
24 |     def forward(self, inputs):
25 |         if isinstance(inputs, tuple):
26 |             outs = tuple([self.gap(x) for x in inputs])
27 |             outs = tuple(
28 |                 [out.view(x.size(0), -1) for out, x in zip(outs, inputs)])
29 |         elif isinstance(inputs, list):
30 |             outs = [self.gap(x) for x in inputs]
31 |             outs = [out.view(x.size(0), -1) for out, x in zip(outs, inputs)]
32 |         elif isinstance(inputs, torch.Tensor):
33 |             outs = self.gap(inputs)
34 |             outs = outs.view(inputs.size(0), -1)
35 |         else:
36 |             raise TypeError('neck inputs should be tuple, list or torch.Tensor')
37 |         return outs
38 | 
-------------------------------------------------------------------------------- /rfvision/components/roi_heads/mask_heads/htc_mask_head.py: --------------------------------------------------------------------------------
 1 | from rflib.cnn import ConvModule
 2 | 
 3 | from rfvision.models.builder import HEADS
 4 | from .fcn_mask_head import FCNMaskHead
 5 | 
 6 | 
 7 | @HEADS.register_module()
 8 | class HTCMaskHead(FCNMaskHead):
 9 | 
10 |     def __init__(self, with_conv_res=True, *args, **kwargs):
11 |         super(HTCMaskHead, self).__init__(*args, **kwargs)
12 |         self.with_conv_res = with_conv_res
13 |         if self.with_conv_res:
14 |             self.conv_res = ConvModule(
15 |                 self.conv_out_channels,
16 |                 self.conv_out_channels,
17 |                 1,
18 |                 conv_cfg=self.conv_cfg,
19 |                 norm_cfg=self.norm_cfg)
20 | 
21 |     def forward(self, x, res_feat=None, return_logits=True, return_feat=True):
22 |         if res_feat is not None:
23 |             assert self.with_conv_res
24 |             res_feat = self.conv_res(res_feat)
25 |             x = x + res_feat
26 |         for conv in self.convs:
27 |             x = conv(x)
28 |         res_feat = x
29 |         outs = []
30 |         if return_logits:
31 |             x = self.upsample(x)
32 |             if self.upsample_method == 'deconv':
33 |                 x = self.relu(x)
34 |             mask_pred = self.conv_logits(x)
35 |             outs.append(mask_pred)
36 |         if return_feat:
37 |             outs.append(res_feat)
38 |         return outs if len(outs) > 1 else outs[0]
39 | 
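40 | # A minimal usage sketch (illustrative; the constructor kwargs are assumptions
41 | # based on the FCNMaskHead defaults). Later HTC stages reuse the previous
42 | # stage's mask features via `res_feat`:
43 | # head = HTCMaskHead(in_channels=256, conv_out_channels=256, num_classes=80)
44 | # pred1, feat1 = head(x)                  # stage 1: mask logits + features
45 | # pred2, feat2 = head(x, res_feat=feat1)  # stage 2 consumes stage-1 features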
-------------------------------------------------------------------------------- /rfvision/utils/profiling.py: --------------------------------------------------------------------------------
 1 | import contextlib
 2 | import sys
 3 | import time
 4 | 
 5 | import torch
 6 | 
 7 | if sys.version_info >= (3, 7):
 8 | 
 9 |     @contextlib.contextmanager
10 |     def profile_time(trace_name,
11 |                      name,
12 |                      enabled=True,
13 |                      stream=None,
14 |                      end_stream=None):
15 |         """Print time spent by CPU and GPU.
16 | 
17 |         Useful as a temporary context manager to find sweet spots of code
18 |         suitable for async implementation.
19 |         """
20 |         if (not enabled) or not torch.cuda.is_available():
21 |             yield
22 |             return
23 |         stream = stream if stream else torch.cuda.current_stream()
24 |         end_stream = end_stream if end_stream else stream
25 |         start = torch.cuda.Event(enable_timing=True)
26 |         end = torch.cuda.Event(enable_timing=True)
27 |         stream.record_event(start)
28 |         try:
29 |             cpu_start = time.monotonic()
30 |             yield
31 |         finally:
32 |             cpu_end = time.monotonic()
33 |             end_stream.record_event(end)
34 |             end.synchronize()
35 |             cpu_time = (cpu_end - cpu_start) * 1000
36 |             gpu_time = start.elapsed_time(end)
37 |             msg = f'{trace_name} {name} cpu_time {cpu_time:.2f} ms '
38 |             msg += f'gpu_time {gpu_time:.2f} ms stream {stream}'
39 |             print(msg, end_stream)
40 | 
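41 | # A minimal usage sketch (illustrative; `backbone` and `img` are placeholders):
42 | # with profile_time('train_step', 'backbone'):
43 | #     feats = backbone(img)  # prints CPU/GPU time for the wrapped region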
-------------------------------------------------------------------------------- /rfvision/data/alfred/README.md: --------------------------------------------------------------------------------
 1 | ### Prepare Alfred Data for Indoor Detection
 2 | 
 3 | 1. Download the Alfred data and save the raw data in the directory `json_2.1.0`.
 4 | 
 5 | 2. Save the corresponding paths of the train, val and test data in the directory `meta_data`.
 6 | 
 7 | 3. Generate numpy array data by running:
 8 | 
 9 | ```bash
10 | bash ./batch_load.sh
11 | ```
12 | 
13 | If successful, you should see a new directory called `alfred_instance_data` with `.npy` files in it.
14 | 
15 | 4. Enter the **project root directory** and generate train, val and test data by running:
16 | 
17 | ```bash
18 | python tools/create_data.py alfred --root-path ./data/alfred --out-dir ./data/alfred --extra-tag alfred
19 | ```
20 | 
21 | After that, all the data for training and testing are ready.
22 | 
23 | #### Structure
24 | The directory structure after pre-processing should be as follows
25 | 
26 | ```
27 | alfred
28 | ├── alfred_infos_test.pkl
29 | ├── alfred_infos_train.pkl
30 | ├── alfred_infos_val.pkl
31 | ├── alfred_instance_data
32 | │   ├── xxxxx_bbox.npy
33 | │   ├── xxxxx_label.npy
34 | │   ├── xxxxx_vert.npy
35 | ├── alfred_pc.py
36 | ├── batch_load_alfred_data.py
37 | ├── batch_load.sh
38 | ├── json_2.1.0
39 | ├── load_alfred_data.py
40 | ├── meta_data
41 | │   ├── train.txt
42 | │   ├── valid_seen.txt
43 | │   ├── valid_unseen.txt
44 | │   ├── tests_seen.txt
45 | │   ├── tests_unseen.txt
46 | ├── points
47 | │   ├── xxxxx.bin
48 | ├── read_demo.py
49 | ├── README.md
50 | ```
51 | 
-------------------------------------------------------------------------------- /rfvision/components/losses_pose/classfication_loss.py: --------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | from rfvision.models.builder import LOSSES
 6 | 
 7 | 
 8 | @LOSSES.register_module()
 9 | class BCELoss(nn.Module):
10 |     """Binary Cross Entropy loss."""
11 | 
12 |     def __init__(self, use_target_weight=False, loss_weight=1.):
13 |         super().__init__()
14 |         self.criterion = F.binary_cross_entropy
15 |         self.use_target_weight = use_target_weight
16 |         self.loss_weight = loss_weight
17 | 
18 |     def forward(self, output, target, target_weight=None):
19 |         """Forward function.
20 | 
21 |         Note:
22 |             batch_size: N
23 |             num_labels: K
24 | 
25 |         Args:
26 |             output (torch.Tensor[N, K]): Output classification.
27 |             target (torch.Tensor[N, K]): Target classification.
28 |             target_weight (torch.Tensor[N, K] or torch.Tensor[N]):
29 |                 Weights across different labels.
30 |         """
31 | 
32 |         if self.use_target_weight:
33 |             assert target_weight is not None
34 |             loss = self.criterion(output, target, reduction='none')
35 |             if target_weight.dim() == 1:
36 |                 target_weight = target_weight[:, None]
37 |             loss = (loss * target_weight).mean()
38 |         else:
39 |             loss = self.criterion(output, target)
40 | 
41 |         return loss * self.loss_weight
42 | 
-------------------------------------------------------------------------------- /rfvision/datasets/samplers/distributed_sampler.py: --------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | import torch
 4 | from torch.utils.data import DistributedSampler as _DistributedSampler
 5 | 
 6 | 
 7 | class DistributedSampler(_DistributedSampler):
 8 | 
 9 |     def __init__(self,
10 |                  dataset,
11 |                  num_replicas=None,
12 |                  rank=None,
13 |                  shuffle=True,
14 |                  seed=0):
15 |         super().__init__(
16 |             dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
17 |         # for compatibility with PyTorch 1.3+
18 |         self.seed = seed if seed is not None else 0
19 | 
20 |     def __iter__(self):
21 |         # deterministically shuffle based on epoch
22 |         if self.shuffle:
23 |             g = torch.Generator()
24 |             g.manual_seed(self.epoch + self.seed)
25 |             indices = torch.randperm(len(self.dataset), generator=g).tolist()
26 |         else:
27 |             indices = torch.arange(len(self.dataset)).tolist()
28 | 
29 |         # add extra samples to make it evenly divisible,
30 |         # in case indices is shorter than total_size
31 |         indices = (indices *
32 |                    math.ceil(self.total_size / len(indices)))[:self.total_size]
33 |         assert len(indices) == self.total_size
34 | 
35 |         # subsample
36 |         indices = indices[self.rank:self.total_size:self.num_replicas]
37 |         assert len(indices) == self.num_samples
38 | 
39 |         return iter(indices)
40 | 
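41 | # A minimal usage sketch (illustrative):
42 | # sampler = DistributedSampler(dataset, num_replicas=4, rank=0, shuffle=True, seed=0)
43 | # for epoch in range(num_epochs):
44 | #     sampler.set_epoch(epoch)  # makes the shuffle order differ across epochs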
-------------------------------------------------------------------------------- /rfvision/data/alfred/batch_load.sh: --------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Load train data
 4 | python batch_load_alfred_data.py \
 5 | --output_folder ./alfred_instance_data \
 6 | --alfred_dir /disk1/data/alfred/alfred_pc_V4/json_2.1.0 \
 7 | --alfred_names_file /disk1/data/alfred/alfred_pc_V4/json_2.1.0/meta_data/train.txt \
 8 | --need_rgb 1
 9 | 
10 | # Load validation (seen) data
11 | python batch_load_alfred_data.py \
12 | --output_folder ./alfred_instance_data \
13 | --alfred_dir /disk1/data/alfred/alfred_pc_V4/json_2.1.0 \
14 | --alfred_names_file /disk1/data/alfred/alfred_pc_V4/json_2.1.0/meta_data/valid_seen.txt \
15 | --need_rgb 1 \
16 | --test 1
17 | 
18 | # Load validation (unseen) data
19 | python batch_load_alfred_data.py \
20 | --output_folder ./alfred_instance_data \
21 | --alfred_dir /disk1/data/alfred/alfred_pc_V4/json_2.1.0 \
22 | --alfred_names_file /disk1/data/alfred/alfred_pc_V4/json_2.1.0/meta_data/valid_unseen.txt \
23 | --need_rgb 1 \
24 | --test 1
25 | 
26 | # Load test (seen) data
27 | python batch_load_alfred_data.py \
28 | --output_folder ./alfred_instance_data \
29 | --alfred_dir /disk1/data/alfred/alfred_pc_V4/json_2.1.0 \
30 | --alfred_names_file /disk1/data/alfred/alfred_pc_V4/json_2.1.0/meta_data/tests_seen.txt \
31 | --need_rgb 1 \
32 | --test 1
33 | 
34 | # Load test (unseen) data
35 | python batch_load_alfred_data.py \
36 | --output_folder ./alfred_instance_data \
37 | --alfred_dir /disk1/data/alfred/alfred_pc_V4/json_2.1.0 \
38 | --alfred_names_file /disk1/data/alfred/alfred_pc_V4/json_2.1.0/meta_data/tests_unseen.txt \
39 | --need_rgb 1 \
40 | --test 1
41 | 
-------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py: --------------------------------------------------------------------------------
 1 | _base_ = './mask_rcnn_r50_fpn_1x_coco.py'
 2 | model = dict(
 3 |     init_cfg='open-mmlab://detectron2/resnet50_caffe',
 4 |     backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe'))
 5 | # use caffe img_norm
 6 | img_norm_cfg = dict(
 7 |     mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
 8 | train_pipeline = [
 9 |     dict(type='LoadImageFromFile'),
10 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
11 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
12 |     dict(type='RandomFlip', flip_ratio=0.5),
13 |     dict(type='Normalize', **img_norm_cfg),
14 |     dict(type='Pad', size_divisor=32),
15 |     dict(type='DefaultFormatBundle'),
16 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
17 | ]
18 | test_pipeline = [
19 |     dict(type='LoadImageFromFile'),
20 |     dict(
21 |         type='MultiScaleFlipAug',
22 |         img_scale=(1333, 800),
23 |         flip=False,
24 |         transforms=[
25 |             dict(type='Resize', keep_ratio=True),
26 |             dict(type='RandomFlip'),
27 |             dict(type='Normalize', **img_norm_cfg),
28 |             dict(type='Pad', size_divisor=32),
29 |             dict(type='ImageToTensor', keys=['img']),
30 |             dict(type='Collect', keys=['img']),
31 |         ])
32 | ]
33 | data = dict(
34 |     train=dict(pipeline=train_pipeline),
35 |     val=dict(pipeline=test_pipeline),
36 |     test=dict(pipeline=test_pipeline))
37 | 
-------------------------------------------------------------------------------- /tests/test_models/test_backbones/test_hourglass.py: --------------------------------------------------------------------------------
 1 | import pytest
 2 | import torch
 3 | 
 4 | from rfvision.models.backbones.hourglass import HourglassNet
 5 | 
 6 | 
 7 | def test_hourglass_backbone():
 8 |     with pytest.raises(AssertionError):
 9 |         # HourglassNet's num_stacks should be larger than 0
10 |         HourglassNet(num_stacks=0)
11 | 
12 |     with pytest.raises(AssertionError):
13 |         # len(stage_channels) should equal len(stage_blocks)
14 |         HourglassNet(
15 |             stage_channels=[256, 256, 384, 384, 384],
16 |             stage_blocks=[2, 2, 2, 2, 2, 4])
17 | 
18 |     with pytest.raises(AssertionError):
19 |         # len(stage_channels) should be larger than downsample_times
20 |         HourglassNet(
21 |             downsample_times=5,
22 |             stage_channels=[256, 256, 384, 384, 384],
23 |             stage_blocks=[2, 2, 2, 2, 2])
24 | 
25 |     # Test HourglassNet-52
26 |     model = HourglassNet(num_stacks=1)
27 |     model.init_weights()
28 |     model.train()
29 | 
30 |     imgs = torch.randn(1, 3, 256, 256)
31 |     feat = model(imgs)
32 |     assert len(feat) == 1
33 |     assert feat[0].shape == torch.Size([1, 256, 64, 64])
34 | 
35 |     # Test HourglassNet-104
36 |     model = HourglassNet(num_stacks=2)
37 |     model.init_weights()
38 |     model.train()
39 | 
40 |     imgs = torch.randn(1, 3, 256, 256)
41 |     feat = model(imgs)
42 |     assert len(feat) == 2
43 |     assert feat[0].shape == torch.Size([1, 256, 64, 64])
44 |     assert feat[1].shape == torch.Size([1, 256, 64, 64])
45 | 
-------------------------------------------------------------------------------- /tests/test_data/test_datasets/test_coco_dataset.py: --------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import tempfile
 3 | 
 4 | import rflib
 5 | import pytest
 6 | 
 7 | from rfvision.datasets import CocoDataset
 8 | 
 9 | 
10 | def _create_ids_error_coco_json(json_name):
11 |     image = {
12 |         'id': 0,
13 |         'width': 640,
14 |         'height': 640,
15 |         'file_name': 'fake_name.jpg',
16 |     }
17 | 
18 |     annotation_1 = {
19 |         'id': 1,
20 |         'image_id': 0,
21 |         'category_id': 0,
22 |         'area': 400,
23 |         'bbox': [50, 60, 20, 20],
24 |         'iscrowd': 0,
25 |     }
26 | 
27 |     annotation_2 = {
28 |         'id': 1,
29 |         'image_id': 0,
30 |         'category_id': 0,
31 |         'area': 900,
32 |         'bbox': [100, 120, 30, 30],
33 |         'iscrowd': 0,
34 |     }
35 | 
36 |     categories = [{
37 |         'id': 0,
38 |         'name': 'car',
39 |         'supercategory': 'car',
40 |     }]
41 | 
42 |     fake_json = {
43 |         'images': [image],
44 |         'annotations': [annotation_1, annotation_2],
45 |         'categories': categories
46 |     }
47 |     rflib.dump(fake_json, json_name)
48 | 
49 | 
50 | def test_coco_annotation_ids_unique():
51 |     tmp_dir = tempfile.TemporaryDirectory()
52 |     fake_json_file = osp.join(tmp_dir.name, 'fake_data.json')
53 |     _create_ids_error_coco_json(fake_json_file)
54 | 
55 |     # test annotation ids not unique error
56 |     with pytest.raises(AssertionError):
57 |         CocoDataset(ann_file=fake_json_file, classes=('car', ), pipeline=[])
58 | 
-------------------------------------------------------------------------------- /rfvision/models/detectors3d/category_ppf/nocs.py: --------------------------------------------------------------------------------
 1 | import torch
 2 | import os
 3 | import glob
 4 | from tqdm import tqdm
 5 | import pickle
 6 | import numpy as np
 7 | class NOCSForPPF(torch.utils.data.Dataset):
 8 |     def __init__(self,
 9 |                  data_root,
10 |                  scene_id=1,
11 |                  category=2,
12 |                  pipeline=None,
13 |                  test_mode=False,
14 |                  ):
15 |         self.data_root = data_root
16 | 
17 |         log_dir = os.path.join(self.data_root, 'real_test_20210511T2129')
18 |         result_pkl_list = glob.glob(os.path.join(log_dir, 'results_*.pkl'))
19 |         result_pkl_list = sorted(result_pkl_list)[:]
20 |         final_results = []
21 |         for pkl_path in tqdm(result_pkl_list):
22 |             with open(pkl_path, 'rb') as f:
23 |                 result = pickle.load(f)
24 |                 if 'gt_handle_visibility' not in result:
25 |                     result['gt_handle_visibility'] = np.ones_like(result['gt_class_ids'])
26 |                     print('can\'t find gt_handle_visibility in the pkl.')
27 |                 else:
28 |                     assert len(result['gt_handle_visibility']) == len(result['gt_class_ids']), "{} {}".format(
29 |                         result['gt_handle_visibility'], result['gt_class_ids'])
30 |             if type(result) is list:
31 |                 final_results += result
32 |             elif type(result) is dict:
33 |                 final_results.append(result)
34 |             else:
35 |                 assert False
36 | 
37 | 
38 | 
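39 |         # Minimal completion sketch (an assumption -- the file as shipped ends
40 |         # here without storing the results or defining the Dataset protocol):
41 |         self.final_results = final_results
42 | 
43 |     def __len__(self):
44 |         return len(self.final_results)
45 | 
46 |     def __getitem__(self, idx):
47 |         return self.final_results[idx]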
-------------------------------------------------------------------------------- /flows/detectors/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py: --------------------------------------------------------------------------------
 1 | _base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py'
 2 | img_norm_cfg = dict(
 3 |     mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
 4 | train_pipeline = [
 5 |     dict(type='LoadImageFromFile'),
 6 |     dict(type='LoadAnnotations', with_bbox=True),
 7 |     dict(
 8 |         type='Resize',
 9 |         img_scale=[(1333, 640), (1333, 800)],
10 |         multiscale_mode='value',
11 |         keep_ratio=True),
12 |     dict(type='RandomFlip', flip_ratio=0.5),
13 |     dict(type='Normalize', **img_norm_cfg),
14 |     dict(type='Pad', size_divisor=32),
15 |     dict(type='DefaultFormatBundle'),
16 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
17 | ]
18 | test_pipeline = [
19 |     dict(type='LoadImageFromFile'),
20 |     dict(
21 |         type='MultiScaleFlipAug',
22 |         img_scale=(1333, 800),
23 |         flip=False,
24 |         transforms=[
25 |             dict(type='Resize', keep_ratio=True),
26 |             dict(type='RandomFlip'),
27 |             dict(type='Normalize', **img_norm_cfg),
28 |             dict(type='Pad', size_divisor=32),
29 |             dict(type='ImageToTensor', keys=['img']),
30 |             dict(type='Collect', keys=['img']),
31 |         ])
32 | ]
33 | data = dict(
34 |     train=dict(pipeline=train_pipeline),
35 |     val=dict(pipeline=test_pipeline),
36 |     test=dict(pipeline=test_pipeline))
37 | # learning policy
38 | lr_config = dict(step=[16, 22])
39 | runner = dict(type='EpochBasedRunner', max_epochs=24)
40 | 
-------------------------------------------------------------------------------- /rfvision/components/losses/cosine_simlarity_loss.py: --------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | from rfvision.models.builder import LOSSES
 5 | from .utils import weighted_loss
 6 | 
 7 | 
 8 | @weighted_loss
 9 | def cosine_similarity_loss(pred, target, cos_func):
10 |     assert isinstance(cos_func, nn.Module)
11 |     assert pred.size() == target.size() and target.numel() > 0
12 | 
13 |     loss = cos_func(pred, target, torch.tensor(1, device=pred.device))
14 | 
15 |     return loss
16 | 
17 | 
18 | @LOSSES.register_module()
19 | class CosineSimilarityLoss(nn.Module):
20 | 
21 |     def __init__(self, margin=0.0, reduction='mean', loss_weight=1.0):
22 |         super(CosineSimilarityLoss, self).__init__()
23 |         self.reduction = reduction
24 |         self.loss_weight = loss_weight
25 |         self.cos = nn.CosineEmbeddingLoss(margin=margin, reduction='none')
26 | 
27 |     def forward(self,
28 |                 pred,
29 |                 target,
30 |                 weight=None,
31 |                 avg_factor=None,
32 |                 reduction_override=None,
33 |                 **kwargs):
34 |         assert reduction_override in (None, 'none', 'mean', 'sum')
35 |         reduction = (
36 |             reduction_override if reduction_override else self.reduction)
37 |         loss_cos = self.loss_weight * cosine_similarity_loss(
38 |             pred,
39 |             target,
40 |             weight,
41 |             cos_func=self.cos,
42 |             reduction=reduction,
43 |             avg_factor=avg_factor,
44 |             **kwargs)
45 |         return loss_cos
-------------------------------------------------------------------------------- /rfvision/datasets/__init__.py: --------------------------------------------------------------------------------
 1 | from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
 2 | from .coco import CocoDataset
 3 | from .custom import CustomDataset
 4 | from .dataset_wrappers import (ClassBalancedDataset, ConcatDataset,
 5 |                                RepeatDataset, MultiImageMixDataset)
 6 | from .samplers import DistributedGroupSampler, DistributedSampler, GroupSampler
 7 | from .utils import (NumClassCheckHook, get_loading_pipeline,
 8 |                     replace_ImageToTensor)
 9 | from .utils3d import (get_loading_pipeline_3d, is_loading_function, extract_result_dict)
10 | from .ik_dataset import IKDataset
11 | from .sunrgbd_dataset import SUNRGBDDataset
12 | from .shapenet_v2 import ShapeNetCoreV2HDF5
13
| from .keypointnet import KeypointNetDataset 14 | from .pose_dataset import DatasetInfo, InterHand3DDataset, Rhd2DDataset 15 | from .custom_dataset import * 16 | from .alfred_dataset import AlfredDataset 17 | 18 | __all__ = [ 19 | 'CustomDataset', 'CocoDataset', 'GroupSampler', 'DistributedGroupSampler', 20 | 'DistributedSampler', 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 21 | 'ClassBalancedDataset', 'DATASETS', 'PIPELINES', 22 | 'build_dataset', 'replace_ImageToTensor', 'get_loading_pipeline', 23 | 'NumClassCheckHook', 24 | 'IKDataset','SUNRGBDDataset', 'ShapeNetCoreV2HDF5', 25 | 'KeypointNetDataset', 26 | 'get_loading_pipeline_3d', 'is_loading_function', 'extract_result_dict', 27 | 'DatasetInfo', 'InterHand3DDataset', 'Rhd2DDataset', 28 | 'AlfredDataset', 'MultiImageMixDataset' 29 | ] 30 | -------------------------------------------------------------------------------- /tests/test_models/test_utils/test_position_encoding.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | from rfvision.models.utils import (LearnedPositionalEncoding, 5 | SinePositionalEncoding) 6 | 7 | 8 | def test_sine_positional_encoding(num_feats=16, batch_size=2): 9 | # test invalid type of scale 10 | with pytest.raises(AssertionError): 11 | module = SinePositionalEncoding( 12 | num_feats, scale=(3., ), normalize=True) 13 | 14 | module = SinePositionalEncoding(num_feats) 15 | h, w = 10, 6 16 | mask = torch.rand(batch_size, h, w) > 0.5 17 | assert not module.normalize 18 | out = module(mask) 19 | assert out.shape == (batch_size, num_feats * 2, h, w) 20 | 21 | # set normalize 22 | module = SinePositionalEncoding(num_feats, normalize=True) 23 | assert module.normalize 24 | out = module(mask) 25 | assert out.shape == (batch_size, num_feats * 2, h, w) 26 | 27 | 28 | def test_learned_positional_encoding(num_feats=16, 29 | row_num_embed=10, 30 | col_num_embed=10, 31 | batch_size=2): 32 | module = LearnedPositionalEncoding(num_feats, row_num_embed, col_num_embed) 33 | assert module.row_embed.weight.shape == (row_num_embed, num_feats) 34 | assert module.col_embed.weight.shape == (col_num_embed, num_feats) 35 | h, w = 10, 6 36 | mask = torch.rand(batch_size, h, w) > 0.5 37 | out = module(mask) 38 | assert out.shape == (batch_size, num_feats * 2, h, w) 39 | -------------------------------------------------------------------------------- /rfvision/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigners import (AssignResult, BaseAssigner, CenterRegionAssigner, 2 | MaxIoUAssigner, RegionAssigner) 3 | from .builder import build_assigner, build_bbox_coder, build_sampler 4 | from .coder import (BaseBBoxCoder, DeltaXYWHBBoxCoder, PseudoBBoxCoder) 5 | from .iou_calculators import BboxOverlaps2D, bbox_overlaps 6 | from .samplers import (BaseSampler, CombinedSampler, 7 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 8 | OHEMSampler, PseudoSampler, RandomSampler, 9 | SamplingResult) 10 | from .transforms import (bbox2distance, bbox2result, bbox2roi, 11 | bbox_cxcywh_to_xyxy, bbox_flip, bbox_mapping, 12 | bbox_mapping_back, bbox_rescale, bbox_xyxy_to_cxcywh, 13 | distance2bbox, roi2bbox, bbox_mask2result) 14 | 15 | __all__ = [ 16 | 'bbox_overlaps', 'BboxOverlaps2D', 'BaseAssigner', 'MaxIoUAssigner', 17 | 'AssignResult', 'BaseSampler', 'PseudoSampler', 'RandomSampler', 18 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 19 | 'OHEMSampler', 
'SamplingResult', 'build_assigner',
20 |     'build_sampler', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back',
21 |     'bbox2roi', 'roi2bbox', 'bbox2result', 'distance2bbox', 'bbox2distance',
22 |     'build_bbox_coder', 'BaseBBoxCoder', 'PseudoBBoxCoder',
23 |     'DeltaXYWHBBoxCoder', 'CenterRegionAssigner',
24 |     'bbox_rescale', 'bbox_cxcywh_to_xyxy', 'bbox_xyxy_to_cxcywh',
25 |     'RegionAssigner', 'bbox_mask2result'
26 | ]
27 | 
-------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r50_caffe_c4_1x_coco.py: --------------------------------------------------------------------------------
 1 | _base_ = [
 2 |     '../_base_/models/mask_rcnn_r50_caffe_c4.py',
 3 |     '../_base_/datasets/coco_instance.py',
 4 |     '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
 5 | ]
 6 | # use caffe img_norm
 7 | img_norm_cfg = dict(
 8 |     mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
 9 | train_pipeline = [
10 |     dict(type='LoadImageFromFile'),
11 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
12 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
13 |     dict(type='RandomFlip', flip_ratio=0.5),
14 |     dict(type='Normalize', **img_norm_cfg),
15 |     dict(type='Pad', size_divisor=32),
16 |     dict(type='DefaultFormatBundle'),
17 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
18 | ]
19 | test_pipeline = [
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(1333, 800),
24 |         flip=False,
25 |         transforms=[
26 |             dict(type='Resize', keep_ratio=True),
27 |             dict(type='RandomFlip'),
28 |             dict(type='Normalize', **img_norm_cfg),
29 |             dict(type='Pad', size_divisor=32),
30 |             dict(type='ImageToTensor', keys=['img']),
31 |             dict(type='Collect', keys=['img']),
32 |         ])
33 | ]
34 | data = dict(
35 |     train=dict(pipeline=train_pipeline),
36 |     val=dict(pipeline=test_pipeline),
37 |     test=dict(pipeline=test_pipeline))
38 | # optimizer
39 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
40 | 
-------------------------------------------------------------------------------- /rfvision/core/bbox/samplers/pseudo_sampler.py: --------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from ..builder import BBOX_SAMPLERS
 4 | from .base_sampler import BaseSampler
 5 | from .sampling_result import SamplingResult
 6 | 
 7 | 
 8 | @BBOX_SAMPLERS.register_module()
 9 | class PseudoSampler(BaseSampler):
10 |     """A pseudo sampler that does not actually do sampling."""
11 | 
12 |     def __init__(self, **kwargs):
13 |         pass
14 | 
15 |     def _sample_pos(self, **kwargs):
16 |         """Sample positive samples."""
17 |         raise NotImplementedError
18 | 
19 |     def _sample_neg(self, **kwargs):
20 |         """Sample negative samples."""
21 |         raise NotImplementedError
22 | 
23 |     def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
24 |         """Directly returns the positive and negative indices of samples.
25 | 26 | Args: 27 | assign_result (:obj:`AssignResult`): Assigned results 28 | bboxes (torch.Tensor): Bounding boxes 29 | gt_bboxes (torch.Tensor): Ground truth boxes 30 | 31 | Returns: 32 | :obj:`SamplingResult`: sampler results 33 | """ 34 | pos_inds = torch.nonzero( 35 | assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique() 36 | neg_inds = torch.nonzero( 37 | assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique() 38 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 39 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 40 | assign_result, gt_flags) 41 | return sampling_result 42 | -------------------------------------------------------------------------------- /rfvision/core/camera/camera_base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from abc import ABCMeta, abstractmethod 3 | 4 | from rflib.utils import Registry 5 | 6 | CAMERAS = Registry('camera') 7 | 8 | 9 | class SingleCameraBase(metaclass=ABCMeta): 10 | """Base class for single camera model. 11 | 12 | Args: 13 | param (dict): Camera parameters 14 | 15 | Methods: 16 | world_to_camera: Project points from world coordinates to camera 17 | coordinates 18 | camera_to_world: Project points from camera coordinates to world 19 | coordinates 20 | camera_to_pixel: Project points from camera coordinates to pixel 21 | coordinates 22 | world_to_pixel: Project points from world coordinates to pixel 23 | coordinates 24 | """ 25 | 26 | @abstractmethod 27 | def __init__(self, param): 28 | """Load camera parameters and check validity.""" 29 | 30 | def world_to_camera(self, X): 31 | """Project points from world coordinates to camera coordinates.""" 32 | raise NotImplementedError 33 | 34 | def camera_to_world(self, X): 35 | """Project points from camera coordinates to world coordinates.""" 36 | raise NotImplementedError 37 | 38 | def camera_to_pixel(self, X): 39 | """Project points from camera coordinates to pixel coordinates.""" 40 | raise NotImplementedError 41 | 42 | def world_to_pixel(self, X): 43 | """Project points from world coordinates to pixel coordinates.""" 44 | _X = self.world_to_camera(X) 45 | return self.camera_to_pixel(_X) 46 | -------------------------------------------------------------------------------- /tests/test_models/test_backbones/test_resnest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | from rfvision.models.backbones import ResNeSt 5 | from rfvision.models.backbones.resnest import Bottleneck as BottleneckS 6 | 7 | 8 | def test_resnest_bottleneck(): 9 | with pytest.raises(AssertionError): 10 | # Style must be in ['pytorch', 'caffe'] 11 | BottleneckS(64, 64, radix=2, reduction_factor=4, style='tensorflow') 12 | 13 | # Test ResNeSt Bottleneck structure 14 | block = BottleneckS( 15 | 64, 256, radix=2, reduction_factor=4, stride=2, style='pytorch') 16 | assert block.avd_layer.stride == 2 17 | assert block.conv2.channels == 256 18 | 19 | # Test ResNeSt Bottleneck forward 20 | block = BottleneckS(64, 16, radix=2, reduction_factor=4) 21 | x = torch.randn(2, 64, 56, 56) 22 | x_out = block(x) 23 | assert x_out.shape == torch.Size([2, 64, 56, 56]) 24 | 25 | 26 | def test_resnest_backbone(): 27 | with pytest.raises(KeyError): 28 | # ResNeSt depth should be in [50, 101, 152, 200] 29 | ResNeSt(depth=18) 30 | 31 | # Test ResNeSt with radix 2, reduction_factor 4 32 | model = ResNeSt( 33 | depth=50, 
radix=2, reduction_factor=4, out_indices=(0, 1, 2, 3)) 34 | model.init_weights() 35 | model.train() 36 | 37 | imgs = torch.randn(2, 3, 224, 224) 38 | feat = model(imgs) 39 | assert len(feat) == 4 40 | assert feat[0].shape == torch.Size([2, 256, 56, 56]) 41 | assert feat[1].shape == torch.Size([2, 512, 28, 28]) 42 | assert feat[2].shape == torch.Size([2, 1024, 14, 14]) 43 | assert feat[3].shape == torch.Size([2, 2048, 7, 7]) 44 | -------------------------------------------------------------------------------- /flows/pose_estimators/articulation/articulation_estimator.py: -------------------------------------------------------------------------------- 1 | model = dict(type='ArticulationEstimator') 2 | data_root = '/disk4/data/arti_data/real_data/box/' 3 | data = dict( 4 | samples_per_gpu=1, 5 | workers_per_gpu=1, 6 | train=dict( 7 | type='ArticulationDataset', 8 | ann_file=data_root + 'train_meta.txt', 9 | img_prefix=data_root, 10 | intrinsics_path=data_root + 'camera_intrinsic.json', 11 | n_max_parts=13), 12 | val=dict( 13 | type='ArticulationDataset', 14 | ann_file=data_root + 'test_meta.txt', 15 | img_prefix=data_root, 16 | intrinsics_path=data_root + 'camera_intrinsic.json', 17 | n_max_parts=13), 18 | test=dict( 19 | type='ArticulationDataset', 20 | ann_file=data_root + 'test.txt', 21 | img_prefix=data_root, 22 | intrinsics_path=data_root + 'camera_intrinsic.json', 23 | n_max_parts=13), 24 | ) 25 | 26 | 27 | checkpoint_config = dict(interval=20) 28 | log_config = dict( 29 | interval=10, 30 | hooks=[ 31 | dict(type='TextLoggerHook'), 32 | dict(type='TensorboardLoggerHook') 33 | ]) 34 | # yapf:enable 35 | dist_params = dict(backend='nccl') 36 | log_level = 'INFO' 37 | work_dir = None 38 | load_from = None 39 | resume_from = None 40 | workflow = [('train', 1)] 41 | 42 | lr = 0.001 # max learning rate 43 | optimizer = dict(type='Adam', lr=lr, weight_decay=0.01) 44 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 45 | lr_config = dict(policy='step', warmup='linear', warmup_iters=500, step=[80, 90]) 46 | # runtime settings 47 | total_epochs = 100 48 | find_unused_parameters = True -------------------------------------------------------------------------------- /flows/detectors/yolo/yolov3_d53_320_273e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './yolov3_d53_mstrain-608_273e_coco.py' 2 | # dataset settings 3 | img_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True) 4 | train_pipeline = [ 5 | dict(type='LoadImageFromFile', to_float32=True), 6 | dict(type='LoadAnnotations', with_bbox=True), 7 | dict(type='PhotoMetricDistortion'), 8 | dict( 9 | type='Expand', 10 | mean=img_norm_cfg['mean'], 11 | to_rgb=img_norm_cfg['to_rgb'], 12 | ratio_range=(1, 2)), 13 | dict( 14 | type='MinIoURandomCrop', 15 | min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9), 16 | min_crop_size=0.3), 17 | dict(type='Resize', img_scale=(320, 320), keep_ratio=True), 18 | dict(type='RandomFlip', flip_ratio=0.5), 19 | dict(type='Normalize', **img_norm_cfg), 20 | dict(type='Pad', size_divisor=32), 21 | dict(type='DefaultFormatBundle'), 22 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) 23 | ] 24 | test_pipeline = [ 25 | dict(type='LoadImageFromFile'), 26 | dict( 27 | type='MultiScaleFlipAug', 28 | img_scale=(320, 320), 29 | flip=False, 30 | transforms=[ 31 | dict(type='Resize', keep_ratio=True), 32 | dict(type='RandomFlip'), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='Pad', size_divisor=32), 35 | 
dict(type='ImageToTensor', keys=['img']), 36 | dict(type='Collect', keys=['img']) 37 | ]) 38 | ] 39 | data = dict( 40 | train=dict(pipeline=train_pipeline), 41 | val=dict(pipeline=test_pipeline), 42 | test=dict(pipeline=test_pipeline)) 43 | -------------------------------------------------------------------------------- /flows/detectors/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './htc_r50_fpn_1x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://resnext101_64x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=64, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | norm_eval=True, 14 | style='pytorch', 15 | dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), 16 | stage_with_dcn=(False, True, True, True))) 17 | # dataset settings 18 | img_norm_cfg = dict( 19 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 20 | train_pipeline = [ 21 | dict(type='LoadImageFromFile'), 22 | dict( 23 | type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), 24 | dict( 25 | type='Resize', 26 | img_scale=[(1600, 400), (1600, 1400)], 27 | multiscale_mode='range', 28 | keep_ratio=True), 29 | dict(type='RandomFlip', flip_ratio=0.5), 30 | dict(type='Normalize', **img_norm_cfg), 31 | dict(type='Pad', size_divisor=32), 32 | dict(type='SegRescale', scale_factor=1 / 8), 33 | dict(type='DefaultFormatBundle'), 34 | dict( 35 | type='Collect', 36 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), 37 | ] 38 | data = dict( 39 | samples_per_gpu=1, workers_per_gpu=1, train=dict(pipeline=train_pipeline)) 40 | # learning policy 41 | lr_config = dict(step=[16, 19]) 42 | runner = dict(type='EpochBasedRunner', max_epochs=20) 43 | -------------------------------------------------------------------------------- /flows/detectors/yolo/yolov3_d53_mstrain-416_273e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './yolov3_d53_mstrain-608_273e_coco.py' 2 | # dataset settings 3 | img_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True) 4 | train_pipeline = [ 5 | dict(type='LoadImageFromFile', to_float32=True), 6 | dict(type='LoadAnnotations', with_bbox=True), 7 | dict(type='PhotoMetricDistortion'), 8 | dict( 9 | type='Expand', 10 | mean=img_norm_cfg['mean'], 11 | to_rgb=img_norm_cfg['to_rgb'], 12 | ratio_range=(1, 2)), 13 | dict( 14 | type='MinIoURandomCrop', 15 | min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9), 16 | min_crop_size=0.3), 17 | dict(type='Resize', img_scale=[(320, 320), (416, 416)], keep_ratio=True), 18 | dict(type='RandomFlip', flip_ratio=0.5), 19 | dict(type='Normalize', **img_norm_cfg), 20 | dict(type='Pad', size_divisor=32), 21 | dict(type='DefaultFormatBundle'), 22 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) 23 | ] 24 | test_pipeline = [ 25 | dict(type='LoadImageFromFile'), 26 | dict( 27 | type='MultiScaleFlipAug', 28 | img_scale=(416, 416), 29 | flip=False, 30 | transforms=[ 31 | dict(type='Resize', keep_ratio=True), 32 | dict(type='RandomFlip'), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='Pad', size_divisor=32), 35 | dict(type='ImageToTensor', keys=['img']), 36 | dict(type='Collect', keys=['img']) 37 | ]) 38 | ] 39 | data = dict( 40 | train=dict(pipeline=train_pipeline), 41 | val=dict(pipeline=test_pipeline), 42 | 
test=dict(pipeline=test_pipeline))
43 | 
-------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py: --------------------------------------------------------------------------------
 1 | _base_ = './mask_rcnn_r50_fpn_1x_coco.py'
 2 | model = dict(
 3 |     init_cfg='open-mmlab://detectron2/resnet50_caffe',
 4 |     backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe'))
 5 | # use caffe img_norm
 6 | img_norm_cfg = dict(
 7 |     mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
 8 | train_pipeline = [
 9 |     dict(type='LoadImageFromFile'),
10 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
11 |     dict(
12 |         type='Resize',
13 |         img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
14 |                    (1333, 768), (1333, 800)],
15 |         multiscale_mode='value',
16 |         keep_ratio=True),
17 |     dict(type='RandomFlip', flip_ratio=0.5),
18 |     dict(type='Normalize', **img_norm_cfg),
19 |     dict(type='Pad', size_divisor=32),
20 |     dict(type='DefaultFormatBundle'),
21 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
22 | ]
23 | test_pipeline = [
24 |     dict(type='LoadImageFromFile'),
25 |     dict(
26 |         type='MultiScaleFlipAug',
27 |         img_scale=(1333, 800),
28 |         flip=False,
29 |         transforms=[
30 |             dict(type='Resize', keep_ratio=True),
31 |             dict(type='RandomFlip'),
32 |             dict(type='Normalize', **img_norm_cfg),
33 |             dict(type='Pad', size_divisor=32),
34 |             dict(type='ImageToTensor', keys=['img']),
35 |             dict(type='Collect', keys=['img']),
36 |         ])
37 | ]
38 | data = dict(
39 |     train=dict(pipeline=train_pipeline),
40 |     val=dict(pipeline=test_pipeline),
41 |     test=dict(pipeline=test_pipeline))
42 | 
-------------------------------------------------------------------------------- /rfvision/datasets/pipelines/compose.py: --------------------------------------------------------------------------------
 1 | import collections
 2 | 
 3 | from rflib.utils import build_from_cfg
 4 | 
 5 | from ..builder import PIPELINES
 6 | 
 7 | 
 8 | @PIPELINES.register_module()
 9 | class Compose:
10 |     """Compose multiple transforms sequentially.
11 | 
12 |     Args:
13 |         transforms (Sequence[dict | callable]): Sequence of transform objects or
14 |             config dicts to be composed.
15 |     """
16 | 
17 |     def __init__(self, transforms):
18 |         assert isinstance(transforms, collections.abc.Sequence)
19 |         self.transforms = []
20 |         for transform in transforms:
21 |             if isinstance(transform, dict):
22 |                 transform = build_from_cfg(transform, PIPELINES)
23 |                 self.transforms.append(transform)
24 |             elif callable(transform):
25 |                 self.transforms.append(transform)
26 |             else:
27 |                 raise TypeError('transform must be callable or a dict')
28 | 
29 |     def __call__(self, data):
30 |         """Call function to apply transforms sequentially.
31 | 
32 |         Args:
33 |             data (dict): A result dict contains the data to transform.
34 | 
35 |         Returns:
36 |             dict: Transformed data.
37 |         """
38 | 
39 |         for t in self.transforms:
40 |             data = t(data)
41 |             if data is None:
42 |                 return None
43 |         return data
44 | 
45 |     def __repr__(self):
46 |         format_string = self.__class__.__name__ + '('
47 |         for t in self.transforms:
48 |             format_string += '\n'
49 |             format_string += f'    {t}'
50 |         format_string += '\n)'
51 |         return format_string
52 | 
-------------------------------------------------------------------------------- /rfvision/components/utils/builder.py: --------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from rflib.utils import Registry, build_from_cfg
 3 | 
 4 | TRANSFORMER = Registry('Transformer')
 5 | LINEAR_LAYERS = Registry('linear layers')
 6 | 
 7 | 
 8 | def build_transformer(cfg, default_args=None):
 9 |     """Builder for Transformer."""
10 |     return build_from_cfg(cfg, TRANSFORMER, default_args)
11 | 
12 | 
13 | LINEAR_LAYERS.register_module('Linear', module=nn.Linear)
14 | 
15 | 
16 | def build_linear_layer(cfg, *args, **kwargs):
17 |     """Build linear layer.
18 |     Args:
19 |         cfg (None or dict): The linear layer config, which should contain:
20 |             - type (str): Layer type.
21 |             - layer args: Args needed to instantiate a linear layer.
22 |         args (argument list): Arguments passed to the `__init__`
23 |             method of the corresponding linear layer.
24 |         kwargs (keyword arguments): Keyword arguments passed to the `__init__`
25 |             method of the corresponding linear layer.
26 |     Returns:
27 |         nn.Module: Created linear layer.
28 |     """
29 |     if cfg is None:
30 |         cfg_ = dict(type='Linear')
31 |     else:
32 |         if not isinstance(cfg, dict):
33 |             raise TypeError('cfg must be a dict')
34 |         if 'type' not in cfg:
35 |             raise KeyError('the cfg dict must contain the key "type"')
36 |         cfg_ = cfg.copy()
37 | 
38 |     layer_type = cfg_.pop('type')
39 |     if layer_type not in LINEAR_LAYERS:
40 |         raise KeyError(f'Unrecognized linear type {layer_type}')
41 |     else:
42 |         linear_layer = LINEAR_LAYERS.get(layer_type)
43 | 
44 |     layer = linear_layer(*args, **kwargs, **cfg_)
45 | 
46 |     return layer
47 | 
-------------------------------------------------------------------------------- /flows/human_analyzers/hand/interhand3d/internet_interhand3d.yml: --------------------------------------------------------------------------------
 1 | Collections:
 2 | - Metadata:
 3 |     Architecture:
 4 |     - InterNet
 5 |     - ResNet
 6 |   Name: Hand 3D Keypoint internet interhand3d
 7 |   Paper:
 8 |     Title: 'InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation
 9 |       from a Single RGB Image'
10 |     URL: https://link.springer.com/content/pdf/10.1007/978-3-030-58565-5_33.pdf
11 |   README: configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/internet_interhand3d.md
12 | Models:
13 | - Config: configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/res50_interhand3d_all_256x256.py
14 |   In Collection: Hand 3D Keypoint internet interhand3d
15 |   Metadata:
16 |     Training Data: InterHand2.6M
17 |   Name: internet_res50_interhand3d_all_256x256
18 |   Results:
19 |   - Dataset: InterHand2.6M
20 |     Metrics:
21 |       APh: 0.99
22 |       MPJPE-all: 11.59
23 |       MPJPE-interacting: 13.4
24 |       MPJPE-single: 9.47
25 |     Task: Hand 3D Keypoint
26 |     Weights: https://download.openmmlab.com/mmpose/hand3d/internet/res50_intehand3dv1.0_all_256x256-42b7f2ac_20210702.pth
27 | - Config: configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/res50_interhand3d_all_256x256.py
28 |   In Collection: Hand 3D Keypoint internet interhand3d
29 |   Metadata:
30 |     Training Data: InterHand2.6M
31 |   Name: internet_res50_interhand3d_all_256x256
32 |   Results:
37 | """ 38 | 39 | for t in self.transforms: 40 | data = t(data) 41 | if data is None: 42 | return None 43 | return data 44 | 45 | def __repr__(self): 46 | format_string = self.__class__.__name__ + '(' 47 | for t in self.transforms: 48 | format_string += '\n' 49 | format_string += f' {t}' 50 | format_string += '\n)' 51 | return format_string 52 | -------------------------------------------------------------------------------- /rfvision/components/utils/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from rflib.utils import Registry, build_from_cfg 3 | 4 | TRANSFORMER = Registry('Transformer') 5 | LINEAR_LAYERS = Registry('linear layers') 6 | 7 | 8 | def build_transformer(cfg, default_args=None): 9 | """Builder for Transformer.""" 10 | return build_from_cfg(cfg, TRANSFORMER, default_args) 11 | 12 | 13 | LINEAR_LAYERS.register_module('Linear', module=nn.Linear) 14 | 15 | 16 | def build_linear_layer(cfg, *args, **kwargs): 17 | """Build linear layer. 18 | Args: 19 | cfg (None or dict): The linear layer config, which should contain: 20 | - type (str): Layer type. 21 | - layer args: Args needed to instantiate an linear layer. 22 | args (argument list): Arguments passed to the `__init__` 23 | method of the corresponding linear layer. 24 | kwargs (keyword arguments): Keyword arguments passed to the `__init__` 25 | method of the corresponding linear layer. 26 | Returns: 27 | nn.Module: Created linear layer. 28 | """ 29 | if cfg is None: 30 | cfg_ = dict(type='Linear') 31 | else: 32 | if not isinstance(cfg, dict): 33 | raise TypeError('cfg must be a dict') 34 | if 'type' not in cfg: 35 | raise KeyError('the cfg dict must contain the key "type"') 36 | cfg_ = cfg.copy() 37 | 38 | layer_type = cfg_.pop('type') 39 | if layer_type not in LINEAR_LAYERS: 40 | raise KeyError(f'Unrecognized linear type {layer_type}') 41 | else: 42 | linear_layer = LINEAR_LAYERS.get(layer_type) 43 | 44 | layer = linear_layer(*args, **kwargs, **cfg_) 45 | 46 | return layer 47 | -------------------------------------------------------------------------------- /flows/human_analyzers/hand/interhand3d/internet_interhand3d.yml: -------------------------------------------------------------------------------- 1 | Collections: 2 | - Metadata: 3 | Architecture: 4 | - InterNet 5 | - ResNet 6 | Name: Hand 3D Keypoint internet interhand3d 7 | Paper: 8 | Title: 'InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation 9 | from a Single RGB Image' 10 | URL: https://link.springer.com/content/pdf/10.1007/978-3-030-58565-5_33.pdf 11 | README: configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/internet_interhand3d.md 12 | Models: 13 | - Config: configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/res50_interhand3d_all_256x256.py 14 | In Collection: Hand 3D Keypoint internet interhand3d 15 | Metadata: 16 | Training Data: InterHand2.6M 17 | Name: internet_res50_interhand3d_all_256x256 18 | Results: 19 | - Dataset: InterHand2.6M 20 | Metrics: 21 | APh: 0.99 22 | MPJPE-all: 11.59 23 | MPJPE-interacting: 13.4 24 | MPJPE-single: 9.47 25 | Task: Hand 3D Keypoint 26 | Weights: https://download.openmmlab.com/mmpose/hand3d/internet/res50_intehand3dv1.0_all_256x256-42b7f2ac_20210702.pth 27 | - Config: configs/hand/3d_kpt_sview_rgb_img/internet/interhand3d/res50_interhand3d_all_256x256.py 28 | In Collection: Hand 3D Keypoint internet interhand3d 29 | Metadata: 30 | Training Data: InterHand2.6M 31 | Name: internet_res50_interhand3d_all_256x256 32 | Results: 
33 | - Dataset: InterHand2.6M 34 | Metrics: 35 | APh: 0.98 36 | MPJPE-all: 13.16 37 | MPJPE-interacting: 15.23 38 | MPJPE-single: 11.22 39 | Task: Hand 3D Keypoint 40 | Weights: https://download.openmmlab.com/mmpose/hand3d/internet/res50_intehand3dv1.0_all_256x256-42b7f2ac_20210702.pth 41 | -------------------------------------------------------------------------------- /flows/detectors/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://detectron/resnet101_caffe', 4 | backbone=dict(depth=101)) 5 | img_norm_cfg = dict( 6 | mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True), 10 | dict( 11 | type='Resize', 12 | img_scale=[(1333, 640), (1333, 800)], 13 | multiscale_mode='value', 14 | keep_ratio=True), 15 | dict(type='RandomFlip', flip_ratio=0.5), 16 | dict(type='Normalize', **img_norm_cfg), 17 | dict(type='Pad', size_divisor=32), 18 | dict(type='DefaultFormatBundle'), 19 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 20 | ] 21 | test_pipeline = [ 22 | dict(type='LoadImageFromFile'), 23 | dict( 24 | type='MultiScaleFlipAug', 25 | img_scale=(1333, 800), 26 | flip=False, 27 | transforms=[ 28 | dict(type='Resize', keep_ratio=True), 29 | dict(type='RandomFlip'), 30 | dict(type='Normalize', **img_norm_cfg), 31 | dict(type='Pad', size_divisor=32), 32 | dict(type='ImageToTensor', keys=['img']), 33 | dict(type='Collect', keys=['img']), 34 | ]) 35 | ] 36 | data = dict( 37 | samples_per_gpu=2, 38 | workers_per_gpu=2, 39 | train=dict(pipeline=train_pipeline), 40 | val=dict(pipeline=test_pipeline), 41 | test=dict(pipeline=test_pipeline)) 42 | # learning policy 43 | lr_config = dict(step=[16, 22]) 44 | runner = dict(type='EpochBasedRunner', max_epochs=24) 45 | -------------------------------------------------------------------------------- /rfvision/datasets/pose_dataset/mesh/mesh_adv_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | from torch.utils.data import Dataset 4 | 5 | from rfvision.datasets.builder import DATASETS, build_dataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class MeshAdversarialDataset(Dataset): 10 | """Mix Dataset for the adversarial training in 3D human mesh estimation 11 | task. 12 | 13 | The dataset combines data from two datasets and 14 | returns a dict containing data from both datasets. 15 | 16 | Args: 17 | train_dataset (Dataset): Dataset for 3D human mesh estimation. 18 | adversarial_dataset (Dataset): Dataset for adversarial learning, 19 | provides real SMPL parameters. 20 | """ 21 | 22 | def __init__(self, train_dataset, adversarial_dataset): 23 | super().__init__() 24 | self.train_dataset = build_dataset(train_dataset) 25 | self.adversarial_dataset = build_dataset(adversarial_dataset) 26 | self.length = len(self.train_dataset) 27 | 28 | def __len__(self): 29 | """Get the size of the dataset.""" 30 | return self.length 31 | 32 | def __getitem__(self, i): 33 | """Given an index, get the data from the train dataset and randomly 34 | sample an item from the adversarial dataset. 35 | 36 | Returns a dict containing data from the train and adversarial datasets.
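Example:
    A schematic sketch; ``train_cfg`` and ``adv_cfg`` are placeholder
    configs that must be resolvable by ``build_dataset``.

    >>> dataset = MeshAdversarialDataset(train_dataset=train_cfg,
    ...                                  adversarial_dataset=adv_cfg)
    >>> sample = dataset[0]  # train item updated with a random adv item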
37 | """ 38 | data = self.train_dataset[i] 39 | ind_adv = np.random.randint( 40 | low=0, high=len(self.adversarial_dataset), dtype=int) 41 | data.update(self.adversarial_dataset[ind_adv % 42 | len(self.adversarial_dataset)]) 43 | return data 44 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://detectron2/resnet50_caffe', 4 | backbone=dict(norm_cfg=dict(requires_grad=False), style='caffe')) 5 | # use caffe img_norm 6 | img_norm_cfg = dict( 7 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict( 11 | type='LoadAnnotations', 12 | with_bbox=True, 13 | with_mask=True, 14 | poly2mask=False), 15 | dict( 16 | type='Resize', 17 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 18 | (1333, 768), (1333, 800)], 19 | multiscale_mode='value', 20 | keep_ratio=True), 21 | dict(type='RandomFlip', flip_ratio=0.5), 22 | dict(type='Normalize', **img_norm_cfg), 23 | dict(type='Pad', size_divisor=32), 24 | dict(type='DefaultFormatBundle'), 25 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 26 | ] 27 | test_pipeline = [ 28 | dict(type='LoadImageFromFile'), 29 | dict( 30 | type='MultiScaleFlipAug', 31 | img_scale=(1333, 800), 32 | flip=False, 33 | transforms=[ 34 | dict(type='Resize', keep_ratio=True), 35 | dict(type='RandomFlip'), 36 | dict(type='Normalize', **img_norm_cfg), 37 | dict(type='Pad', size_divisor=32), 38 | dict(type='ImageToTensor', keys=['img']), 39 | dict(type='Collect', keys=['img']), 40 | ]) 41 | ] 42 | data = dict( 43 | train=dict(pipeline=train_pipeline), 44 | val=dict(pipeline=test_pipeline), 45 | test=dict(pipeline=test_pipeline)) 46 | -------------------------------------------------------------------------------- /rfvision/core/evaluation/class_names.py: -------------------------------------------------------------------------------- 1 | import rflib 2 | 3 | def coco_classes(): 4 | return [ 5 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 6 | 'truck', 'boat', 'traffic_light', 'fire_hydrant', 'stop_sign', 7 | 'parking_meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 8 | 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 9 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 10 | 'sports_ball', 'kite', 'baseball_bat', 'baseball_glove', 'skateboard', 11 | 'surfboard', 'tennis_racket', 'bottle', 'wine_glass', 'cup', 'fork', 12 | 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 13 | 'broccoli', 'carrot', 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 14 | 'couch', 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 15 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 16 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 17 | 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush' 18 | ] 19 | 20 | 21 | dataset_aliases = { 22 | 'coco': ['coco', 'mscoco', 'ms_coco'], 23 | } 24 | 25 | 26 | def get_classes(dataset): 27 | """Get class names of a dataset.""" 28 | alias2name = {} 29 | for name, aliases in dataset_aliases.items(): 30 | for alias in aliases: 31 | alias2name[alias] = name 32 | 33 | if rflib.is_str(dataset): 34 | if dataset in alias2name: 35 | labels = 
eval(alias2name[dataset] + '_classes()') 36 | else: 37 | raise ValueError(f'Unrecognized dataset: {dataset}') 38 | else: 39 | raise TypeError(f'dataset must be a str, but got {type(dataset)}') 40 | return labels 41 | -------------------------------------------------------------------------------- /rfvision/models/detectors3d/category_ppf/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | def backproject(depth, intrinsics, instance_mask): 5 | intrinsics_inv = np.linalg.inv(intrinsics) 6 | image_shape = depth.shape 7 | width = image_shape[1] 8 | height = image_shape[0] 9 | 10 | x = np.arange(width) 11 | y = np.arange(height) 12 | 13 | # non_zero_mask = np.logical_and(depth > 0, depth < 5000) 14 | non_zero_mask = (depth > 0) 15 | final_instance_mask = np.logical_and(instance_mask, non_zero_mask) 16 | 17 | idxs = np.where(final_instance_mask) 18 | grid = np.array([idxs[1], idxs[0]]) 19 | 20 | # shape: height * width 21 | # mesh_grid = np.meshgrid(x, y) #[height, width, 2] 22 | # mesh_grid = np.reshape(mesh_grid, [2, -1]) 23 | length = grid.shape[1] 24 | ones = np.ones([1, length]) 25 | uv_grid = np.concatenate((grid, ones), axis=0) # [3, num_pixel] 26 | 27 | xyz = intrinsics_inv @ uv_grid # [3, num_pixel] 28 | xyz = np.transpose(xyz) # [num_pixel, 3] 29 | 30 | z = depth[idxs[0], idxs[1]] 31 | 32 | # print(np.amax(z), np.amin(z)) 33 | pts = xyz * z[:, np.newaxis] / xyz[:, -1:] 34 | pts[:, 0] = -pts[:, 0] 35 | pts[:, 1] = -pts[:, 1] 36 | 37 | return pts, idxs 38 | 39 | 40 | def fibonacci_sphere(samples): 41 | points = [] 42 | phi = math.pi * (3. - math.sqrt(5.)) # golden angle in radians 43 | 44 | for i in range(samples): 45 | y = 1 - (i / float(samples - 1)) * 2 # y goes from 1 to -1 46 | radius = math.sqrt(1 - y * y) # radius at y 47 | 48 | theta = phi * i # golden angle increment 49 | 50 | x = math.cos(theta) * radius 51 | z = math.sin(theta) * radius 52 | 53 | points.append((x, y, z)) 54 | 55 | return points 56 | 57 | -------------------------------------------------------------------------------- /rfvision/models/detectors/cascade_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class CascadeRCNN(TwoStageDetector): 7 | r"""Implementation of `Cascade R-CNN: Delving into High Quality Object 8 | Detection`.""" 9 | 10 | def __init__(self, 11 | backbone, 12 | neck=None, 13 | rpn_head=None, 14 | roi_head=None, 15 | train_cfg=None, 16 | test_cfg=None, 17 | init_cfg=None): 18 | super(CascadeRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | rpn_head=rpn_head, 22 | roi_head=roi_head, 23 | train_cfg=train_cfg, 24 | test_cfg=test_cfg, 25 | init_cfg=init_cfg) 26 | 27 | def show_result(self, data, result, **kwargs): 28 | """Show prediction results of the detector. 29 | 30 | Args: 31 | data (str or np.ndarray): Image filename or loaded image. 32 | result (Tensor or tuple): The results to draw over `img`, 33 | bbox_result or (bbox_result, segm_result). 34 | 35 | Returns: 36 | np.ndarray: The image with bboxes drawn on it.
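Example:
    A minimal sketch; ``model`` and the image path are illustrative,
    and ``result`` is the output of the detector's test forward.

    >>> img = model.show_result('demo.jpg', result)  # np.ndarray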
37 | """ 38 | if self.with_mask: 39 | ms_bbox_result, ms_segm_result = result 40 | if isinstance(ms_bbox_result, dict): 41 | result = (ms_bbox_result['ensemble'], 42 | ms_segm_result['ensemble']) 43 | else: 44 | if isinstance(result, dict): 45 | result = result['ensemble'] 46 | return super(CascadeRCNN, self).show_result(data, result, **kwargs) 47 | -------------------------------------------------------------------------------- /flows/detectors/solo/solo_r50_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/coco_instance.py', 3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='SOLO', 9 | backbone=dict( 10 | type='ResNet', 11 | depth=50, 12 | num_stages=4, 13 | out_indices=(0, 1, 2, 3), 14 | frozen_stages=1, 15 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), 16 | style='pytorch'), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[256, 512, 1024, 2048], 20 | out_channels=256, 21 | start_level=0, 22 | num_outs=5), 23 | mask_head=dict( 24 | type='SOLOHead', 25 | num_classes=80, 26 | in_channels=256, 27 | stacked_convs=7, 28 | feat_channels=256, 29 | strides=[8, 8, 16, 32, 32], 30 | scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)), 31 | pos_scale=0.2, 32 | num_grids=[40, 36, 24, 16, 12], 33 | cls_down_index=0, 34 | loss_mask=dict(type='DiceLoss', use_sigmoid=True, loss_weight=3.0), 35 | loss_cls=dict( 36 | type='FocalLoss', 37 | use_sigmoid=True, 38 | gamma=2.0, 39 | alpha=0.25, 40 | loss_weight=1.0), 41 | norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)), 42 | # model training and testing settings 43 | test_cfg=dict( 44 | nms_pre=500, 45 | score_thr=0.1, 46 | mask_thr=0.5, 47 | filter_thr=0.05, 48 | kernel='gaussian', # gaussian/linear 49 | sigma=2.0, 50 | max_per_img=100)) 51 | 52 | # optimizer 53 | optimizer = dict(type='SGD', lr=0.01) 54 | 55 | data = dict(workers_per_gpu=2,) 56 | -------------------------------------------------------------------------------- /flows/detectors3d/votenet/votenet_8x8_scannet-3d-18class.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/scannet-3d-18class.py', '../_base_/models/votenet.py', 3 | '../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | bbox_head=dict( 9 | num_classes=18, 10 | bbox_coder=dict( 11 | type='PartialBinBasedBBoxCoder', 12 | num_sizes=18, 13 | num_dir_bins=1, 14 | with_rot=False, 15 | mean_sizes=[[0.76966727, 0.8116021, 0.92573744], 16 | [1.876858, 1.8425595, 1.1931566], 17 | [0.61328, 0.6148609, 0.7182701], 18 | [1.3955007, 1.5121545, 0.83443564], 19 | [0.97949594, 1.0675149, 0.6329687], 20 | [0.531663, 0.5955577, 1.7500148], 21 | [0.9624706, 0.72462326, 1.1481868], 22 | [0.83221924, 1.0490936, 1.6875663], 23 | [0.21132214, 0.4206159, 0.5372846], 24 | [1.4440073, 1.8970833, 0.26985747], 25 | [1.0294262, 1.4040797, 0.87554324], 26 | [1.3766412, 0.65521795, 1.6813129], 27 | [0.6650819, 0.71111923, 1.298853], 28 | [0.41999173, 0.37906948, 1.7513971], 29 | [0.59359556, 0.5912492, 0.73919016], 30 | [0.50867593, 0.50656086, 0.30136237], 31 | [1.1511526, 1.0546296, 0.49706793], 32 | [0.47535285, 0.49249494, 0.5802117]]))) 33 | 34 | # optimizer 35 | # yapf:disable 36 | log_config = dict( 37 | interval=30, 38 | hooks=[ 39 | dict(type='TextLoggerHook'), 40 | 
dict(type='TensorboardLoggerHook') 41 | ]) 42 | # yapf:enable 43 | -------------------------------------------------------------------------------- /rfvision/data/sunrgbd/matlab/extract_split.m: -------------------------------------------------------------------------------- 1 | % Modified from 2 | % https://github.com/facebookresearch/votenet/blob/master/sunrgbd/matlab/extract_split.m 3 | % Copyright (c) Facebook, Inc. and its affiliates. 4 | % 5 | % This source code is licensed under the MIT license found in the 6 | % LICENSE file in the root directory of this source tree. 7 | 8 | %% Dump train/val split. 9 | % Author: Charles R. Qi 10 | 11 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox') 12 | 13 | %% Construct Hash Map 14 | hash_train = java.util.Hashtable; 15 | hash_val = java.util.Hashtable; 16 | 17 | split = load('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/traintestSUNRGBD/allsplit.mat'); 18 | 19 | N_train = length(split.alltrain); 20 | N_val = length(split.alltest); 21 | 22 | for i = 1:N_train 23 | folder_path = split.alltrain{i}; 24 | folder_path(1:16) = ''; 25 | hash_train.put(folder_path,0); 26 | end 27 | for i = 1:N_val 28 | folder_path = split.alltest{i}; 29 | folder_path(1:16) = ''; 30 | hash_val.put(folder_path,0); 31 | end 32 | 33 | %% Map data to train or val set. 34 | load('../OFFICIAL_SUNRGBD/SUNRGBDMeta3DBB_v2.mat'); 35 | if exist('../sunrgbd_trainval','dir')==0 36 | mkdir('../sunrgbd_trainval'); 37 | end 38 | fid_train = fopen('../sunrgbd_trainval/train_data_idx.txt', 'w'); 39 | fid_val = fopen('../sunrgbd_trainval/val_data_idx.txt', 'w'); 40 | 41 | for imageId = 1:10335 42 | data = SUNRGBDMeta(imageId); 43 | depthpath = data.depthpath; 44 | depthpath(1:16) = ''; 45 | [filepath,name,ext] = fileparts(depthpath); 46 | [filepath,name,ext] = fileparts(filepath); 47 | if hash_train.containsKey(filepath) 48 | fprintf(fid_train, '%d\n', imageId); 49 | elseif hash_val.containsKey(filepath) 50 | fprintf(fid_val, '%d\n', imageId); 51 | else 52 | a = 1; 53 | end 54 | end 55 | fclose(fid_train); 56 | fclose(fid_val); 57 | -------------------------------------------------------------------------------- /flows/detectors/yolox/yolox_tiny_8x8_300e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './yolox_s_8x8_300e_coco.py' 2 | 3 | # model settings 4 | model = dict( 5 | random_size_range=(10, 20), 6 | backbone=dict(deepen_factor=0.33, widen_factor=0.375), 7 | neck=dict(in_channels=[96, 192, 384], out_channels=96), 8 | bbox_head=dict(in_channels=96, feat_channels=96)) 9 | 10 | img_scale = (640, 640) 11 | 12 | train_pipeline = [ 13 | dict(type='Mosaic', img_scale=img_scale, pad_val=114.0), 14 | dict( 15 | type='RandomAffine', 16 | scaling_ratio_range=(0.5, 1.5), 17 | border=(-img_scale[0] // 2, -img_scale[1] // 2)), 18 | dict(type='YOLOXHSVRandomAug'), 19 | dict(type='RandomFlip', flip_ratio=0.5), 20 | dict(type='Resize', img_scale=img_scale, keep_ratio=True), 21 | dict( 22 | type='Pad', 23 | pad_to_square=True, 24 | pad_val=dict(img=(114.0, 114.0, 114.0))), 25 | dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False), 26 | dict(type='DefaultFormatBundle'), 27 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) 28 | ] 29 | 30 | test_pipeline = [ 31 | dict(type='LoadImageFromFile'), 32 | dict( 33 | type='MultiScaleFlipAug', 34 | img_scale=(416, 416), 35 | flip=False, 36 | transforms=[ 37 | dict(type='Resize', keep_ratio=True), 38 | dict(type='RandomFlip'), 39 | dict( 40 | type='Pad', 41 | pad_to_square=True, 42 | 
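# pad with the same 114 fill value that Mosaic/Pad use at train time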
pad_val=dict(img=(114.0, 114.0, 114.0))), 43 | dict(type='DefaultFormatBundle'), 44 | dict(type='Collect', keys=['img']) 45 | ]) 46 | ] 47 | 48 | train_dataset = dict(pipeline=train_pipeline) 49 | 50 | data = dict( 51 | train=train_dataset, 52 | val=dict(pipeline=test_pipeline), 53 | test=dict(pipeline=test_pipeline)) 54 | -------------------------------------------------------------------------------- /rfvision/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', eps=1e-6): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1]) 32 | area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1]) 33 | for i in range(bboxes1.shape[0]): 34 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 35 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 36 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 37 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 38 | overlap = np.maximum(x_end - x_start, 0) * np.maximum( 39 | y_end - y_start, 0) 40 | if mode == 'iou': 41 | union = area1[i] + area2 - overlap 42 | else: 43 | union = area1[i] if not exchange else area2 44 | union = np.maximum(union, eps) 45 | ious[i, :] = overlap / union 46 | if exchange: 47 | ious = ious.T 48 | return ious 49 | -------------------------------------------------------------------------------- /tests/data/coco_sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "images": [ 3 | { 4 | "file_name": "fake1.jpg", 5 | "height": 800, 6 | "width": 800, 7 | "id": 0 8 | }, 9 | { 10 | "file_name": "fake2.jpg", 11 | "height": 800, 12 | "width": 800, 13 | "id": 1 14 | }, 15 | { 16 | "file_name": "fake3.jpg", 17 | "height": 800, 18 | "width": 800, 19 | "id": 2 20 | } 21 | ], 22 | "annotations": [ 23 | { 24 | "bbox": [ 25 | 0, 26 | 0, 27 | 20, 28 | 20 29 | ], 30 | "area": 400.00, 31 | "score": 1.0, 32 | "category_id": 1, 33 | "id": 1, 34 | "image_id": 0 35 | }, 36 | { 37 | "bbox": [ 38 | 0, 39 | 0, 40 | 20, 41 | 20 42 | ], 43 | "area": 400.00, 44 | "score": 1.0, 45 | "category_id": 2, 46 | "id": 2, 47 | "image_id": 0 48 | }, 49 | { 50 | "bbox": [ 51 | 0, 52 | 0, 53 | 20, 54 | 20 55 | ], 56 | "area": 400.00, 57 | "score": 1.0, 58 | "category_id": 1, 59 | "id": 3, 60 | "image_id": 1 61 | } 62 | ], 63 | "categories": [ 64 | { 65 | "id": 1, 66 | "name": "bus", 67 | "supercategory": "none" 68 | }, 69 | { 70 | "id": 2, 71 | "name": "car", 72 | "supercategory": "none" 73 | } 74 | ], 75 | "licenses": [], 76 | "info": null 77 | } 78 | -------------------------------------------------------------------------------- 
/flows/detectors/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain-poly_3x_coco_instance.py', 3 | '../_base_/models/mask_rcnn_r50_fpn.py' 4 | ] 5 | 6 | model = dict( 7 | init_cfg='open-mmlab://detectron2/resnet101_caffe', 8 | backbone=dict( 9 | depth=101, 10 | norm_cfg=dict(requires_grad=False), 11 | norm_eval=True, 12 | style='caffe')) 13 | # use caffe img_norm 14 | img_norm_cfg = dict( 15 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 16 | train_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='LoadAnnotations', 20 | with_bbox=True, 21 | with_mask=True, 22 | poly2mask=False), 23 | dict( 24 | type='Resize', 25 | img_scale=[(1333, 640), (1333, 800)], 26 | multiscale_mode='range', 27 | keep_ratio=True), 28 | dict(type='RandomFlip', flip_ratio=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='DefaultFormatBundle'), 32 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 33 | ] 34 | test_pipeline = [ 35 | dict(type='LoadImageFromFile'), 36 | dict( 37 | type='MultiScaleFlipAug', 38 | img_scale=(1333, 800), 39 | flip=False, 40 | transforms=[ 41 | dict(type='Resize', keep_ratio=True), 42 | dict(type='RandomFlip'), 43 | dict(type='Normalize', **img_norm_cfg), 44 | dict(type='Pad', size_divisor=32), 45 | dict(type='ImageToTensor', keys=['img']), 46 | dict(type='Collect', keys=['img']), 47 | ]) 48 | ] 49 | 50 | data = dict( 51 | train=dict(dataset=dict(pipeline=train_pipeline)), 52 | val=dict(pipeline=test_pipeline), 53 | test=dict(pipeline=test_pipeline)) 54 | -------------------------------------------------------------------------------- /tests/test_models/test_dense_heads/test_dense_heads_attr.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from terminaltables import AsciiTable 4 | 5 | from rfvision.models import dense_heads 6 | from rfvision.models.dense_heads import * # noqa: F401,F403 7 | 8 | 9 | def test_dense_heads_test_attr(): 10 | """Tests inference methods such as simple_test and aug_test.""" 11 | # make list of dense heads 12 | exceptions = ['FeatureAdaption'] # module used in head 13 | all_dense_heads = [m for m in dense_heads.__all__ if m not in exceptions] 14 | 15 | # search attributes 16 | check_attributes = [ 17 | 'simple_test', 'aug_test', 'simple_test_bboxes', 'simple_test_rpn', 18 | 'aug_test_rpn' 19 | ] 20 | table_header = ['head name'] + check_attributes 21 | table_data = [table_header] 22 | not_found = {k: [] for k in check_attributes} 23 | for target_head_name in all_dense_heads: 24 | target_head = globals()[target_head_name] 25 | target_head_attributes = dir(target_head) 26 | check_results = [target_head_name] 27 | for check_attribute in check_attributes: 28 | found = check_attribute in target_head_attributes 29 | check_results.append(found) 30 | if not found: 31 | not_found[check_attribute].append(target_head_name) 32 | table_data.append(check_results) 33 | table = AsciiTable(table_data) 34 | print() 35 | print(table.table) 36 | 37 | # NOTE: this test just checks attributes. 38 | # simple_test of RPN heads will not work now. 39 | assert len(not_found['simple_test']) == 0, \ 40 | f'simple_test not found in {not_found["simple_test"]}' 41 | if len(not_found['aug_test']) != 0: 42 | warnings.warn(f'aug_test not found in {not_found["aug_test"]}. 
' 43 | 'Please implement it or raise NotImplementedError.') 44 | -------------------------------------------------------------------------------- /rfvision/components/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 3 | cross_entropy, mask_cross_entropy) 4 | from .focal_loss import FocalLoss, sigmoid_focal_loss 5 | from .ghm_loss import GHMC, GHMR 6 | from .iou_loss import (BoundedIoULoss, CIoULoss, DIoULoss, GIoULoss, IoULoss, 7 | bounded_iou_loss, iou_loss) 8 | from .mse_loss import MSELoss, mse_loss, JointsMSELoss 9 | from .smooth_l1_loss import L1Loss, SmoothL1Loss, l1_loss, smooth_l1_loss 10 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 11 | 12 | from .arti_loss import ArtiNOCSLoss, ArtiMIoULoss, ArtiVECTLoss 13 | from .cosine_simlarity_loss import CosineSimilarityLoss, cosine_similarity_loss 14 | from .axis_aligned_iou_loss import AxisAlignedIoULoss, axis_aligned_iou_loss 15 | from .chamfer_distance import ChamferDistance, chamfer_distance 16 | from rfvision.components.losses_pose.regression_loss import L1LossPose 17 | from .dice_loss import DiceLoss 18 | 19 | # TODO: remove FocalLossSOLO 20 | from .sigmoid_focal_loss_solo import FocalLossSOLO 21 | 22 | __all__ = [ 23 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 24 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 25 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', 26 | 'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'DIoULoss', 'CIoULoss', 'GHMC', 27 | 'GHMR', 'reduce_loss', 'weight_reduce_loss', 'weighted_loss', 'L1Loss', 28 | 'l1_loss', 'JointsMSELoss', 29 | 'ArtiVECTLoss', 'ArtiNOCSLoss', 'ArtiMIoULoss', 30 | 'CosineSimilarityLoss', 'cosine_similarity_loss', 'ChamferDistance', 31 | 'chamfer_distance', 'axis_aligned_iou_loss', 'AxisAlignedIoULoss', 32 | 'L1LossPose', 'DiceLoss', 'FocalLossSOLO' 33 | ] 34 | -------------------------------------------------------------------------------- /rfvision/data/scannet/meta_data/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | scene0707_00 2 | scene0708_00 3 | scene0709_00 4 | scene0710_00 5 | scene0711_00 6 | scene0712_00 7 | scene0713_00 8 | scene0714_00 9 | scene0715_00 10 | scene0716_00 11 | scene0717_00 12 | scene0718_00 13 | scene0719_00 14 | scene0720_00 15 | scene0721_00 16 | scene0722_00 17 | scene0723_00 18 | scene0724_00 19 | scene0725_00 20 | scene0726_00 21 | scene0727_00 22 | scene0728_00 23 | scene0729_00 24 | scene0730_00 25 | scene0731_00 26 | scene0732_00 27 | scene0733_00 28 | scene0734_00 29 | scene0735_00 30 | scene0736_00 31 | scene0737_00 32 | scene0738_00 33 | scene0739_00 34 | scene0740_00 35 | scene0741_00 36 | scene0742_00 37 | scene0743_00 38 | scene0744_00 39 | scene0745_00 40 | scene0746_00 41 | scene0747_00 42 | scene0748_00 43 | scene0749_00 44 | scene0750_00 45 | scene0751_00 46 | scene0752_00 47 | scene0753_00 48 | scene0754_00 49 | scene0755_00 50 | scene0756_00 51 | scene0757_00 52 | scene0758_00 53 | scene0759_00 54 | scene0760_00 55 | scene0761_00 56 | scene0762_00 57 | scene0763_00 58 | scene0764_00 59 | scene0765_00 60 | scene0766_00 61 | scene0767_00 62 | scene0768_00 63 | scene0769_00 64 | scene0770_00 65 | scene0771_00 66 | scene0772_00 67 | scene0773_00 68 | scene0774_00 69 | scene0775_00 70 | scene0776_00 71 | 
scene0777_00 72 | scene0778_00 73 | scene0779_00 74 | scene0780_00 75 | scene0781_00 76 | scene0782_00 77 | scene0783_00 78 | scene0784_00 79 | scene0785_00 80 | scene0786_00 81 | scene0787_00 82 | scene0788_00 83 | scene0789_00 84 | scene0790_00 85 | scene0791_00 86 | scene0792_00 87 | scene0793_00 88 | scene0794_00 89 | scene0795_00 90 | scene0796_00 91 | scene0797_00 92 | scene0798_00 93 | scene0799_00 94 | scene0800_00 95 | scene0801_00 96 | scene0802_00 97 | scene0803_00 98 | scene0804_00 99 | scene0805_00 100 | scene0806_00 101 | -------------------------------------------------------------------------------- /flows/detectors/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/instances_train2017.json', 37 | img_prefix=data_root + 'train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + 'annotations/instances_val2017.json', 42 | img_prefix=data_root + 'val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline)) 49 | evaluation = dict(interval=1, metric='bbox') 50 | -------------------------------------------------------------------------------- /rfvision/components/utils/mlp.py: -------------------------------------------------------------------------------- 1 | from rflib.cnn import ConvModule 2 | from torch import nn as nn 3 | from rflib.runner import BaseModule 4 | 5 | 6 | class MLP(BaseModule): 7 | """A simple MLP module. 8 | 9 | Pass features (B, C, N) through an MLP. 10 | 11 | Args: 12 | in_channel (int): Number of channels of input features. 13 | Default: 18. 14 | conv_channels (tuple[int]): Out channels of the convolution. 15 | Default: (256, 256). 16 | conv_cfg (dict): Config of convolution. 17 | Default: dict(type='Conv1d'). 18 | norm_cfg (dict): Config of normalization. 19 | Default: dict(type='BN1d'). 20 | act_cfg (dict): Config of activation. 21 | Default: dict(type='ReLU'). bias (bool | str): Bias setting forwarded to each ConvModule; 'auto' enables bias only when no norm layer follows. Default: 'auto'. init_cfg (dict, optional): Initialization config. Default: None.
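Example:
    A minimal usage sketch; the tensor shapes are illustrative.

    >>> import torch
    >>> mlp = MLP(in_channel=18, conv_channels=(256, 256))
    >>> feats = torch.rand(4, 18, 1024)  # (B, C, N)
    >>> out = mlp(feats)  # shape (4, 256, 1024)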
22 | """ 23 | 24 | def __init__(self, 25 | in_channel=18, 26 | conv_channels=(256, 256), 27 | conv_cfg=dict(type='Conv1d'), 28 | norm_cfg=dict(type='BN1d'), 29 | act_cfg=dict(type='ReLU'), 30 | bias='auto', 31 | init_cfg=None): 32 | super().__init__(init_cfg=init_cfg) 33 | self.mlp = nn.Sequential() 34 | prev_channels = in_channel 35 | for i, conv_channel in enumerate(conv_channels): 36 | self.mlp.add_module( 37 | f'layer{i}', 38 | ConvModule( 39 | prev_channels, 40 | conv_channels[i], 41 | 1, 42 | padding=0, 43 | conv_cfg=conv_cfg, 44 | norm_cfg=norm_cfg, 45 | act_cfg=act_cfg, 46 | bias=bias, 47 | inplace=True)) 48 | prev_channels = conv_channels[i] 49 | 50 | def forward(self, img_features): 51 | return self.mlp(img_features) 52 | 53 | 54 | if __name__ == '__main__': 55 | m = MLP() 56 | -------------------------------------------------------------------------------- /rfvision/core/hook/sync_norm_hook.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from collections import OrderedDict 3 | 4 | from rflib.runner import get_dist_info 5 | from rflib.runner.hooks import HOOKS, Hook 6 | from torch import nn 7 | 8 | from ..utils.dist_utils import all_reduce_dict 9 | 10 | 11 | def get_norm_states(module): 12 | async_norm_states = OrderedDict() 13 | for name, child in module.named_modules(): 14 | if isinstance(child, nn.modules.batchnorm._NormBase): 15 | for k, v in child.state_dict().items(): 16 | async_norm_states['.'.join([name, k])] = v 17 | return async_norm_states 18 | 19 | 20 | @HOOKS.register_module() 21 | class SyncNormHook(Hook): 22 | """Synchronize Norm states after training epoch, currently used in YOLOX. 23 | 24 | Args: 25 | num_last_epochs (int): The number of latter epochs in the end of the 26 | training to switch to synchronizing norm interval. Default: 15. 27 | interval (int): Synchronizing norm interval. Default: 1. 28 | """ 29 | 30 | def __init__(self, num_last_epochs=15, interval=1): 31 | self.interval = interval 32 | self.num_last_epochs = num_last_epochs 33 | 34 | def before_train_epoch(self, runner): 35 | epoch = runner.epoch 36 | if (epoch + 1) == runner.max_epochs - self.num_last_epochs: 37 | # Synchronize norm every epoch. 
38 | self.interval = 1 39 | 40 | def after_train_epoch(self, runner): 41 | """Synchronizing norm.""" 42 | epoch = runner.epoch 43 | module = runner.model 44 | if (epoch + 1) % self.interval == 0: 45 | _, world_size = get_dist_info() 46 | if world_size == 1: 47 | return 48 | norm_states = get_norm_states(module) 49 | norm_states = all_reduce_dict(norm_states, op='mean') 50 | module.load_state_dict(norm_states, strict=False) 51 | -------------------------------------------------------------------------------- /flows/detectors/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/instances_train2017.json', 37 | img_prefix=data_root + 'train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + 'annotations/instances_val2017.json', 42 | img_prefix=data_root + 'val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline)) 49 | evaluation = dict(metric=['bbox', 'segm']) 50 | -------------------------------------------------------------------------------- /flows/human_analyzers/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | SimpleBaseline3D (ICCV'2017) 5 | 6 | ```bibtex 7 | @inproceedings{martinez_2017_3dbaseline, 8 | title={A simple yet effective baseline for 3d human pose estimation}, 9 | author={Martinez, Julieta and Hossain, Rayat and Romero, Javier and Little, James J.}, 10 | booktitle={ICCV}, 11 | year={2017} 12 | } 13 | ``` 14 | 15 |
16 | 17 | 18 | 19 |
20 | Human3.6M (TPAMI'2014) 21 | 22 | ```bibtex 23 | @article{h36m_pami, 24 | author = {Ionescu, Catalin and Papava, Dragos and Olaru, Vlad and Sminchisescu, Cristian}, 25 | title = {Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human Sensing in Natural Environments}, 26 | journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, 27 | publisher = {IEEE Computer Society}, 28 | volume = {36}, 29 | number = {7}, 30 | pages = {1325-1339}, 31 | month = {jul}, 32 | year = {2014} 33 | } 34 | ``` 35 | 36 |
37 | 38 | Results on Human3.6M dataset with ground truth 2D detections 39 | 40 | | Arch | MPJPE | P-MPJPE | ckpt | log | 41 | | :--- | :---: | :---: | :---: | :---: | 42 | | [simple_baseline_3d_tcn1](/configs/body/3d_kpt_sview_rgb_img/pose_lift/h36m/simplebaseline3d_h36m.py) | 43.4 | 34.3 | [ckpt](https://download.openmmlab.com/mmpose/body3d/simple_baseline/simple3Dbaseline_h36m-f0ad73a4_20210419.pth) | [log](https://download.openmmlab.com/mmpose/body3d/simple_baseline/20210415_065056.log.json) | 43 | 44 | 1 Differing from the original paper, we didn't apply the `max-norm constraint` because we found this led to better convergence and performance. 45 | -------------------------------------------------------------------------------- /rfvision/datasets/shapenet_v2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import numpy as np 4 | from . import DATASETS 5 | from .pipelines import Compose 6 | from .custom3d import Custom3DDataset 7 | 8 | @DATASETS.register_module() 9 | class ShapeNetCoreV2HDF5(Custom3DDataset): 10 | def __init__(self, 11 | data_root, 12 | pipeline, 13 | split='all', 14 | test_mode=False): 15 | assert split in ['all', 'test', 'val', 'train'] 16 | with open(os.path.join(data_root, 'shape_names.txt')) as f: 17 | self.CLASSES = [line.strip() for line in f]  # strip trailing newlines from class names 18 | self.points, self.labels = (), () 19 | for filename in os.listdir(data_root): 20 | if filename.endswith('.h5') and (split in filename or split == 'all'): 21 | f = h5py.File(os.path.join(data_root, filename), 'r') 22 | points = np.array(f['data']) 23 | labels = np.array(f['label']) 24 | self.points += (points,) 25 | self.labels += (labels,) 26 | f.close() 27 | else: 28 | continue 29 | self.points = np.concatenate(self.points) 30 | self.labels = np.concatenate(self.labels) 31 | self.labels_onehot = np.eye(len(self.CLASSES))[self.labels.flatten()] 32 | 33 | self._set_group_flag() 34 | self.pipeline = Compose(pipeline) 35 | 36 | def __len__(self): 37 | return len(self.points) 38 | 39 | def __getitem__(self, index): 40 | results = {'points': self.points[index], 41 | 'labels': self.labels[index], 42 | 'labels_onehot': self.labels_onehot[index] 43 | } 44 | results = self.pipeline(results) 45 | return results 46 | 47 | def _set_group_flag(self): 48 | self.flag = np.zeros(len(self), dtype=np.uint8) 49 | 50 | 51 | -------------------------------------------------------------------------------- /flows/detectors/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py' 2 | 3 | model = dict( 4 | init_cfg='open-mmlab://detectron2/resnet50_caffe', 5 | bbox_head=dict( 6 | norm_on_bbox=True, 7 | centerness_on_reg=True, 8 | dcn_on_last_conv=False, 9 | center_sampling=True, 10 | conv_bias=True, 11 | loss_bbox=dict(type='GIoULoss', loss_weight=1.0)), 12 | # training and testing settings 13 | test_cfg=dict(nms=dict(type='nms', iou_threshold=0.6))) 14 | 15 | # dataset settings 16 | img_norm_cfg = dict( 17 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 18 | train_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='LoadAnnotations', with_bbox=True), 21 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 22 | dict(type='RandomFlip', flip_ratio=0.5), 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='Pad', size_divisor=32), 25 | dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict( 31 | type='MultiScaleFlipAug', 32 | img_scale=(1333, 800), 33 | flip=False, 34 | transforms=[ 35 | dict(type='Resize', keep_ratio=True), 36 | dict(type='RandomFlip'), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='Pad', size_divisor=32), 39 | dict(type='ImageToTensor', keys=['img']), 40 | dict(type='Collect', keys=['img']), 41 | ]) 42 | ] 43 | data = dict( 44 | samples_per_gpu=2, 45 | workers_per_gpu=2, 46 | train=dict(pipeline=train_pipeline), 47 | val=dict(pipeline=test_pipeline), 48 | test=dict(pipeline=test_pipeline)) 49 | optimizer_config = dict(_delete_=True, grad_clip=None) 50 | 51 | lr_config = dict(warmup='linear') 52 | -------------------------------------------------------------------------------- /rfvision/data/alfred/load_alfred_data.py: -------------------------------------------------------------------------------- 1 | # Modified from 2 | # https://github.com/facebookresearch/votenet/blob/master/scannet/load_scannet_data.py 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | # 5 | # This source code is licensed under the MIT license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | """ 8 | Load Alfred scenes with vertices and ground truth labels for semantic and 9 | instance segmentations. 10 | """ 11 | 12 | import numpy as np 13 | 14 | def export(pc, abs_dir, output_file=None, need_rgb=True, test_mode=False): 15 | """Export original files to vert, ins_label, sem_label and bbox file. 16 | 17 | Args: 18 | pc: Scene loader providing ``get_point_cloud``, ``get_point_cloud_norgb``, 19 | ``get_3d_bbox`` and ``get_object``. 20 | abs_dir (str): Absolute directory of the scene. 21 | output_file (str): Path of the output folder. Default: None. 22 | need_rgb (bool): Whether rgb data is needed. Default: True. 23 | test_mode (bool): Whether it is generating test data without labels. 24 | Default: False. 25 | 26 | It returns a tuple, which contains the following things: 27 | np.ndarray: Vertices of points data. 28 | np.ndarray: Instance bboxes. 29 | """ 30 | 31 | if need_rgb: 32 | mesh_vertices = pc.get_point_cloud(abs_dir) 33 | else: 34 | mesh_vertices = pc.get_point_cloud_norgb(abs_dir) 35 | 36 | bboxes = pc.get_3d_bbox(abs_dir) 37 | bboxes_arr = [] 38 | for bbox in bboxes: 39 | bboxes_arr.append(bboxes[bbox]) 40 | bbox = np.array(bboxes_arr) 41 | obj = np.array(pc.get_object(abs_dir)) 42 | assert bbox.shape[0] == obj.shape[0] 43 | 44 | assert not np.isnan(bbox).any() 45 | assert not np.isnan(mesh_vertices).any() 46 | 47 | if output_file is not None: 48 | np.save(output_file + '_vert.npy', mesh_vertices) 49 | np.save(output_file + '_bbox.npy', bbox) 50 | np.save(output_file + '_label.npy', obj) 51 | # return the stacked bbox array so the return value matches the docstring 52 | return mesh_vertices, bbox, obj 53 | -------------------------------------------------------------------------------- /rfvision/core/utils_pose/dist_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
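# Helpers for all-reducing gradients across distributed workers, optionally
# coalescing tensors into size-capped buckets to reduce communication calls.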
2 | from collections import OrderedDict 3 | 4 | import torch.distributed as dist 5 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 6 | _unflatten_dense_tensors) 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | """Allreduce parameters as a whole.""" 11 | if bucket_size_mb > 0: 12 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 13 | buckets = _take_tensors(tensors, bucket_size_bytes) 14 | else: 15 | buckets = OrderedDict() 16 | for tensor in tensors: 17 | tp = tensor.type() 18 | if tp not in buckets: 19 | buckets[tp] = [] 20 | buckets[tp].append(tensor) 21 | buckets = buckets.values() 22 | 23 | for bucket in buckets: 24 | flat_tensors = _flatten_dense_tensors(bucket) 25 | dist.all_reduce(flat_tensors) 26 | flat_tensors.div_(world_size) 27 | for tensor, synced in zip( 28 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 29 | tensor.copy_(synced) 30 | 31 | 32 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 33 | """Allreduce gradients. 34 | 35 | Args: 36 | params (list[torch.nn.Parameter]): List of parameters of a model. 37 | coalesce (bool, optional): Whether allreduce parameters as a whole. 38 | Default: True. 39 | bucket_size_mb (int, optional): Size of bucket, the unit is MB. 40 | Default: -1. 41 | """ 42 | grads = [ 43 | param.grad.data for param in params 44 | if param.requires_grad and param.grad is not None 45 | ] 46 | world_size = dist.get_world_size() 47 | if coalesce: 48 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 49 | else: 50 | for tensor in grads: 51 | dist.all_reduce(tensor.div_(world_size)) 52 | -------------------------------------------------------------------------------- /flows/detectors/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r101_fpn_1x_coco.py' 2 | model = dict( 3 | init_cfg='open-mmlab://detectron2/resnext101_32x8d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=32, 8 | base_width=8, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=False), 13 | style='pytorch')) 14 | 15 | dataset_type = 'CocoDataset' 16 | data_root = 'data/coco/' 17 | img_norm_cfg = dict( 18 | mean=[103.530, 116.280, 123.675], 19 | std=[57.375, 57.120, 58.395], 20 | to_rgb=False) 21 | train_pipeline = [ 22 | dict(type='LoadImageFromFile'), 23 | dict( 24 | type='LoadAnnotations', 25 | with_bbox=True, 26 | with_mask=True, 27 | poly2mask=False), 28 | dict( 29 | type='Resize', 30 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 31 | (1333, 768), (1333, 800)], 32 | multiscale_mode='value', 33 | keep_ratio=True), 34 | dict(type='RandomFlip', flip_ratio=0.5), 35 | dict(type='Normalize', **img_norm_cfg), 36 | dict(type='Pad', size_divisor=32), 37 | dict(type='DefaultFormatBundle'), 38 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 39 | ] 40 | test_pipeline = [ 41 | dict(type='LoadImageFromFile'), 42 | dict( 43 | type='MultiScaleFlipAug', 44 | img_scale=(1333, 800), 45 | flip=False, 46 | transforms=[ 47 | dict(type='Resize', keep_ratio=True), 48 | dict(type='RandomFlip'), 49 | dict(type='Normalize', **img_norm_cfg), 50 | dict(type='Pad', size_divisor=32), 51 | dict(type='ImageToTensor', keys=['img']), 52 | dict(type='Collect', keys=['img']), 53 | ]) 54 | ] 55 | data = dict( 56 | train=dict(pipeline=train_pipeline), 57 | val=dict(pipeline=test_pipeline), 58 |
test=dict(pipeline=test_pipeline)) 59 | -------------------------------------------------------------------------------- /rfvision/tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import warnings 3 | 4 | from rflib import Config, DictAction 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser(description='Print the whole config') 9 | parser.add_argument('config', help='config file path') 10 | parser.add_argument( 11 | '--options', 12 | nargs='+', 13 | action=DictAction, 14 | help='override some settings in the used config, the key-value pair ' 15 | 'in xxx=yyy format will be merged into config file (deprecated), ' 16 | 'change to --cfg-options instead.') 17 | parser.add_argument( 18 | '--cfg-options', 19 | nargs='+', 20 | action=DictAction, 21 | help='override some settings in the used config, the key-value pair ' 22 | 'in xxx=yyy format will be merged into config file. If the value to ' 23 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b. ' 24 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]". ' 25 | 'Note that the quotation marks are necessary and that no white space ' 26 | 'is allowed.') 27 | args = parser.parse_args() 28 | 29 | if args.options and args.cfg_options: 30 | raise ValueError( 31 | '--options and --cfg-options cannot be both ' 32 | 'specified, --options is deprecated in favor of --cfg-options') 33 | if args.options: 34 | warnings.warn('--options is deprecated in favor of --cfg-options') 35 | args.cfg_options = args.options 36 | 37 | return args 38 | 39 | 40 | def main(): 41 | args = parse_args() 42 | 43 | cfg = Config.fromfile(args.config) 44 | if args.cfg_options is not None: 45 | cfg.merge_from_dict(args.cfg_options) 46 | # import modules from string list. 47 | if cfg.get('custom_imports', None): 48 | from rflib.utils import import_modules_from_strings 49 | import_modules_from_strings(**cfg['custom_imports']) 50 | print(f'Config:\n{cfg.pretty_text}') 51 | 52 | 53 | if __name__ == '__main__': 54 | main() 55 | -------------------------------------------------------------------------------- /rfvision/data/sunrgbd/matlab/extract_rgbd_data_v1.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | 6 | %% Dump SUNRGBD data to our format 7 | % for each sample, we have RGB image, 2d boxes. 8 | % point cloud (in camera coordinate), calibration and 3d boxes. 9 | % 10 | % Extract using V1 labels. 11 | % 12 | % Author: Charles R. Qi 13 | % 14 | clear; close all; clc; 15 | addpath(genpath('.')) 16 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox') 17 | %% V1 2D&3D BB and Seg masks 18 | load('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/Metadata/SUNRGBDMeta.mat') 19 | % load('./Metadata/SUNRGBD2Dseg.mat') 20 | 21 | %% Create folders 22 | det_label_folder = '../sunrgbd_trainval/label_v1/'; 23 | mkdir(det_label_folder); 24 | %% Read 25 | for imageId = 1:10335 26 | imageId 27 | try 28 | data = SUNRGBDMeta(imageId); 29 | data.depthpath(1:16) = ''; 30 | data.depthpath = strcat('../OFFICIAL_SUNRGBD/SUNRGBD', data.depthpath); 31 | data.rgbpath(1:16) = ''; 32 | data.rgbpath = strcat('../OFFICIAL_SUNRGBD/SUNRGBD', data.rgbpath); 33 | 34 | % MAT files are 3x smaller than TXT files.
In Python we can use 35 | % scipy.io.loadmat('xxx.mat')['points3d_rgb'] to load the data. 36 | mat_filename = strcat(num2str(imageId,'%06d'), '.mat'); 37 | txt_filename = strcat(num2str(imageId,'%06d'), '.txt'); 38 | 39 | % Write 2D and 3D box label 40 | data2d = data; 41 | fid = fopen(strcat(det_label_folder, txt_filename), 'w'); 42 | for j = 1:length(data.groundtruth3DBB) 43 | centroid = data.groundtruth3DBB(j).centroid; 44 | classname = data.groundtruth3DBB(j).classname; 45 | orientation = data.groundtruth3DBB(j).orientation; 46 | coeffs = abs(data.groundtruth3DBB(j).coeffs); 47 | box2d = data2d.groundtruth2DBB(j).gtBb2D; 48 | fprintf(fid, '%s %d %d %d %d %f %f %f %f %f %f %f %f\n', classname, box2d(1), box2d(2), box2d(3), box2d(4), centroid(1), centroid(2), centroid(3), coeffs(1), coeffs(2), coeffs(3), orientation(1), orientation(2)); 49 | end 50 | fclose(fid); 51 | 52 | catch 53 | end 54 | 55 | end 56 | --------------------------------------------------------------------------------