├── .flake8 ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── README_CN.md ├── autonomous_driving ├── Online-HD-Map-Construction │ ├── LICENSE │ ├── README.md │ ├── src │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── _base_ │ │ │ │ ├── datasets │ │ │ │ │ ├── coco_instance.py │ │ │ │ │ ├── kitti-3d-3class.py │ │ │ │ │ ├── kitti-3d-car.py │ │ │ │ │ ├── lyft-3d.py │ │ │ │ │ ├── nuim_instance.py │ │ │ │ │ ├── nus-3d.py │ │ │ │ │ ├── nus-mono3d.py │ │ │ │ │ ├── range100_lyft-3d.py │ │ │ │ │ ├── s3dis_seg-3d-13class.py │ │ │ │ │ ├── scannet-3d-18class.py │ │ │ │ │ ├── scannet_seg-3d-20class.py │ │ │ │ │ ├── sunrgbd-3d-10class.py │ │ │ │ │ ├── waymoD5-3d-3class.py │ │ │ │ │ └── waymoD5-3d-car.py │ │ │ │ ├── default_runtime.py │ │ │ │ ├── models │ │ │ │ │ ├── 3dssd.py │ │ │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ │ │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ │ │ │ ├── fcos3d.py │ │ │ │ │ ├── groupfree3d.py │ │ │ │ │ ├── h3dnet.py │ │ │ │ │ ├── hv_pointpillars_fpn_lyft.py │ │ │ │ │ ├── hv_pointpillars_fpn_nus.py │ │ │ │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ │ │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ │ │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ │ │ │ ├── hv_second_secfpn_kitti.py │ │ │ │ │ ├── hv_second_secfpn_waymo.py │ │ │ │ │ ├── imvotenet_image.py │ │ │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ │ │ ├── paconv_cuda_ssg.py │ │ │ │ │ ├── paconv_ssg.py │ │ │ │ │ ├── parta2.py │ │ │ │ │ ├── pointnet2_msg.py │ │ │ │ │ ├── pointnet2_ssg.py │ │ │ │ │ └── votenet.py │ │ │ │ └── schedules │ │ │ │ │ ├── cosine.py │ │ │ │ │ ├── cyclic_20e.py │ │ │ │ │ ├── cyclic_40e.py │ │ │ │ │ ├── mmdet_schedule_1x.py │ │ │ │ │ ├── schedule_2x.py │ │ │ │ │ ├── schedule_3x.py │ │ │ │ │ ├── seg_cosine_150e.py │ │ │ │ │ ├── seg_cosine_200e.py │ │ │ │ │ └── seg_cosine_50e.py │ │ │ ├── vectormapnet.py │ │ │ └── vectormapnet_intern.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── argo_dataset.py │ │ │ ├── base_dataset.py │ │ │ ├── evaluation │ │ │ │ ├── AP.py │ │ │ │ ├── __init__.py │ │ │ │ ├── distance.py │ │ │ │ └── vector_eval.py │ │ │ └── pipelines │ │ │ │ ├── __init__.py │ │ │ │ ├── formating.py │ │ │ │ ├── loading.py │ │ │ │ ├── poly_bbox.py │ │ │ │ ├── transform.py │ │ │ │ └── vectorize.py │ │ └── models │ │ │ ├── __init__.py │ │ │ ├── assigner │ │ │ ├── __init__.py │ │ │ ├── assigner.py │ │ │ └── match_cost.py │ │ │ ├── augmentation │ │ │ ├── __init__.py │ │ │ └── sythesis_det.py │ │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ ├── internimage.py │ │ │ └── ipm_backbone.py │ │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── base_map_head.py │ │ │ ├── detgen_utils │ │ │ │ ├── __init__.py │ │ │ │ ├── causal_trans.py │ │ │ │ └── utils.py │ │ │ ├── detr_bbox.py │ │ │ ├── detr_head.py │ │ │ ├── dg_head.py │ │ │ ├── map_element_detector.py │ │ │ └── polyline_generator.py │ │ │ ├── losses │ │ │ ├── __init__.py │ │ │ └── detr_loss.py │ │ │ ├── mapers │ │ │ ├── __init__.py │ │ │ ├── base_mapper.py │ │ │ └── vectormapnet.py │ │ │ └── transformer_utils │ │ │ ├── __init__.py │ │ │ ├── base_transformer.py │ │ │ ├── deformable_transformer.py │ │ │ └── fp16_dattn.py │ └── tools │ │ ├── dist_test.sh │ │ ├── dist_train.sh │ │ ├── evaluate_submission.py │ │ ├── mmdet_test.py │ │ ├── mmdet_train.py │ │ ├── test.py │ │ ├── train.py │ │ └── visualization │ │ ├── renderer.py │ │ └── visualize.py ├── README.md ├── occupancy_prediction │ ├── CITATION.cff │ ├── CODE_OF_CONDUCT.md │ ├── LICENSE │ ├── README.md │ ├── docs │ │ └── 
getting_started.md │ ├── projects │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── _base_ │ │ │ │ ├── datasets │ │ │ │ │ ├── coco_instance.py │ │ │ │ │ ├── kitti-3d-3class.py │ │ │ │ │ ├── kitti-3d-car.py │ │ │ │ │ ├── lyft-3d.py │ │ │ │ │ ├── nuim_instance.py │ │ │ │ │ ├── nus-3d.py │ │ │ │ │ ├── nus-mono3d.py │ │ │ │ │ ├── range100_lyft-3d.py │ │ │ │ │ ├── s3dis-3d-5class.py │ │ │ │ │ ├── s3dis_seg-3d-13class.py │ │ │ │ │ ├── scannet-3d-18class.py │ │ │ │ │ ├── scannet_seg-3d-20class.py │ │ │ │ │ ├── sunrgbd-3d-10class.py │ │ │ │ │ ├── waymoD5-3d-3class.py │ │ │ │ │ └── waymoD5-3d-car.py │ │ │ │ ├── default_runtime.py │ │ │ │ ├── models │ │ │ │ │ ├── 3dssd.py │ │ │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ │ │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ │ │ │ ├── fcos3d.py │ │ │ │ │ ├── groupfree3d.py │ │ │ │ │ ├── h3dnet.py │ │ │ │ │ ├── hv_pointpillars_fpn_lyft.py │ │ │ │ │ ├── hv_pointpillars_fpn_nus.py │ │ │ │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ │ │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ │ │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ │ │ │ ├── hv_second_secfpn_kitti.py │ │ │ │ │ ├── hv_second_secfpn_waymo.py │ │ │ │ │ ├── imvotenet_image.py │ │ │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ │ │ ├── paconv_cuda_ssg.py │ │ │ │ │ ├── paconv_ssg.py │ │ │ │ │ ├── parta2.py │ │ │ │ │ ├── pointnet2_msg.py │ │ │ │ │ ├── pointnet2_ssg.py │ │ │ │ │ └── votenet.py │ │ │ │ └── schedules │ │ │ │ │ ├── cosine.py │ │ │ │ │ ├── cyclic_20e.py │ │ │ │ │ ├── cyclic_40e.py │ │ │ │ │ ├── mmdet_schedule_1x.py │ │ │ │ │ ├── schedule_2x.py │ │ │ │ │ ├── schedule_3x.py │ │ │ │ │ ├── seg_cosine_150e.py │ │ │ │ │ ├── seg_cosine_200e.py │ │ │ │ │ └── seg_cosine_50e.py │ │ │ ├── bevformer │ │ │ │ ├── .ipynb_checkpoints │ │ │ │ │ └── bevformer_small_occ-checkpoint.py │ │ │ │ ├── bevformer_base_occ.py │ │ │ │ ├── bevformer_intern-s_occ.py │ │ │ │ └── bevformer_small_occ.py │ │ │ └── datasets │ │ │ │ └── custom_nus-3d.py │ │ └── mmdet3d_plugin │ │ │ ├── __init__.py │ │ │ ├── bevformer │ │ │ ├── __init__.py │ │ │ ├── apis │ │ │ │ ├── __init__.py │ │ │ │ ├── mmdet_train.py │ │ │ │ ├── test.py │ │ │ │ └── train.py │ │ │ ├── backbones │ │ │ │ ├── __init__.py │ │ │ │ ├── custom_layer_decay_optimizer_constructor.py │ │ │ │ ├── internimage.py │ │ │ │ └── ops_dcnv3 │ │ │ │ │ ├── functions │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── dcnv3_func.py │ │ │ │ │ ├── make.sh │ │ │ │ │ ├── modules │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── dcnv3.py │ │ │ │ │ ├── setup.py │ │ │ │ │ ├── src │ │ │ │ │ ├── cpu │ │ │ │ │ │ ├── dcnv3_cpu.cpp │ │ │ │ │ │ └── dcnv3_cpu.h │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── dcnv3_cuda.cu │ │ │ │ │ │ ├── dcnv3_cuda.h │ │ │ │ │ │ └── dcnv3_im2col_cuda.cuh │ │ │ │ │ ├── dcnv3.h │ │ │ │ │ └── vision.cpp │ │ │ │ │ └── test.py │ │ │ ├── dense_heads │ │ │ │ ├── __init__.py │ │ │ │ └── bevformer_occ_head.py │ │ │ ├── detectors │ │ │ │ ├── __init__.py │ │ │ │ └── bevformer_occ.py │ │ │ ├── hooks │ │ │ │ ├── __init__.py │ │ │ │ └── custom_hooks.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── custom_base_transformer_layer.py │ │ │ │ ├── decoder.py │ │ │ │ ├── encoder.py │ │ │ │ ├── multi_scale_deformable_attn_function.py │ │ │ │ ├── spatial_cross_attention.py │ │ │ │ ├── temporal_self_attention.py │ │ │ │ ├── transformer.py │ │ │ │ └── transformer_occ.py │ │ │ └── runner │ │ │ │ ├── __init__.py │ │ │ │ └── epoch_based_runner.py │ │ │ ├── core │ │ │ ├── bbox │ │ │ │ ├── assigners │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── hungarian_assigner_3d.py │ │ │ │ ├── 
coders │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── nms_free_coder.py │ │ │ │ ├── match_costs │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── match_cost.py │ │ │ │ └── util.py │ │ │ └── evaluation │ │ │ │ ├── __init__.py │ │ │ │ └── eval_hooks.py │ │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── nuscenes_dataset.py │ │ │ ├── nuscenes_occ.py │ │ │ ├── nuscnes_eval.py │ │ │ ├── occ_metrics.py │ │ │ ├── pipelines │ │ │ │ ├── __init__.py │ │ │ │ ├── formating.py │ │ │ │ ├── loading.py │ │ │ │ └── transform_3d.py │ │ │ └── samplers │ │ │ │ ├── __init__.py │ │ │ │ ├── distributed_sampler.py │ │ │ │ ├── group_sampler.py │ │ │ │ └── sampler.py │ │ │ └── models │ │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ └── vovnet.py │ │ │ ├── hooks │ │ │ ├── __init__.py │ │ │ └── hooks.py │ │ │ ├── opt │ │ │ ├── __init__.py │ │ │ └── adamw.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── bricks.py │ │ │ ├── grid_mask.py │ │ │ ├── position_embedding.py │ │ │ ├── positional_encoding.py │ │ │ └── visual.py │ ├── tools │ │ ├── .ipynb_checkpoints │ │ │ └── train-checkpoint.py │ │ ├── analysis_tools │ │ │ ├── __init__.py │ │ │ ├── analyze_logs.py │ │ │ ├── benchmark.py │ │ │ ├── get_params.py │ │ │ └── visual.py │ │ ├── create_data.py │ │ ├── data_converter │ │ │ ├── __init__.py │ │ │ ├── create_gt_database.py │ │ │ ├── nuscenes_converter.py │ │ │ └── nuscenes_occ_converter.py │ │ ├── dist_test.sh │ │ ├── dist_train.sh │ │ ├── fp16 │ │ │ ├── dist_train.sh │ │ │ └── train.py │ │ ├── misc │ │ │ ├── browse_dataset.py │ │ │ ├── fuse_conv_bn.py │ │ │ ├── print_config.py │ │ │ └── visualize_results.py │ │ ├── model_converters │ │ │ ├── convert_votenet_checkpoints.py │ │ │ ├── publish_model.py │ │ │ └── regnet2mmdet.py │ │ ├── slurm_train.sh │ │ ├── test.py │ │ └── train.py │ └── utils │ │ └── vis.py └── openlane-v2 │ ├── .gitignore │ ├── CITATION │ ├── CODE_OF_CONDUCT │ ├── LICENSE │ ├── README-zh-hans.md │ ├── README.md │ ├── data │ ├── OpenLane-V2 │ │ ├── data_dict_sample.json │ │ ├── data_dict_subset_A.json │ │ ├── openlanev2.md5 │ │ └── preprocess.py │ └── README.md │ ├── docs │ ├── annotation.md │ ├── devkit.md │ ├── metrics.md │ ├── statistics.md │ └── submission.md │ ├── imgs │ ├── lane.gif │ ├── poster.gif │ ├── topology.gif │ └── traffic_element.gif │ ├── openlanev2 │ ├── __init__.py │ ├── dataset │ │ ├── __init__.py │ │ ├── collection.py │ │ └── frame.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── distance.py │ │ ├── evaluate.py │ │ └── f_score.py │ ├── io │ │ ├── __init__.py │ │ └── io.py │ ├── preprocessing │ │ ├── __init__.py │ │ ├── check.py │ │ └── collect.py │ ├── utils.py │ └── visualization │ │ ├── __init__.py │ │ ├── bev.py │ │ ├── pv.py │ │ └── utils.py │ ├── plugin │ └── mmdet3d │ │ ├── baseline │ │ ├── __init__.py │ │ ├── core │ │ │ ├── __init__.py │ │ │ └── bbox │ │ │ │ ├── __init__.py │ │ │ │ ├── assigners.py │ │ │ │ └── match_costs.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── openlane_v2_dataset.py │ │ │ └── pipelines │ │ │ │ ├── __init__.py │ │ │ │ ├── formating.py │ │ │ │ ├── loading.py │ │ │ │ └── transforms.py │ │ └── models │ │ │ ├── __init__.py │ │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ ├── intern_image.py │ │ │ └── ops_dcnv3 │ │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── dcnv3_func.py │ │ │ │ ├── make.sh │ │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── dcnv3.py │ │ │ │ ├── setup.py │ │ │ │ ├── src │ │ │ │ ├── cpu │ │ │ │ │ ├── dcnv3_cpu.cpp │ │ │ │ │ └── dcnv3_cpu.h │ │ │ │ ├── cuda │ │ │ │ │ ├── dcnv3_cuda.cu │ │ │ │ │ ├── dcnv3_cuda.h │ │ │ │ │ 
└── dcnv3_im2col_cuda.cuh │ │ │ │ ├── dcnv3.h │ │ │ │ └── vision.cpp │ │ │ │ └── test.py │ │ │ ├── detectors │ │ │ ├── __init__.py │ │ │ ├── baseline.py │ │ │ └── road_bev.py │ │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── custom_detr_head.py │ │ │ ├── lc_deformable_detr_head.py │ │ │ ├── relationship_head.py │ │ │ ├── te_deformable_detr_head.py │ │ │ └── topology_head.py │ │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── bevformer_constructer.py │ │ │ ├── custom_base_transformer_layer.py │ │ │ ├── decoder.py │ │ │ ├── encoder.py │ │ │ ├── multi_scale_deformable_attn_function.py │ │ │ ├── spatial_cross_attention.py │ │ │ ├── temporal_self_attention.py │ │ │ └── transformer.py │ │ │ └── necks │ │ │ ├── __init__.py │ │ │ ├── custom_fpn.py │ │ │ └── custom_ipm_view_transformer.py │ │ └── configs │ │ ├── baseline.py │ │ ├── baseline_large.py │ │ └── internimage-s.py │ ├── requirements.txt │ ├── setup.py │ ├── tools │ ├── analysis_tools │ │ ├── analyze_logs.py │ │ ├── benchmark.py │ │ └── get_flops.py │ ├── create_data.py │ ├── create_data.sh │ ├── data_converter │ │ ├── __init__.py │ │ ├── create_gt_database.py │ │ ├── indoor_converter.py │ │ ├── kitti_converter.py │ │ ├── kitti_data_utils.py │ │ ├── lyft_converter.py │ │ ├── lyft_data_fixer.py │ │ ├── nuimage_converter.py │ │ ├── nuscenes_converter.py │ │ ├── s3dis_data_utils.py │ │ ├── scannet_data_utils.py │ │ ├── sunrgbd_data_utils.py │ │ └── waymo_converter.py │ ├── deployment │ │ ├── mmdet3d2torchserve.py │ │ ├── mmdet3d_handler.py │ │ └── test_torchserver.py │ ├── dist_test.sh │ ├── dist_train.sh │ ├── misc │ │ ├── browse_dataset.py │ │ ├── fuse_conv_bn.py │ │ ├── print_config.py │ │ └── visualize_results.py │ ├── model_converters │ │ ├── convert_h3dnet_checkpoints.py │ │ ├── convert_votenet_checkpoints.py │ │ ├── publish_model.py │ │ └── regnet2mmdet.py │ ├── slurm_test.sh │ ├── slurm_train.sh │ ├── test.py │ ├── train.py │ ├── update_data_coords.py │ └── update_data_coords.sh │ └── tutorial.ipynb ├── classification ├── README.md ├── config.py ├── configs │ ├── accelerate │ │ ├── deepspeed │ │ │ ├── ds_config_zero1.json │ │ │ ├── ds_config_zero1_wo_loss_scale.json │ │ │ ├── ds_config_zero3_offload.json │ │ │ └── ds_config_zero3_offload_wo_loss_scale.json │ │ ├── dist_8gpus_ddp_fp16.yaml │ │ ├── dist_8gpus_zero1.yaml │ │ ├── dist_8gpus_zero1_wo_loss_scale.yaml │ │ ├── dist_8gpus_zero3_offload.yaml │ │ └── dist_8gpus_zero3_offload_wo_loss_scale.yaml │ ├── inaturalist2018 │ │ └── internimage_h_22ktoinat18_384.yaml │ ├── internimage_b_1k_224.yaml │ ├── internimage_g_22kto1k_512.yaml │ ├── internimage_h_22kto1k_384.yaml │ ├── internimage_h_22kto1k_640.yaml │ ├── internimage_l_22kto1k_384.yaml │ ├── internimage_s_1k_224.yaml │ ├── internimage_t_1k_224.yaml │ ├── internimage_xl_22kto1k_384.yaml │ └── without_lr_decay │ │ ├── internimage_b_1k_224.yaml │ │ ├── internimage_g_22kto1k_512.yaml │ │ ├── internimage_h_22kto1k_640.yaml │ │ ├── internimage_l_22kto1k_384.yaml │ │ ├── internimage_s_1k_224.yaml │ │ ├── internimage_t_1k_224.yaml │ │ └── internimage_xl_22kto1k_384.yaml ├── dataset │ ├── __init__.py │ ├── build.py │ ├── cached_image_folder.py │ ├── samplers.py │ └── zipreader.py ├── ddp_hooks.py ├── ema_deepspeed.py ├── export.py ├── extract_feature.py ├── huggingface │ ├── 22k_model │ │ ├── internimage_g_jointto22k_384 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── 
internimage_h_jointto22k_384 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── internimage_l_22k_384 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ └── internimage_xl_22k_384 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ ├── README.md │ ├── convert.py │ ├── in1k_model │ │ ├── internimage_b_1k_224 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── internimage_g_22kto1k_512 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── internimage_h_22kto1k_640 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── internimage_l_22kto1k_384 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── internimage_s_1k_224 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── internimage_t_1k_224 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ └── internimage_xl_22kto1k_384 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ └── test.py ├── logger.py ├── lr_scheduler.py ├── main.py ├── main_accelerate.py ├── main_deepspeed.py ├── meta_data │ ├── 22k_class_to_idx.json │ ├── map22kto1k.txt │ ├── train.txt.zip │ └── val.txt.zip ├── models │ ├── __init__.py │ ├── build.py │ ├── intern_image.py │ └── intern_image_meta_former.py ├── ops_dcnv3 │ ├── functions │ │ ├── __init__.py │ │ └── dcnv3_func.py │ ├── make.sh │ ├── modules │ │ ├── __init__.py │ │ └── dcnv3.py │ ├── setup.py │ ├── src │ │ ├── cpu │ │ │ ├── dcnv3_cpu.cpp │ │ │ └── dcnv3_cpu.h │ │ ├── cuda │ │ │ ├── dcnv3_cuda.cu │ │ │ ├── dcnv3_cuda.h │ │ │ └── dcnv3_im2col_cuda.cuh │ │ ├── dcnv3.h │ │ └── vision.cpp │ └── test.py ├── optimizer.py ├── train_in1k.sh ├── train_in1k_deepspeed.sh ├── train_inat18.sh └── utils.py ├── detection ├── README.md ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── cityscapes_detection.py │ │ │ ├── cityscapes_instance.py │ │ │ ├── coco_detection.py │ │ │ ├── coco_instance.py │ │ │ ├── coco_panoptic.py │ │ │ ├── crowd_human.py │ │ │ ├── deepfashion.py │ │ │ ├── lvis_v0.5_instance.py │ │ │ ├── lvis_v1_instance.py │ │ │ ├── lvis_v1_instance_minival.py │ │ │ ├── obj365_detection.py │ │ │ ├── openimages_detection.py │ │ │ ├── voc0712.py │ │ │ └── wider_face.py │ │ ├── 
default_runtime.py │ │ ├── models │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ ├── cascade_mask_rcnn_r50_fpn_crowdhuman.py │ │ │ ├── cascade_rcnn_r50_fpn.py │ │ │ ├── fast_rcnn_r50_fpn.py │ │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ │ ├── faster_rcnn_r50_fpn.py │ │ │ ├── mask_rcnn_convnext_fpn.py │ │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ ├── retinanet_r50_fpn.py │ │ │ ├── rpn_r50_caffe_c4.py │ │ │ ├── rpn_r50_fpn.py │ │ │ └── ssd300.py │ │ └── schedules │ │ │ ├── schedule_1x.py │ │ │ ├── schedule_20e.py │ │ │ ├── schedule_2x.py │ │ │ ├── schedule_3x.py │ │ │ └── schedule_6x.py │ ├── coco │ │ ├── README.md │ │ ├── cascade_internimage_l_fpn_1x_coco.py │ │ ├── cascade_internimage_l_fpn_3x_coco.py │ │ ├── cascade_internimage_xl_fpn_1x_coco.py │ │ ├── cascade_internimage_xl_fpn_3x_coco.py │ │ ├── dino_4scale_cbinternimage_h_objects365_coco_ss.py │ │ ├── dino_4scale_internimage_g_objects365_coco_ss.py │ │ ├── dino_4scale_internimage_h_objects365_coco_ss.py │ │ ├── dino_4scale_internimage_l_1x_coco_0.1x_backbone_lr.py │ │ ├── dino_4scale_internimage_l_1x_coco_layer_wise_lr.py │ │ ├── dino_4scale_internimage_t_1x_coco_layer_wise_lr.py │ │ ├── mask_rcnn_internimage_b_fpn_1x_coco.py │ │ ├── mask_rcnn_internimage_b_fpn_3x_coco.py │ │ ├── mask_rcnn_internimage_s_fpn_1x_coco.py │ │ ├── mask_rcnn_internimage_s_fpn_3x_coco.py │ │ ├── mask_rcnn_internimage_t_fpn_1x_coco.py │ │ ├── mask_rcnn_internimage_t_fpn_1x_coco_with_dcnv4.py │ │ └── mask_rcnn_internimage_t_fpn_3x_coco.py │ ├── crowd_human │ │ ├── README.md │ │ └── cascade_internimage_xl_fpn_3x_crowd_human.py │ ├── lvis │ │ ├── README.md │ │ ├── dino_4scale_cbinternimage_h_objects365_lvis_minival_ss.py │ │ └── dino_4scale_cbinternimage_h_objects365_lvis_val_ss.py │ ├── openimages │ │ ├── README.md │ │ └── dino_4scale_cbinternimage_h_objects365_openimages_ss.py │ └── voc │ │ ├── README.md │ │ ├── dino_4scale_cbinternimage_h_objects365_voc07.py │ │ └── dino_4scale_cbinternimage_h_objects365_voc12.py ├── deploy.py ├── deploy │ ├── configs │ │ ├── _base_ │ │ │ ├── backends │ │ │ │ ├── tensorrt-fp16.py │ │ │ │ └── tensorrt.py │ │ │ └── onnx_config.py │ │ └── mmdet │ │ │ ├── _base_ │ │ │ ├── base_dynamic.py │ │ │ ├── base_instance-seg_dynamic.py │ │ │ ├── base_instance-seg_static.py │ │ │ └── base_static.py │ │ │ └── instance-seg │ │ │ └── instance-seg_tensorrt_dynamic-320x320-1344x1344.py │ └── demo.jpg ├── dist_test.sh ├── dist_train.sh ├── get_flops.py ├── image_demo.py ├── mmcv_custom │ ├── __init__.py │ ├── custom_layer_decay_optimizer_constructor.py │ └── efficient_ffn.py ├── mmdet_custom │ ├── __init__.py │ ├── datasets │ │ ├── __init__.py │ │ └── crowd_human.py │ └── models │ │ ├── __init__.py │ │ ├── backbones │ │ ├── __init__.py │ │ ├── cbnet.py │ │ └── intern_image.py │ │ ├── dense_heads │ │ ├── __init__.py │ │ ├── cbdino_head.py │ │ ├── deformable_detr_head.py │ │ ├── detr_head.py │ │ └── dino_head.py │ │ ├── detectors │ │ ├── __init__.py │ │ ├── cbnet_dino.py │ │ └── dino.py │ │ ├── necks │ │ ├── __init__.py │ │ └── cbnet_channel_mapper.py │ │ └── utils │ │ ├── __init__.py │ │ ├── query_denoising.py │ │ └── transformer.py ├── ops_dcnv3 │ ├── functions │ │ ├── __init__.py │ │ └── dcnv3_func.py │ ├── make.sh │ ├── modules │ │ ├── __init__.py │ │ └── dcnv3.py │ ├── setup.py │ ├── src │ │ ├── cpu │ │ │ ├── dcnv3_cpu.cpp │ │ │ └── dcnv3_cpu.h │ │ ├── cuda │ │ │ ├── dcnv3_cuda.cu │ │ │ ├── dcnv3_cuda.h │ │ │ └── dcnv3_im2col_cuda.cuh │ │ ├── dcnv3.h │ │ └── vision.cpp │ └── test.py 
├── slurm_test.sh ├── slurm_train.sh ├── test.py ├── tools │ ├── create_crowd_anno.py │ ├── download_dataset.py │ └── evaluate │ │ └── __init__.py └── train.py ├── docs └── figs │ ├── arch.png │ ├── intern_pipeline.png │ ├── intern_pipeline_en.png │ ├── log.png │ └── network.png ├── sam ├── engine.py └── main_zero_shot_instance_seg.py ├── segmentation ├── README.md ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── ade20k.py │ │ │ ├── chase_db1.py │ │ │ ├── cityscapes.py │ │ │ ├── cityscapes_1024x1024.py │ │ │ ├── cityscapes_extra.py │ │ │ ├── cityscapes_extra_1024x1024.py │ │ │ ├── coco-stuff10k.py │ │ │ ├── coco-stuff164k.py │ │ │ ├── drive.py │ │ │ ├── hrf.py │ │ │ ├── loveda.py │ │ │ ├── mapillary.py │ │ │ ├── mapillary_1024x1024.py │ │ │ ├── mapillary_896x896.py │ │ │ ├── nyu_depth_v2.py │ │ │ ├── pascal_context.py │ │ │ ├── pascal_context_59.py │ │ │ ├── pascal_voc12.py │ │ │ ├── pascal_voc12_aug.py │ │ │ ├── potsdam.py │ │ │ └── stare.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── mask2former_beit.py │ │ │ ├── segformer_mit-b0.py │ │ │ └── upernet_r50.py │ │ └── schedules │ │ │ ├── schedule_160k.py │ │ │ ├── schedule_20k.py │ │ │ ├── schedule_320k.py │ │ │ ├── schedule_40k.py │ │ │ └── schedule_80k.py │ ├── ade20k │ │ ├── README.md │ │ ├── mask2former_internimage_h_896_80k_cocostuff2ade20k_ms.py │ │ ├── mask2former_internimage_h_896_80k_cocostuff2ade20k_ss.py │ │ ├── upernet_internimage_b_512_160k_ade20k.py │ │ ├── upernet_internimage_g_896_160k_ade20k.py │ │ ├── upernet_internimage_h_896_160k_ade20k.py │ │ ├── upernet_internimage_l_640_160k_ade20k.py │ │ ├── upernet_internimage_s_512_160k_ade20k.py │ │ ├── upernet_internimage_t_512_160k_ade20k.py │ │ └── upernet_internimage_xl_640_160k_ade20k.py │ ├── cityscapes │ │ ├── README.md │ │ ├── mask2former_internimage_h_1024x1024_80k_mapillary2cityscapes.py │ │ ├── segformer_internimage_l_512x1024_160k_mapillary2cityscapes.py │ │ ├── segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.py │ │ ├── upernet_internimage_b_512x1024_160k_cityscapes.py │ │ ├── upernet_internimage_l_512x1024_160k_cityscapes.py │ │ ├── upernet_internimage_l_512x1024_160k_mapillary2cityscapes.py │ │ ├── upernet_internimage_s_512x1024_160k_cityscapes.py │ │ ├── upernet_internimage_t_512x1024_160k_cityscapes.py │ │ ├── upernet_internimage_xl_512x1024_160k_cityscapes.py │ │ └── upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.py │ ├── coco_stuff10k │ │ ├── README.md │ │ └── mask2former_internimage_h_512_40k_cocostuff164k_to_10k.py │ ├── coco_stuff164k │ │ ├── README.md │ │ └── mask2former_internimage_h_896_80k_cocostuff164k.py │ ├── mapillary │ │ ├── README.md │ │ ├── mask2former_internimage_h_896x896_80k_mapillary.py │ │ ├── segformer_internimage_l_512x1024_80k_mapillary.py │ │ ├── segformer_internimage_xl_512x1024_80k_mapillary.py │ │ ├── upernet_internimage_l_512x1024_80k_mapillary.py │ │ └── upernet_internimage_xl_512x1024_80k_mapillary.py │ ├── nyu_depth_v2 │ │ ├── README.md │ │ └── mask2former_internimage_h_480_40k_nyu.py │ └── pascal_context │ │ ├── README.md │ │ └── mask2former_internimage_h_480_40k_pascal_context_59.py ├── deploy.py ├── deploy │ ├── configs │ │ ├── _base_ │ │ │ ├── backends │ │ │ │ └── tensorrt.py │ │ │ └── onnx_config.py │ │ └── mmseg │ │ │ ├── segmentation_static.py │ │ │ └── segmentation_tensorrt_static-512x512.py │ └── demo.png ├── dist_test.sh ├── dist_train.sh ├── get_flops.py ├── image_demo.py ├── mmcv_custom │ ├── __init__.py │ └── custom_layer_decay_optimizer_constructor.py ├── mmseg_custom │ ├── __init__.py 
│ ├── core │ │ ├── __init__.py │ │ ├── anchor │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── point_generator.py │ │ ├── box │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── samplers │ │ │ │ ├── __init__.py │ │ │ │ ├── base_sampler.py │ │ │ │ ├── mask_pseudo_sampler.py │ │ │ │ ├── mask_sampling_result.py │ │ │ │ └── sampling_result.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ └── panoptic_utils.py │ │ ├── mask │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── dist_utils.py │ │ │ └── misc.py │ ├── datasets │ │ ├── __init__.py │ │ ├── dataset_wrappers.py │ │ ├── mapillary.py │ │ ├── nyu_depth_v2.py │ │ └── pipelines │ │ │ ├── __init__.py │ │ │ ├── formatting.py │ │ │ └── transform.py │ └── models │ │ ├── __init__.py │ │ ├── backbones │ │ ├── __init__.py │ │ └── intern_image.py │ │ ├── builder.py │ │ ├── decode_heads │ │ ├── __init__.py │ │ ├── mask2former_head.py │ │ └── maskformer_head.py │ │ ├── losses │ │ ├── __init__.py │ │ ├── cross_entropy_loss.py │ │ ├── dice_loss.py │ │ ├── focal_loss.py │ │ ├── match_costs.py │ │ └── match_loss.py │ │ ├── plugins │ │ ├── __init__.py │ │ ├── msdeformattn_pixel_decoder.py │ │ └── pixel_decoder.py │ │ ├── segmentors │ │ ├── __init__.py │ │ ├── encoder_decoder_mask2former.py │ │ └── encoder_decoder_mask2former_aug.py │ │ └── utils │ │ ├── __init__.py │ │ ├── assigner.py │ │ ├── point_sample.py │ │ ├── positional_encoding.py │ │ └── transformer.py ├── ops_dcnv3 │ ├── functions │ │ ├── __init__.py │ │ └── dcnv3_func.py │ ├── make.sh │ ├── modules │ │ ├── __init__.py │ │ └── dcnv3.py │ ├── setup.py │ ├── src │ │ ├── cpu │ │ │ ├── dcnv3_cpu.cpp │ │ │ └── dcnv3_cpu.h │ │ ├── cuda │ │ │ ├── dcnv3_cuda.cu │ │ │ ├── dcnv3_cuda.h │ │ │ └── dcnv3_im2col_cuda.cuh │ │ ├── dcnv3.h │ │ └── vision.cpp │ └── test.py ├── slurm_test.sh ├── slurm_train.sh ├── test.py └── train.py └── tensorrt └── modulated_deform_conv_v3 ├── trt_deform_conv_v3.cpp ├── trt_deform_conv_v3.hpp ├── trt_deform_conv_v3_kernel.cu └── trt_deform_conv_v3_kernel.hpp /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E501, E502, F403, C901, W504, W605, E251, E122, E126, E127, E722, W503, E128, E741, E731, E701, E712 3 | select = E1, E3, E502, E7, E9, W1, W5, W6 4 | max-line-length = 180 5 | exclude=*.egg/*,build,dist,detection/configs/* 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .DS_Store 3 | __pycache__/ 4 | classification/convertor/ 5 | segmentation/convertor/ 6 | detection/convertor/ 7 | checkpoint_dir/ 8 | demo/ 9 | pretrained/ 10 | upload.py 11 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line-length = 180 3 | multi_line_output = 0 4 | extra_standard_library = setuptools 5 | known_third_party = PIL,asynctest,cityscapesscripts,cv2,gather_models,matplotlib,mmcv,numpy,onnx,onnxruntime,pycocotools,pytest,pytorch_sphinx_theme,requests,scipy,seaborn,six,terminaltables,torch,ts,yaml 6 | no_lines_before = STDLIB,LOCALFOLDER 7 | default_section = THIRDPARTY 8 | 9 | [yapf] 10 | BASED_ON_STYLE = pep8 11 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 12 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 13 | 14 | [codespell] 15 | skip = *.ipynb 16 | quiet-level = 3 17 | ignore-words-list = 
patten,nd,ty,mot,hist,formating,winn,gool,datas,wan,confids,TOOD,tood -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: ^internvl_chat_llava/ 2 | repos: 3 | - repo: https://github.com/PyCQA/flake8 4 | rev: 5.0.4 5 | hooks: 6 | - id: flake8 7 | - repo: https://github.com/PyCQA/isort 8 | rev: 5.11.5 9 | hooks: 10 | - id: isort 11 | - repo: https://github.com/pre-commit/pre-commit-hooks 12 | rev: v4.3.0 13 | hooks: 14 | - id: trailing-whitespace 15 | - id: check-yaml 16 | - id: end-of-file-fixer 17 | - id: requirements-txt-fixer 18 | - id: double-quote-string-fixer 19 | - id: check-merge-conflict 20 | - id: fix-encoding-pragma 21 | args: ["--remove"] 22 | - id: mixed-line-ending 23 | args: ["--fix=lf"] 24 | - repo: https://github.com/executablebooks/mdformat 25 | rev: 0.7.9 26 | hooks: 27 | - id: mdformat 28 | args: ["--number"] 29 | additional_dependencies: 30 | - mdformat-openmmlab 31 | - mdformat_frontmatter 32 | - linkify-it-py 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 OpenGVLab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 
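# A quick worked check of that coupling (a sanity-check sketch, assuming the
# 0.25 m pillar size inherited from the base hv_pointpillars_fpn_nus.py config):
# BEV grid cells per axis = (100 - (-100)) / 0.25 = 800, matching the
# output_shape=[800, 800] of pts_middle_encoder below; the lyft variant above
# likewise gives (80 - (-80)) / 0.25 = 640 for its output_shape=[640, 640].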
8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/models/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | 
class_weight=None, # should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is changed during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/cyclic_20e.py: -------------------------------------------------------------------------------- 1 | # For the nuScenes dataset, we usually evaluate the model at the end of training. 2 | # Since the models are trained for 24 epochs by default, we set the evaluation 3 | # interval to 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on the nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # This schedule is usually used by models trained on the KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4), 5 | # the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the official AdamW optimizer implemented by PyTorch.
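# A worked reading of those numbers (a sketch, assuming mmcv's cyclic policy
# interprets target_ratio as (peak_ratio, final_ratio) relative to the initial
# lr): peak lr = 0.0018 * 10 = 1.8e-2 is reached after the step_ratio_up phase,
# and the lr then anneals toward 0.0018 * 1e-4 = 1.8e-7 by the end of training.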
9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although the max_epochs is 40, this schedule is usually used with 29 | # RepeatDataset with repeat ratio N, thus the actual max epoch 30 | # number could be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on the nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor datasets, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | #
optimizer 2 | # This schedule is mainly used on the S3DIS dataset for segmentation 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the ScanNet dataset for segmentation 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the S3DIS dataset for segmentation 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/datasets/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/datasets/evaluation/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/datasets/evaluation/distance.py: -------------------------------------------------------------------------------- 1 | from numpy.typing import NDArray 2 | from scipy.spatial import distance 3 | 4 | 5 | def chamfer_distance(line1: NDArray, line2: NDArray) -> float: 6 | ''' Calculate chamfer distance between two lines. Make sure the 7 | lines are interpolated. 8 | 9 | Args: 10 | line1 (array): coordinates of line1 11 | line2 (array): coordinates of line2 12 | 13 | Returns: 14 | distance (float): chamfer distance 15 | ''' 16 | 17 | dist_matrix = distance.cdist(line1, line2, 'euclidean') 18 | dist12 = dist_matrix.min(-1).sum() / len(line1) 19 | dist21 = dist_matrix.min(-2).sum() / len(line2) 20 | 21 | return (dist12 + dist21) / 2 22 | 23 | 24 | def frechet_distance(line1: NDArray, line2: NDArray) -> float: 25 | ''' Calculate frechet distance between two lines.
Make sure the 26 | lines are interpolated. 27 | 28 | Args: 29 | line1 (array): coordinates of line1 30 | line2 (array): coordinates of line2 31 | 32 | Returns: 33 | distance (float): frechet distance 34 | ''' 35 | 36 | raise NotImplementedError 37 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .formating import FormatBundleMap 2 | from .loading import LoadMultiViewImagesFromFiles 3 | from .poly_bbox import PolygonizeLocalMapBbox 4 | from .transform import Normalize3D, PadMultiViewImages, ResizeMultiViewImages 5 | from .vectorize import VectorizeMap 6 | 7 | # for argoverse 8 | 9 | __all__ = [ 10 | 'LoadMultiViewImagesFromFiles', 11 | 'FormatBundleMap', 'Normalize3D', 'ResizeMultiViewImages', 'PadMultiViewImages', 12 | 'VectorizeMap', 'PolygonizeLocalMapBbox' 13 | ] 14 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/assigner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/assigner/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/augmentation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/augmentation/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .ipm_backbone import IPMEncoder 2 | 3 | __all__ = [ 4 | 'IPMEncoder' 5 | ] 6 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/heads/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/heads/base_map_head.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch.nn as nn 4 | from mmcv.runner import auto_fp16 5 | from mmcv.utils import print_log 6 | from mmdet.utils import get_root_logger 7 | 8 | 9 | class BaseMapHead(nn.Module, metaclass=ABCMeta): 10 | """Base class for mappers.""" 11 | 12 | def __init__(self): 13 | 
super(BaseMapHead, self).__init__() 14 | self.fp16_enabled = False 15 | 16 | def init_weights(self, pretrained=None): 17 | """Initialize the weights in detector. 18 | Args: 19 | pretrained (str, optional): Path to pre-trained weights. 20 | Defaults to None. 21 | """ 22 | if pretrained is not None: 23 | logger = get_root_logger() 24 | print_log(f'load model from: {pretrained}', logger=logger) 25 | 26 | @auto_fp16(apply_to=('img',)) 27 | def forward(self, *args, **kwargs): 28 | pass 29 | 30 | @abstractmethod 31 | def loss(self, pred, gt): 32 | ''' 33 | Compute loss 34 | Output: 35 | dict( 36 | loss: torch.Tensor 37 | log_vars: dict( 38 | str: float, 39 | ) 40 | num_samples: int 41 | ) 42 | ''' 43 | return 44 | 45 | @abstractmethod 46 | def post_process(self, pred): 47 | ''' 48 | convert model predictions to vectorized outputs 49 | the output format should be consistent with the evaluation function 50 | ''' 51 | return 52 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/heads/detgen_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/heads/detgen_utils/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/losses/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/mapers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/mapers/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/transformer_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/transformer_utils/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/transformer_utils/base_transformer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn.bricks.registry import TRANSFORMER_LAYER_SEQUENCE 3 | 4 | 5 | @TRANSFORMER_LAYER_SEQUENCE.register_module() 6 | class PlaceHolderEncoder(nn.Module): 7 | 8 | def __init__(self, *args, embed_dims=None, **kwargs): 9 | super(PlaceHolderEncoder, self).__init__() 10 | self.embed_dims = embed_dims 11 | 12 | def forward(self, *args, query=None, **kwargs): 13 | return query 14 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/tools/evaluate_submission.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.append(os.path.abspath('.')) 5 | import argparse 6 | 7 | from src.datasets.evaluation.vector_eval import VectorEvaluate 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Evaluate a submission file') 13 | 14 | parser.add_argument('submission', 15 | help='submission file in pickle or json format to be evaluated') 16 | 17 | parser.add_argument('gt', 18 | help='gt annotation file') 19 | 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def main(args): 25 | evaluator = VectorEvaluate(args.gt, n_workers=0) 26 | results = evaluator.evaluate(args.submission) 27 | 28 | print(results) 29 | 30 | 31 | if __name__ == '__main__': 32 | args = parse_args() 33 | main(args) 34 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - name: "OpenOccupancy Benchmark Contributors" 5 | title: "OpenOccupancy: 3D Occupancy Benchmark for Scene Perception in Autonomous Driving" 6 | date-released: 2023-02-10 7 | url: "https://github.com/CVPR2023-Occupancy-Prediction-Challenge/CVPR2023-Occupancy-Prediction-Challenge" 8 | license: Apache-2.0 9 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/occupancy_prediction/projects/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 
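# (Same sanity check as in the Online-HD-Map-Construction copy of this config,
# again assuming the base config's 0.25 m pillar size: (100 - (-100)) / 0.25 =
# 800, hence the output_shape=[800, 800] of pts_middle_encoder below.)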
8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/models/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | class_weight=None, 
# should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is changed during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/cyclic_20e.py: -------------------------------------------------------------------------------- 1 | # For the nuScenes dataset, we usually evaluate the model at the end of training. 2 | # Since the models are trained for 24 epochs by default, we set the evaluation 3 | # interval to 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on the nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # This schedule is usually used by models trained on the KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4), 5 | # the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the official AdamW optimizer implemented by PyTorch.
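# Worked numbers (editorial note, derived from the settings below): with
# lr = 0.0018 and target_ratio = (10, 1e-4), the learning rate ramps up to
# 0.0018 * 10 = 0.018 over the first 40% of iterations (step_ratio_up = 0.4),
# then anneals down to 0.0018 * 1e-4 = 1.8e-7 over the remaining 60%.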
9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although the max_epochs is 40, this schedule is usually used with 29 | # RepeatDataset with repeat ratio N, thus the actual max epoch 30 | # number could be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on the nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor datasets, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | #
optimizer 2 | # This schedule is mainly used on the S3DIS dataset for the segmentation task 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the ScanNet dataset for the segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the S3DIS dataset for the segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer import * 2 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D 3 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder 4 | from .core.bbox.match_costs import BBox3DL1Cost 5 | from .core.evaluation.eval_hooks import CustomDistEvalHook 6 | from .datasets.pipelines import (CustomCollect3D, NormalizeMultiviewImage, 7 | PadMultiViewImage, 8 | PhotoMetricDistortionMultiViewImage) 9 | from .models.backbones.vovnet import VoVNet 10 | from .models.opt.adamw import AdamW2 11 | from .models.utils import * 12 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * 2 | from .dense_heads import * 3 | from .detectors import * 4 | from .hooks import * 5 | from .modules import * 6 | from .runner import * 7 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .mmdet_train import custom_train_detector 2 | from .train import custom_train_model 3 | 4 | # from .test import custom_multi_gpu_test 5 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/backbones/__init__.py:
-------------------------------------------------------------------------------- 1 | from .custom_layer_decay_optimizer_constructor import \ 2 | CustomLayerDecayOptimizerConstructor 3 | from .internimage import InternImage 4 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/backbones/ops_dcnv3/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3_func import DCNv3Function, dcnv3_core_pytorch 8 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/backbones/ops_dcnv3/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -------------------------------------------------------- 3 | # InternImage 4 | # Copyright (c) 2022 OpenGVLab 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | python setup.py build install 9 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/backbones/ops_dcnv3/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3 import DCNv3, DCNv3_pytorch 8 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/backbones/ops_dcnv3/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*! 
2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #include "dcnv3.h" 13 | 14 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 15 | m.def("dcnv3_forward", &dcnv3_forward, "dcnv3_forward"); 16 | m.def("dcnv3_backward", &dcnv3_backward, "dcnv3_backward"); 17 | } 18 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer_occ_head import BEVFormerOccHead 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer_occ import BEVFormerOcc 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom_hooks import TransferWeight 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | 3 | 4 | @HOOKS.register_module() 5 | class TransferWeight(Hook): 6 | 7 | def __init__(self, every_n_inters=1): 8 | self.every_n_inters = every_n_inters 9 | 10 | def after_train_iter(self, runner): 11 | if self.every_n_inner_iters(runner, self.every_n_inters): 12 | runner.eval_model.load_state_dict(runner.model.state_dict()) 13 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .decoder import DetectionTransformerDecoder 2 | from .encoder import BEVFormerEncoder, BEVFormerLayer 3 | from .spatial_cross_attention import (MSDeformableAttention3D, 4 | SpatialCrossAttention) 5 | from .temporal_self_attention import TemporalSelfAttention 6 | from .transformer import PerceptionTransformer 7 | from .transformer_occ import TransformerOcc 8 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .epoch_based_runner import EpochBasedRunner_video 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | 
from .hungarian_assigner_3d import HungarianAssigner3D 2 | 3 | __all__ = ['HungarianAssigner3D'] 4 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_free_coder import NMSFreeCoder 2 | 3 | __all__ = ['NMSFreeCoder'] 4 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost 3 | 4 | __all__ = ['build_match_cost', 'BBox3DL1Cost'] 5 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 3 | 4 | 5 | @MATCH_COST.register_module() 6 | class BBox3DL1Cost(object): 7 | """BBox3DL1Cost. 8 | Args: 9 | weight (int | float, optional): loss_weight 10 | """ 11 | 12 | def __init__(self, weight=1.): 13 | self.weight = weight 14 | 15 | def __call__(self, bbox_pred, gt_bboxes): 16 | """ 17 | Args: 18 | bbox_pred (Tensor): Predicted boxes in the normalized 3D 19 | representation produced by normalize_bbox. Shape 20 | [num_query, box_dim]. 21 | gt_bboxes (Tensor): Ground truth boxes in the same normalized 22 | representation. Shape [num_gt, box_dim]. 23 | Returns: 24 | torch.Tensor: bbox_cost value with weight 25 | """ 26 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1) 27 | return bbox_cost * self.weight 28 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def normalize_bbox(bboxes, pc_range): 5 | cx = bboxes[..., 0:1] 6 | cy = bboxes[..., 1:2] 7 | cz = bboxes[..., 2:3] 8 | w = bboxes[..., 3:4].log() 9 | l = bboxes[..., 4:5].log() 10 | h = bboxes[..., 5:6].log() 11 | 12 | rot = bboxes[..., 6:7] 13 | if bboxes.size(-1) > 7: 14 | vx = bboxes[..., 7:8] 15 | vy = bboxes[..., 8:9] 16 | normalized_bboxes = torch.cat( 17 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1 18 | ) 19 | else: 20 | normalized_bboxes = torch.cat( 21 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1 22 | ) 23 | return normalized_bboxes 24 | 25 | 26 | def denormalize_bbox(normalized_bboxes, pc_range): 27 | # rotation 28 | rot_sine = normalized_bboxes[..., 6:7] 29 | 30 | rot_cosine = normalized_bboxes[..., 7:8] 31 | rot = torch.atan2(rot_sine, rot_cosine) 32 | 33 | # center in the bev 34 | cx = normalized_bboxes[..., 0:1] 35 | cy = normalized_bboxes[..., 1:2] 36 | cz = normalized_bboxes[..., 4:5] 37 | 38 | # size 39 | w = normalized_bboxes[..., 2:3] 40 | l = normalized_bboxes[..., 3:4] 41 | h = normalized_bboxes[..., 5:6] 42 | 43 | w = w.exp() 44 | l = l.exp() 45 | h = h.exp() 46 | if normalized_bboxes.size(-1) > 8: 47 | # velocity 48 | vx = normalized_bboxes[:, 8:9] 49 | vy = normalized_bboxes[:, 9:10] 50 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 51 | else: 52 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l,
h, rot], dim=-1) 53 | return denormalized_bboxes 54 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks import CustomDistEvalHook 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import custom_build_dataset 2 | from .nuscenes_dataset import CustomNuScenesDataset 3 | from .nuscenes_occ import NuSceneOcc 4 | 5 | __all__ = [ 6 | 'CustomNuScenesDataset' 7 | ] 8 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .formating import CustomDefaultFormatBundle3D 2 | from .loading import LoadOccGTFromFile 3 | from .transform_3d import (CustomCollect3D, NormalizeMultiviewImage, 4 | PadMultiViewImage, 5 | PhotoMetricDistortionMultiViewImage, 6 | RandomScaleImageMultiViewImage) 7 | 8 | __all__ = [ 9 | 'PadMultiViewImage', 'NormalizeMultiviewImage', 10 | 'PhotoMetricDistortionMultiViewImage', 'CustomDefaultFormatBundle3D', 'CustomCollect3D', 11 | 'RandomScaleImageMultiViewImage' 12 | ] 13 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.parallel import DataContainer as DC 3 | from mmdet3d.datasets.pipelines import DefaultFormatBundle3D 4 | from mmdet.datasets.builder import PIPELINES 5 | from mmdet.datasets.pipelines import to_tensor 6 | 7 | 8 | @PIPELINES.register_module() 9 | class CustomDefaultFormatBundle3D(DefaultFormatBundle3D): 10 | """Default formatting bundle. 11 | It simplifies the pipeline of formatting common fields for voxels, 12 | including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and 13 | "gt_semantic_seg". 14 | These fields are formatted as follows. 15 | - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) 16 | - proposals: (1)to tensor, (2)to DataContainer 17 | - gt_bboxes: (1)to tensor, (2)to DataContainer 18 | - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer 19 | - gt_labels: (1)to tensor, (2)to DataContainer 20 | """ 21 | 22 | def __call__(self, results): 23 | """Call function to transform and format common fields in results. 24 | Args: 25 | results (dict): Result dict contains the data to convert. 26 | Returns: 27 | dict: The result dict contains the data that is formatted with 28 | default bundle. 
29 | """ 30 | # Format 3D data 31 | results = super(CustomDefaultFormatBundle3D, self).__call__(results) 32 | results['gt_map_masks'] = DC( 33 | to_tensor(results['gt_map_masks']), stack=True) 34 | 35 | return results 36 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from mmdet.datasets.builder import PIPELINES 5 | 6 | 7 | @PIPELINES.register_module() 8 | class LoadOccGTFromFile(object): 9 | """Load multi channel images from a list of separate channel files. 10 | 11 | Expects results['img_filename'] to be a list of filenames. 12 | note that we read image in BGR style to align with opencv.imread 13 | Args: 14 | to_float32 (bool): Whether to convert the img to float32. 15 | Defaults to False. 16 | color_type (str): Color type of the file. Defaults to 'unchanged'. 17 | """ 18 | 19 | def __init__( 20 | self, 21 | data_root, 22 | ): 23 | self.data_root = data_root 24 | 25 | def __call__(self, results): 26 | # print(results.keys()) 27 | occ_gt_path = results['occ_gt_path'] 28 | occ_gt_path = os.path.join(self.data_root, occ_gt_path) 29 | 30 | occ_labels = np.load(occ_gt_path) 31 | semantics = occ_labels['semantics'] 32 | mask_lidar = occ_labels['mask_lidar'] 33 | mask_camera = occ_labels['mask_camera'] 34 | 35 | results['voxel_semantics'] = semantics 36 | results['mask_lidar'] = mask_lidar 37 | results['mask_camera'] = mask_camera 38 | 39 | return results 40 | 41 | def __repr__(self): 42 | """str: Return a string that describes the module.""" 43 | return "{} (data_root={}')".format( 44 | self.__class__.__name__, self.data_root) 45 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed_sampler import DistributedSampler 2 | from .group_sampler import DistributedGroupSampler 3 | from .sampler import SAMPLER, build_sampler 4 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | 6 | from .sampler import SAMPLER 7 | 8 | 9 | @SAMPLER.register_module() 10 | class DistributedSampler(_DistributedSampler): 11 | 12 | def __init__(self, 13 | dataset=None, 14 | num_replicas=None, 15 | rank=None, 16 | shuffle=True, 17 | seed=0): 18 | super().__init__( 19 | dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) 20 | # for the compatibility from PyTorch 1.3+ 21 | self.seed = seed if seed is not None else 0 22 | 23 | def __iter__(self): 24 | # deterministically shuffle based on epoch 25 | if self.shuffle: 26 | assert False 27 | else: 28 | indices = torch.arange(len(self.dataset)).tolist() 29 | 30 | # add extra samples to make it evenly divisible 31 | # in case that indices is shorter than half of total_size 32 | indices = (indices * 33 | math.ceil(self.total_size / len(indices)))[:self.total_size] 34 | assert len(indices) == self.total_size 35 | 36 | # subsample 37 | per_replicas = self.total_size // 
self.num_replicas 38 | # indices = indices[self.rank:self.total_size:self.num_replicas] 39 | indices = indices[self.rank * per_replicas:(self.rank + 1) * per_replicas] 40 | assert len(indices) == self.num_samples 41 | 42 | return iter(indices) 43 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils.registry import Registry, build_from_cfg 2 | 3 | SAMPLER = Registry('sampler') 4 | 5 | 6 | def build_sampler(cfg, default_args): 7 | return build_from_cfg(cfg, SAMPLER, default_args) 8 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .vovnet import VoVNet 2 | 3 | __all__ = ['VoVNet'] 4 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .hooks import GradChecker 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/hooks/hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | 3 | 4 | @HOOKS.register_module() 5 | class GradChecker(Hook): 6 | 7 | def after_train_iter(self, runner): 8 | for key, val in runner.model.named_parameters(): 9 | if val.grad is None and val.requires_grad: 10 | print('WARNING: {key}\'s parameters are not used!'.format(key=key)) 11 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/opt/__init__.py: -------------------------------------------------------------------------------- 1 | from .adamw import AdamW2 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .bricks import run_time 2 | from .grid_mask import GridMask 3 | from .position_embedding import RelPositionEmbedding 4 | from .positional_encoding import LearnedPositionalEncoding3D 5 | from .visual import save_tensor 6 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/utils/bricks.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import defaultdict 3 | 4 | import torch 5 | 6 | time_maps = defaultdict(lambda: 0.) 7 | count_maps = defaultdict(lambda: 0.)
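# Usage sketch (editorial note): run_time is a decorator factory; wrapping a
# function as @run_time('backbone') prints the function's cumulative average
# wall-clock time per call, with torch.cuda.synchronize() before and after
# the call so that asynchronous CUDA work is included in the measurement.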
8 | 9 | 10 | def run_time(name): 11 | def middle(fn): 12 | def wrapper(*args, **kwargs): 13 | torch.cuda.synchronize() 14 | start = time.time() 15 | res = fn(*args, **kwargs) 16 | torch.cuda.synchronize() 17 | time_maps['%s : %s' % (name, fn.__name__)] += time.time() - start 18 | count_maps['%s : %s' % (name, fn.__name__)] += 1 19 | print('%s : %s takes up %f ' % (name, fn.__name__, time_maps['%s : %s' % (name, fn.__name__)] / count_maps[ 20 | '%s : %s' % (name, fn.__name__)])) 21 | return res 22 | 23 | return wrapper 24 | 25 | return middle 26 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/utils/position_embedding.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class RelPositionEmbedding(nn.Module): 8 | def __init__(self, num_pos_feats=64, pos_norm=True): 9 | super().__init__() 10 | self.num_pos_feats = num_pos_feats 11 | self.fc = nn.Linear(4, self.num_pos_feats, bias=False) 12 | # nn.init.orthogonal_(self.fc.weight) 13 | # self.fc.weight.requires_grad = False 14 | self.pos_norm = pos_norm 15 | if self.pos_norm: 16 | self.norm = nn.LayerNorm(self.num_pos_feats) 17 | 18 | def forward(self, tensor): 19 | # mask = nesttensor.mask 20 | B, C, H, W = tensor.shape 21 | # print('tensor.shape', tensor.shape) 22 | y_range = (torch.arange(H) / float(H - 1)).to(tensor.device) 23 | # y_axis = torch.stack((y_range, 1-y_range),dim=1) 24 | y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1) 25 | y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2) 26 | 27 | x_range = (torch.arange(W) / float(W - 1)).to(tensor.device) 28 | # x_axis =torch.stack((x_range,1-x_range),dim=1) 29 | x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1) 30 | x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2) 31 | x_pos = torch.cat((y_axis, x_axis), dim=1) 32 | x_pos = self.fc(x_pos) 33 | 34 | if self.pos_norm: 35 | x_pos = self.norm(x_pos) 36 | # print('xpos,', x_pos.max(),x_pos.min()) 37 | return x_pos 38 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/utils/visual.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import matplotlib.pyplot as plt 3 | import torch 4 | import torchvision 5 | from torchvision.utils import make_grid 6 | 7 | 8 | def convert_color(img_path): 9 | plt.figure() 10 | img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 11 | plt.imsave(img_path, img, cmap=plt.get_cmap('viridis')) 12 | plt.close() 13 | 14 | 15 | def save_tensor(tensor, path, pad_value=254.0, ): 16 | print('save_tensor', path) 17 | tensor = tensor.to(torch.float).detach().cpu() 18 | if tensor.type() == 'torch.BoolTensor': 19 | tensor = tensor * 255 20 | if len(tensor.shape) == 3: 21 | tensor = tensor.unsqueeze(1) 22 | tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy() 23 | torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path) 24 | convert_color(path) 25 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/analysis_tools/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/occupancy_prediction/tools/analysis_tools/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/analysis_tools/get_params.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | file_path = './ckpts/bevformer_v4.pth' 4 | model = torch.load(file_path, map_location='cpu') 5 | all = 0 6 | for key in list(model['state_dict'].keys()): 7 | all += model['state_dict'][key].nelement() 8 | print(all) 9 | 10 | # smaller 63374123 11 | # v4 69140395 12 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29503} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --eval bbox 11 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/train.py \ 18 | $CONFIG \ 19 | --deterministic \ 20 | --launcher pytorch ${@:3} 21 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/fp16/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28508} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 10 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | 4 | from mmcv import Config, DictAction 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser(description='Print the whole config') 9 | parser.add_argument('config', help='config file path') 10 | parser.add_argument( 11 | '--options', nargs='+', action=DictAction, help='arguments in dict') 12 | args = parser.parse_args() 13 | 14 | return args 15 | 16 | 17 | def main(): 18 | args = parse_args() 19 | 20 | cfg = Config.fromfile(args.config) 21 | if args.options is not None: 22 | cfg.merge_from_dict(args.options) 23 | print(f'Config:\n{cfg.pretty_text}') 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | import mmcv 5 | from mmcv import Config 6 | from mmdet3d.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='MMDet3D visualize the results') 12 | parser.add_argument('config', help='test config file path') 13 | parser.add_argument('--result', help='results file in pickle format') 14 | parser.add_argument( 15 | '--show-dir', help='directory where visualize results will be saved') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | if args.result is not None and \ 25 | not args.result.endswith(('.pkl', '.pickle')): 26 | raise ValueError('The results file must be a pkl file.') 27 | 28 | cfg = Config.fromfile(args.config) 29 | cfg.data.test.test_mode = True 30 | 31 | # build the dataset 32 | dataset = build_dataset(cfg.data.test) 33 | results = mmcv.load(args.result) 34 | 35 | if getattr(dataset, 'show', None) is not None: 36 | # data loading pipeline for showing 37 | eval_pipeline = cfg.get('eval_pipeline', {}) 38 | if eval_pipeline: 39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 40 | else: 41 | dataset.show(results, args.show_dir) # use default pipeline 42 | else: 43 | raise NotImplementedError( 44 | 'Show is not implemented for dataset {}!'.format( 45 | type(dataset).__name__)) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | 5 | import torch 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser( 10 | description='Process a checkpoint to be published') 11 | parser.add_argument('in_file', help='input checkpoint filename') 12 | parser.add_argument('out_file', help='output checkpoint filename') 13 | args = parser.parse_args() 14 | return args 15 | 16 | 17 | def process_checkpoint(in_file, out_file): 18 | checkpoint = torch.load(in_file, map_location='cpu') 19 | # remove optimizer for smaller file size 20 | if 'optimizer' in checkpoint: 21 | del checkpoint['optimizer'] 22 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 23 | # add the code here. 
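    # For example, a hypothetical scrub (not part of the original script):
    #     checkpoint.get('meta', {}).pop('env_info', None)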
24 | torch.save(checkpoint, out_file) 25 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 26 | final_file = out_file[:-len('.pth')] + '-{}.pth'.format(sha[:8]) 27 | subprocess.Popen(['mv', out_file, final_file]) 28 | 29 | 30 | def main(): 31 | args = parse_args() 32 | process_checkpoint(args.in_file, args.out_file) 33 | 34 | 35 | if __name__ == '__main__': 36 | main() 37 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:4} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/CITATION: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - name: "OpenLane-V2 Dataset Contributors" 5 | title: "OpenLane-V2: The world's First Perception and Reasoning Benchmark for Scene Structure in Autonomous Driving" 6 | date-released: 2023-01-19 7 | url: "https://github.com/OpenDriveLab/OpenLane-V2" 8 | type: dataset 9 | license: Apache-2.0 10 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/data/OpenLane-V2/openlanev2.md5: -------------------------------------------------------------------------------- 1 | 21c607fa5a1930275b7f1409b25042a0 OpenLane-V2_sample.tar 2 | 8ade7daeec1b64f8ab91a50c81d812f6 OpenLane-V2_subset_A_image_0.tar 3 | c78e776f79e2394d2d5d95b7b5985e0f OpenLane-V2_subset_A_image_1.tar 4 | 4bf09079144aa54cb4dcd5ff6e00cf79 OpenLane-V2_subset_A_image_2.tar 5 | fd9e64345445975f462213b209632aee OpenLane-V2_subset_A_image_3.tar 6 | ae07e48c88ea2c3f6afbdf5ff71e9821 OpenLane-V2_subset_A_image_4.tar 7 | df62c1f6e6b3fb2a2a0868c78ab19c92 OpenLane-V2_subset_A_image_5.tar 8 | 7bff1ce30329235f8e0f25f6f6653b8f OpenLane-V2_subset_A_image_6.tar 9 | c73af4a7aef2692b96e4e00795120504 OpenLane-V2_subset_A_image_7.tar 10 | fb2f61e7309e0b48e2697e085a66a259 OpenLane-V2_subset_A_image_8.tar 11 | 95bf28ccf22583d20434d75800be065d OpenLane-V2_subset_A_info.tar 12 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/data/OpenLane-V2/preprocess.py: -------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # Binaries and/or source for the following packages or projects 3 | # are presented under one or more of the following open source licenses: 4 | # preprocess.py The OpenLane-V2 Dataset Authors Apache License, Version 2.0 5 | # 6 | # Contact wanghuijie@pjlab.org.cn if you have any issue. 7 | # 8 | # Copyright (c) 2023 The OpenLane-v2 Dataset Authors. All Rights Reserved.
9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, software 17 | # distributed under the License is distributed on an "AS IS" BASIS, 18 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | # See the License for the specific language governing permissions and 20 | # limitations under the License. 21 | # ============================================================================== 22 | 23 | from openlanev2.io import io 24 | from openlanev2.preprocessing import collect 25 | 26 | root_path = './OpenLane-V2' 27 | for file in io.os_listdir(root_path): 28 | if file.endswith('json'): 29 | subset = file.split('.')[0] 30 | for split, segments in io.json_load(f'{root_path}/{file}').items(): 31 | point_interval = 1 if split == 'train' else 20 32 | collect(root_path, {split: segments}, f'{subset}_{split}', point_interval=point_interval) 33 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/docs/annotation.md: -------------------------------------------------------------------------------- 1 | # Annotation 2 | 3 | ## Criterion 4 | 5 | The road structure cognition task takes the surrounding-view images of the ego vehicle as input, reconstructs a 6 | high-precision map around the ego vehicle, and recognizes the directions the ego vehicle may take. Concretely, given 7 | the surrounding-view images (and the HD map, where available), the expected outputs are the lane centerlines, the 8 | traffic elements, the topology among lane centerlines, and the correspondence between lane centerlines and traffic 9 | elements. Below are examples of visualizing annotations and relationships between different elements on 2D images.
10 | 11 | ![image](https://user-images.githubusercontent.com/47048022/209953048-f8ded0da-6005-45b7-8e3d-501dbd422058.png) 12 | ![image](https://user-images.githubusercontent.com/47048022/209954207-7b8a1b5a-8243-41d5-91fe-f2de5949107e.png) 13 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/docs/statistics.md: -------------------------------------------------------------------------------- 1 | # Statistics 2 | 3 | ## `subset_A` 4 | 5 | ### Temporal Consistency 6 | 7 | ![image](https://user-images.githubusercontent.com/29263416/228440318-f24136e5-7a26-4b28-bb74-6a448c900756.png) 8 | 9 | ### Instance Distribution 10 | 11 | ![image](https://user-images.githubusercontent.com/29263416/228441160-19d399c8-548c-4bef-8909-06ffcd0c027b.png) 12 | 13 | ### Centerline Property 14 | 15 | ![image](https://user-images.githubusercontent.com/29263416/228442761-5895e5b4-6d3a-4b90-8dbb-4ecfab98190e.png) 16 | 17 | ### Topology Distribution 18 | 19 | ![image](https://user-images.githubusercontent.com/29263416/228443434-a74085dc-28f8-400d-99a2-f0c67b49bf66.png) 20 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/docs/submission.md: -------------------------------------------------------------------------------- 1 | # Submission 2 | 3 | ## Format 4 | 5 | Submitted results must be stored in a pickle file: a dict that carries meta information and, under 'results', maps the 6 | identifier of each frame to its [formatted predictions](../data/README.md#annotations): 7 | 8 | ``` 9 | { 10 | 'method': -- name of the method 11 | 'authors': -- list of str, authors 12 | 'e-mail': -- e-mail address 13 | 'institution / company': -- institution or company 14 | 'country / region': -- country or region, checked by iso3166* 15 | 'results': { 16 | [identifier]: { -- identifier of the frame, (split, segment_id, timestamp) 17 | 'lane_centerline': ... 18 | 'traffic_element': ... 19 | 'topology_lclc': ... 20 | 'topology_lcte': ... 21 | }, 22 | ... 23 | } 24 | } 25 | ``` 26 | 27 | \*: For validation, `from iso3166 import countries; countries.get(str)` can be used. 28 | 29 | ## Steps 30 | 31 | 1. Create a team on [EvalAI](https://eval.ai/web/challenges/challenge-page/1925). 32 | 2. Click the 'Participate' tab, then choose a team for participation. 33 | 3. Choose the phase 'Test Phase (CVPR 2023 Autonomous Driving Challenge)' and upload the file formatted as described 34 | above. 35 | 4. Check whether the submitted file is valid, as indicated by the 'Status' field under the 'My Submissions' tab. A valid 36 | submission would provide performance scores.
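As a minimal sketch of assembling such a file (editorial example: all field values below are placeholders, and the per-frame payloads must follow the formats linked above):

```
import pickle

submission = {
    'method': 'my-method',                    # placeholder
    'authors': ['A. Author'],                 # placeholder
    'e-mail': 'a.author@example.com',         # placeholder
    'institution / company': 'Example Lab',   # placeholder
    'country / region': 'US',                 # must pass iso3166 validation
    'results': {
        ('val', 'segment_id', 315969904296756000): {  # placeholder identifier
            'lane_centerline': ...,  # formatted predictions, see the link above
            'traffic_element': ...,
            'topology_lclc': ...,
            'topology_lcte': ...,
        },
    },
}

with open('submission.pkl', 'wb') as f:
    pickle.dump(submission, f)
```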
37 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/imgs/lane.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/imgs/lane.gif -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/imgs/poster.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/imgs/poster.gif -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/imgs/topology.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/imgs/topology.gif -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/imgs/traffic_element.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/imgs/traffic_element.gif -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/openlanev2/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/openlanev2/dataset/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/openlanev2/evaluation/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/openlanev2/io/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/openlanev2/preprocessing/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/utils.py: 
-------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # Binaries and/or source for the following packages or projects 3 | # are presented under one or more of the following open source licenses: 4 | # utils.py The OpenLane-V2 Dataset Authors Apache License, Version 2.0 5 | # 6 | # Contact wanghuijie@pjlab.org.cn if you have any issue. 7 | # 8 | # Copyright (c) 2023 The OpenLane-v2 Dataset Authors. All Rights Reserved. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, software 17 | # distributed under the License is distributed on an "AS IS" BASIS, 18 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | # See the License for the specific language governing permissions and 20 | # limitations under the License. 21 | # ============================================================================== 22 | 23 | TRAFFIC_ELEMENT_ATTRIBUTE = { 24 | 'unknown': 0, 25 | 'red': 1, 26 | 'green': 2, 27 | 'yellow': 3, 28 | 'go_straight': 4, 29 | 'turn_left': 5, 30 | 'turn_right': 6, 31 | 'no_left_turn': 7, 32 | 'no_right_turn': 8, 33 | 'u_turn': 9, 34 | 'no_u_turn': 10, 35 | 'slight_left': 11, 36 | 'slight_right': 12, 37 | } 38 | 39 | 40 | def format_metric(metric): 41 | for key, val in metric.items(): 42 | print(f'{key} - {val["score"]}') 43 | for k, v in val.items(): 44 | if 'score' not in k: 45 | print(f' {k} - {v}') 46 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/visualization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/openlanev2/visualization/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/core/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/core/bbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/core/bbox/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/core/bbox/match_costs.py: 
-------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # Binaries and/or source for the following packages or projects 3 | # are presented under one or more of the following open source licenses: 4 | # match_costs.py The OpenLane-V2 Dataset Authors Apache License, Version 2.0 5 | # 6 | # Contact wanghuijie@pjlab.org.cn if you have any issue. 7 | # 8 | # Copyright (c) 2023 The OpenLane-v2 Dataset Authors. All Rights Reserved. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, software 17 | # distributed under the License is distributed on an "AS IS" BASIS, 18 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | # See the License for the specific language governing permissions and 20 | # limitations under the License. 21 | # ============================================================================== 22 | 23 | import torch 24 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 25 | 26 | 27 | @MATCH_COST.register_module() 28 | class LaneL1Cost: 29 | r""" 30 | Notes 31 | ----- 32 | Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/match_costs/match_cost.py#L11. 33 | 34 | """ 35 | 36 | def __init__(self, weight=1.): 37 | self.weight = weight 38 | 39 | def __call__(self, lane_pred, gt_lanes): 40 | lane_cost = torch.cdist(lane_pred, gt_lanes, p=1) 41 | return lane_cost * self.weight 42 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/datasets/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/datasets/pipelines/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .intern_image import InternImage 2 | 3 | __all__ = ['InternImage'] 4 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/backbones/ops_dcnv3/functions/__init__.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/backbones/ops_dcnv3/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -------------------------------------------------------- 3 | # InternImage 4 | # Copyright (c) 2022 OpenGVLab 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | python setup.py build install 9 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/backbones/ops_dcnv3/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/backbones/ops_dcnv3/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #include "dcnv3.h" 13 | 14 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 15 | m.def("dcnv3_forward", &dcnv3_forward, "dcnv3_forward"); 16 | m.def("dcnv3_backward", &dcnv3_backward, "dcnv3_backward"); 17 | } 18 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/detectors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/detectors/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/heads/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/modules/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/modules/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/necks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/necks/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/requirements.txt: -------------------------------------------------------------------------------- 1 | chardet 2 | iso3166 3 | jupyter 4 | matplotlib 5 | ninja 6 | numpy >=1.22.0, <1.24.0 7 | opencv-python 8 | openmim 9 | ortools ==9.2.9972 10 | scikit-learn 11 | scipy ==1.8.0 12 | similaritymeasures 13 | tqdm 14 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/setup.py: -------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # Binaries and/or source for the following packages or projects 3 | # are presented under one or more of the following open source licenses: 4 | # setup.py The OpenLane-V2 Dataset Authors Apache License, Version 2.0 5 | # 6 | # Contact wanghuijie@pjlab.org.cn if you have any issue. 7 | # 8 | # Copyright (c) 2023 The OpenLane-v2 Dataset Authors. All Rights Reserved. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, software 17 | # distributed under the License is distributed on an "AS IS" BASIS, 18 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | # See the License for the specific language governing permissions and 20 | # limitations under the License. 
21 | # ============================================================================== 22 | 23 | from setuptools import find_packages, setup 24 | 25 | setup( 26 | name='openlanev2', 27 | version='0.1.0', 28 | author='The OpenLane-V2 Dataset Authors', 29 | author_email='wanghuijie@pjlab.org.cn', 30 | description='The official devkit of the OpenLane-V2 dataset.', 31 | url='https://github.com/OpenDriveLab/OpenLane-V2', 32 | packages=find_packages(), 33 | license='Apache License 2.0', 34 | ) 35 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/create_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | export PYTHONPATH=`pwd`:$PYTHONPATH 5 | 6 | PARTITION=$1 7 | JOB_NAME=$2 8 | DATASET=$3 9 | GPUS=${GPUS:-1} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-1} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | JOB_NAME=create_data 13 | 14 | srun -p ${PARTITION} \ 15 | --job-name=${JOB_NAME} \ 16 | --gres=gpu:${GPUS_PER_NODE} \ 17 | --ntasks=${GPUS} \ 18 | --ntasks-per-node=${GPUS_PER_NODE} \ 19 | --kill-on-bad-exit=1 \ 20 | ${SRUN_ARGS} \ 21 | python -u tools/create_data.py ${DATASET} \ 22 | --root-path ./data/${DATASET} \ 23 | --out-dir ./data/${DATASET} \ 24 | --extra-tag ${DATASET} 25 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/data_converter/lyft_data_fixer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | 5 | import numpy as np 6 | 7 | 8 | def fix_lyft(root_folder='./data/lyft', version='v1.01'): 9 | # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa 10 | lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin' 11 | root_folder = os.path.join(root_folder, f'{version}-train') 12 | lidar_path = os.path.join(root_folder, lidar_path) 13 | assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \ 14 | f'dataset and make sure {lidar_path} is present.' 
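# Context for the repair below (descriptive note, not in the original file):
# the affected scan's raw buffer holds a float32 count that is not a
# multiple of 5 (x, y, z, intensity, ring index in the Lyft/nuScenes point
# layout), so the reshape([-1, 5]) probe raises ValueError, and appending
# two trailing values restores a valid layout.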
15 | points = np.fromfile(lidar_path, dtype=np.float32, count=-1) 16 | try: 17 | points.reshape([-1, 5]) 18 | print(f'This fix is not required for version {version}.') 19 | except ValueError: 20 | new_points = np.array(list(points) + [100.0, 1.0], dtype='float32') 21 | new_points.tofile(lidar_path) 22 | print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.') 23 | 24 | 25 | parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser') 26 | parser.add_argument( 27 | '--root-folder', 28 | type=str, 29 | default='./data/lyft', 30 | help='specify the root path of Lyft dataset') 31 | parser.add_argument( 32 | '--version', 33 | type=str, 34 | default='v1.01', 35 | help='specify Lyft dataset version') 36 | args = parser.parse_args() 37 | 38 | if __name__ == '__main__': 39 | fix_lyft(root_folder=args.root_folder, version=args.version) 40 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29500} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/test.py \ 19 | $CONFIG \ 20 | $CHECKPOINT \ 21 | --launcher pytorch \ 22 | ${@:4} 23 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/train.py \ 18 | $CONFIG \ 19 | --seed 0 \ 20 | --launcher pytorch ${@:3} 21 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | from mmcv import Config, DictAction 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser(description='Print the whole config') 9 | parser.add_argument('config', help='config file path') 10 | parser.add_argument( 11 | '--options', nargs='+', action=DictAction, help='arguments in dict') 12 | args = parser.parse_args() 13 | 14 | return args 15 | 16 | 17 | def main(): 18 | args = parse_args() 19 | 20 | cfg = Config.fromfile(args.config) 21 | if args.options is not None: 22 | cfg.merge_from_dict(args.options) 23 | print(f'Config:\n{cfg.pretty_text}') 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. 
All rights reserved. 2 | import argparse 3 | 4 | import mmcv 5 | from mmcv import Config 6 | from mmdet3d.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='MMDet3D visualize the results') 12 | parser.add_argument('config', help='test config file path') 13 | parser.add_argument('--result', help='results file in pickle format') 14 | parser.add_argument( 15 | '--show-dir', help='directory where visualize results will be saved') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | if args.result is not None and \ 25 | not args.result.endswith(('.pkl', '.pickle')): 26 | raise ValueError('The results file must be a pkl file.') 27 | 28 | cfg = Config.fromfile(args.config) 29 | cfg.data.test.test_mode = True 30 | 31 | # build the dataset 32 | dataset = build_dataset(cfg.data.test) 33 | results = mmcv.load(args.result) 34 | 35 | if getattr(dataset, 'show', None) is not None: 36 | # data loading pipeline for showing 37 | eval_pipeline = cfg.get('eval_pipeline', {}) 38 | if eval_pipeline: 39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 40 | else: 41 | dataset.show(results, args.show_dir) # use default pipeline 42 | else: 43 | raise NotImplementedError( 44 | 'Show is not implemented for dataset {}!'.format( 45 | type(dataset).__name__)) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | 5 | import torch 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser( 10 | description='Process a checkpoint to be published') 11 | parser.add_argument('in_file', help='input checkpoint filename') 12 | parser.add_argument('out_file', help='output checkpoint filename') 13 | args = parser.parse_args() 14 | return args 15 | 16 | 17 | def process_checkpoint(in_file, out_file): 18 | checkpoint = torch.load(in_file, map_location='cpu') 19 | # remove optimizer for smaller file size 20 | if 'optimizer' in checkpoint: 21 | del checkpoint['optimizer'] 22 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 23 | # add the code here. 
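# A hedged sketch of what such scrubbing could look like; the meta keys
# named here are hypothetical examples, not fields this tool is known to
# write:
#     for key in ('hook_msgs', 'config'):
#         checkpoint.get('meta', {}).pop(key, None)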
24 | torch.save(checkpoint, out_file) 25 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 26 | final_file = (out_file[:-4] if out_file.endswith('.pth') else out_file) + '-{}.pth'.format(sha[:8]) 27 | subprocess.Popen(['mv', out_file, final_file]) 28 | 29 | 30 | def main(): 31 | args = parse_args() 32 | process_checkpoint(args.in_file, args.out_file) 33 | 34 | 35 | if __name__ == '__main__': 36 | main() 37 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/update_data_coords.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | export PYTHONPATH=`pwd`:$PYTHONPATH 5 | 6 | PARTITION=$1 7 | DATASET=$2 8 | GPUS=${GPUS:-1} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-1} 10 | SRUN_ARGS=${SRUN_ARGS:-""} 11 | JOB_NAME=update_data_coords 12 | 13 | srun -p ${PARTITION} \ 14 | --job-name=${JOB_NAME} \ 15 | --gres=gpu:${GPUS_PER_NODE} \ 16 | --ntasks=${GPUS} \ 17 | --ntasks-per-node=${GPUS_PER_NODE} \ 18 | --kill-on-bad-exit=1 \ 19 | ${SRUN_ARGS} \ 20 | python -u tools/update_data_coords.py ${DATASET} \ 21 | --root-dir ./data/${DATASET} \ 22 | --out-dir ./data/${DATASET} 23 | -------------------------------------------------------------------------------- /classification/configs/accelerate/deepspeed/ds_config_zero1.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": true, 4 | "auto_cast": true 5 | }, 6 | "zero_optimization": { 7 | "stage": 1, 8 | "offload_optimizer": { 9 | "device": "none" 10 | }, 11 | "offload_param": { 12 | "device": "none" 13 | } 14 | }, 15 | "gradient_accumulation_steps": 4, 16 | "gradient_clipping": 5.0, 17 | "steps_per_print": "inf", 18 | "train_batch_size": "auto", 19 |
"train_micro_batch_size_per_gpu": "auto" 20 | } 21 | -------------------------------------------------------------------------------- /classification/configs/accelerate/deepspeed/ds_config_zero1_wo_loss_scale.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": true, 4 | "auto_cast": true, 5 | "loss_scale": 1 6 | }, 7 | "zero_optimization": { 8 | "stage": 1, 9 | "offload_optimizer": { 10 | "device": "none" 11 | }, 12 | "offload_param": { 13 | "device": "none" 14 | } 15 | }, 16 | "gradient_accumulation_steps": 4, 17 | "gradient_clipping": 5.0, 18 | "steps_per_print": "inf", 19 | "train_batch_size": "auto", 20 | "train_micro_batch_size_per_gpu": "auto" 21 | } 22 | -------------------------------------------------------------------------------- /classification/configs/accelerate/deepspeed/ds_config_zero3_offload.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": true, 4 | "auto_cast": true 5 | }, 6 | "zero_optimization": { 7 | "stage": 3, 8 | "offload_optimizer": { 9 | "device": "cpu" 10 | }, 11 | "offload_param": { 12 | "device": "cpu" 13 | } 14 | }, 15 | "gradient_accumulation_steps": 4, 16 | "gradient_clipping": 5.0, 17 | "steps_per_print": "inf", 18 | "train_batch_size": "auto", 19 | "train_micro_batch_size_per_gpu": "auto" 20 | } 21 | -------------------------------------------------------------------------------- /classification/configs/accelerate/deepspeed/ds_config_zero3_offload_wo_loss_scale.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": true, 4 | "auto_cast": true, 5 | "loss_scale": 1 6 | }, 7 | "zero_optimization": { 8 | "stage": 3, 9 | "offload_optimizer": { 10 | "device": "cpu" 11 | }, 12 | "offload_param": { 13 | "device": "cpu" 14 | } 15 | }, 16 | "gradient_accumulation_steps": 4, 17 | "gradient_clipping": 5.0, 18 | "steps_per_print": "inf", 19 | "train_batch_size": "auto", 20 | "train_micro_batch_size_per_gpu": "auto" 21 | } 22 | -------------------------------------------------------------------------------- /classification/configs/accelerate/dist_8gpus_ddp_fp16.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: {} 3 | distributed_type: MULTI_GPU 4 | downcast_bf16: 'no' 5 | fsdp_config: {} 6 | machine_rank: 0 7 | main_process_ip: null 8 | main_process_port: 11922 9 | main_training_function: main 10 | mixed_precision: 'fp16' 11 | num_machines: 1 12 | num_processes: 8 13 | use_cpu: false 14 | -------------------------------------------------------------------------------- /classification/configs/accelerate/dist_8gpus_zero1.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: 3 | deepspeed_config_file: configs/accelerate/deepspeed/ds_config_zero1.json 4 | zero3_init_flag: false 5 | distributed_type: DEEPSPEED 6 | downcast_bf16: 'no' 7 | machine_rank: 0 8 | main_training_function: main 9 | num_machines: 1 10 | num_processes: 8 11 | rdzv_backend: static 12 | use_cpu: false 13 | -------------------------------------------------------------------------------- /classification/configs/accelerate/dist_8gpus_zero1_wo_loss_scale.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: 3 | 
deepspeed_config_file: configs/accelerate/deepspeed/ds_config_zero1_wo_loss_scale.json 4 | zero3_init_flag: false 5 | distributed_type: DEEPSPEED 6 | downcast_bf16: 'no' 7 | machine_rank: 0 8 | main_training_function: main 9 | num_machines: 1 10 | num_processes: 8 11 | rdzv_backend: static 12 | use_cpu: false 13 | -------------------------------------------------------------------------------- /classification/configs/accelerate/dist_8gpus_zero3_offload.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: 3 | deepspeed_config_file: configs/accelerate/deepspeed/ds_config_zero3_offload.json 4 | zero3_init_flag: false 5 | distributed_type: DEEPSPEED 6 | downcast_bf16: 'no' 7 | machine_rank: 0 8 | main_training_function: main 9 | num_machines: 1 10 | num_processes: 8 11 | rdzv_backend: static 12 | use_cpu: false 13 | -------------------------------------------------------------------------------- /classification/configs/accelerate/dist_8gpus_zero3_offload_wo_loss_scale.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: 3 | deepspeed_config_file: configs/accelerate/deepspeed/ds_config_zero3_offload_wo_loss_scale.json 4 | zero3_init_flag: false 5 | distributed_type: DEEPSPEED 6 | downcast_bf16: 'no' 7 | machine_rank: 0 8 | main_training_function: main 9 | num_machines: 1 10 | num_processes: 8 11 | rdzv_backend: static 12 | use_cpu: false 13 | -------------------------------------------------------------------------------- /classification/configs/inaturalist2018/internimage_h_22ktoinat18_384.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 384 3 | IMG_ON_MEMORY: False 4 | DATASET: inat18 5 | AUG: 6 | MIXUP: 0.0 7 | CUTMIX: 0.0 8 | REPROB: 0.0 9 | MODEL: 10 | TYPE: intern_image_meta_former 11 | DROP_PATH_RATE: 0.6 12 | LABEL_SMOOTHING: 0.3 13 | INTERN_IMAGE: 14 | CORE_OP: 'DCNv3' 15 | DEPTHS: [6, 6, 32, 6] 16 | GROUPS: [10, 20, 40, 80] 17 | CHANNELS: 320 18 | DW_KERNEL_SIZE: 5 19 | LAYER_SCALE: None 20 | OFFSET_SCALE: 1.0 21 | MLP_RATIO: 4.0 22 | POST_NORM: False 23 | RES_POST_NORM: True 24 | LEVEL2_POST_NORM: True 25 | LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29] 26 | CENTER_FEATURE_SCALE: True 27 | USE_CLIP_PROJECTOR: True 28 | PRETRAINED: 'pretrained/internimage_h_jointto22k_384.pth' 29 | TRAIN: 30 | EMA: 31 | ENABLE: false 32 | DECAY: 0.9999 33 | EPOCHS: 100 34 | WARMUP_EPOCHS: 0 35 | WEIGHT_DECAY: 0.05 36 | BASE_LR: 2e-05 # 512 37 | WARMUP_LR: .0 38 | MIN_LR: .0 39 | USE_CHECKPOINT: true 40 | RAND_INIT_FT_HEAD: true 41 | AMP_OPT_LEVEL: O0 42 | EVAL_FREQ: 1 43 | -------------------------------------------------------------------------------- /classification/configs/internimage_b_1k_224.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_ON_MEMORY: True 3 | MODEL: 4 | TYPE: intern_image 5 | DROP_PATH_RATE: 0.5 6 | INTERN_IMAGE: 7 | CORE_OP: 'DCNv3' 8 | DEPTHS: [4, 4, 21, 4] 9 | GROUPS: [7, 14, 28, 56] 10 | CHANNELS: 112 11 | LAYER_SCALE: 1e-5 12 | OFFSET_SCALE: 1.0 13 | MLP_RATIO: 4.0 14 | POST_NORM: True 15 | TRAIN: 16 | EMA: 17 | ENABLE: True 18 | DECAY: 0.9999 19 | BASE_LR: 5e-4 20 | -------------------------------------------------------------------------------- /classification/configs/internimage_g_22kto1k_512.yaml:
-------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 512 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.4 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [2, 2, 48, 4] 15 | GROUPS: [16, 32, 64, 128] 16 | CHANNELS: 512 17 | DW_KERNEL_SIZE: 5 18 | LAYER_SCALE: None 19 | OFFSET_SCALE: 1.0 20 | MLP_RATIO: 4.0 21 | POST_NORM: True 22 | LEVEL2_POST_NORM: True 23 | LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29, 35, 41, 47] 24 | CENTER_FEATURE_SCALE: True 25 | USE_CLIP_PROJECTOR: True 26 | PRETRAINED: 'pretrained/internimage_g_pretrainto22k_384.pth' 27 | TRAIN: 28 | EMA: 29 | ENABLE: true 30 | DECAY: 0.9999 31 | EPOCHS: 20 32 | WARMUP_EPOCHS: 2 33 | WEIGHT_DECAY: 0.05 34 | BASE_LR: 2e-05 # 512 35 | WARMUP_LR: .0 36 | MIN_LR: .0 37 | LR_LAYER_DECAY: true 38 | LR_LAYER_DECAY_RATIO: 0.9 39 | USE_CHECKPOINT: true 40 | OPTIMIZER: 41 | DCN_LR_MUL: 0.1 42 | AMP_OPT_LEVEL: O0 43 | EVAL_FREQ: 1 44 | -------------------------------------------------------------------------------- /classification/configs/internimage_h_22kto1k_384.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 384 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.2 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [6, 6, 32, 6] 15 | GROUPS: [10, 20, 40, 80] 16 | CHANNELS: 320 17 | DW_KERNEL_SIZE: 5 18 | LAYER_SCALE: None 19 | OFFSET_SCALE: 1.0 20 | MLP_RATIO: 4.0 21 | POST_NORM: False 22 | RES_POST_NORM: True 23 | LEVEL2_POST_NORM: True 24 | LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29] 25 | CENTER_FEATURE_SCALE: True 26 | USE_CLIP_PROJECTOR: True 27 | PRETRAINED: 'pretrained/internimage_h_jointto22k_384.pth' 28 | TRAIN: 29 | EMA: 30 | ENABLE: true 31 | DECAY: 0.9999 32 | EPOCHS: 20 33 | WARMUP_EPOCHS: 2 34 | WEIGHT_DECAY: 0.05 35 | BASE_LR: 2e-05 # 512 36 | WARMUP_LR: .0 37 | MIN_LR: .0 38 | LR_LAYER_DECAY: true 39 | LR_LAYER_DECAY_RATIO: 0.9 40 | USE_CHECKPOINT: true 41 | OPTIMIZER: 42 | DCN_LR_MUL: 0.1 43 | AMP_OPT_LEVEL: O0 44 | EVAL_FREQ: 1 45 | -------------------------------------------------------------------------------- /classification/configs/internimage_h_22kto1k_640.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 640 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.2 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [6, 6, 32, 6] 15 | GROUPS: [10, 20, 40, 80] 16 | CHANNELS: 320 17 | DW_KERNEL_SIZE: 5 18 | LAYER_SCALE: None 19 | OFFSET_SCALE: 1.0 20 | MLP_RATIO: 4.0 21 | POST_NORM: False 22 | RES_POST_NORM: True 23 | LEVEL2_POST_NORM: True 24 | LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29] 25 | CENTER_FEATURE_SCALE: True 26 | USE_CLIP_PROJECTOR: True 27 | PRETRAINED: 'pretrained/internimage_h_jointto22k_384.pth' 28 | TRAIN: 29 | EMA: 30 | ENABLE: true 31 | DECAY: 0.9999 32 | EPOCHS: 20 33 | WARMUP_EPOCHS: 2 34 | WEIGHT_DECAY: 0.05 35 | BASE_LR: 2e-05 # 512 36 | WARMUP_LR: .0 37 | MIN_LR: .0 38 | LR_LAYER_DECAY: true 39 | LR_LAYER_DECAY_RATIO: 0.9 40 | USE_CHECKPOINT: true 41 | OPTIMIZER: 42 | USE_ZERO: True 43 | DCN_LR_MUL: 0.1 44 | AMP_OPT_LEVEL: O0 45 | EVAL_FREQ: 1 46 | 
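# A rough worked example of the linear LR-scaling rule that the "# 512"
# annotation on BASE_LR above appears to imply (an assumption about the
# surrounding config system, not something stated in this file): the base
# value is defined for a total batch size of 512 and scaled linearly, e.g.
#   effective_lr = BASE_LR * total_batch_size / 512
#   8 GPUs x 16 images per GPU = 128  ->  2e-05 * 128 / 512 = 5e-06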
-------------------------------------------------------------------------------- /classification/configs/internimage_l_22kto1k_384.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 384 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.1 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [5, 5, 22, 5] 15 | GROUPS: [10, 20, 40, 80] 16 | CHANNELS: 160 17 | LAYER_SCALE: 1e-5 18 | OFFSET_SCALE: 2.0 19 | MLP_RATIO: 4.0 20 | POST_NORM: True 21 | PRETRAINED: 'pretrained/internimage_l_22k_192to384.pth' 22 | TRAIN: 23 | EMA: 24 | ENABLE: true 25 | DECAY: 0.9999 26 | EPOCHS: 20 27 | WARMUP_EPOCHS: 2 28 | WEIGHT_DECAY: 0.05 29 | BASE_LR: 2e-05 # 512 30 | WARMUP_LR: .0 31 | MIN_LR: .0 32 | LR_LAYER_DECAY: true 33 | LR_LAYER_DECAY_RATIO: 0.9 34 | USE_CHECKPOINT: true 35 | OPTIMIZER: 36 | DCN_LR_MUL: 0.1 37 | AMP_OPT_LEVEL: O0 38 | EVAL_FREQ: 1 39 | -------------------------------------------------------------------------------- /classification/configs/internimage_s_1k_224.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_ON_MEMORY: True 3 | MODEL: 4 | TYPE: intern_image 5 | DROP_PATH_RATE: 0.4 6 | INTERN_IMAGE: 7 | CORE_OP: 'DCNv3' 8 | DEPTHS: [4, 4, 21, 4] 9 | GROUPS: [5, 10, 20, 40] 10 | CHANNELS: 80 11 | LAYER_SCALE: 1e-5 12 | OFFSET_SCALE: 1.0 13 | MLP_RATIO: 4.0 14 | POST_NORM: True 15 | TRAIN: 16 | EMA: 17 | ENABLE: True 18 | DECAY: 0.9999 19 | BASE_LR: 5e-4 20 | -------------------------------------------------------------------------------- /classification/configs/internimage_t_1k_224.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_ON_MEMORY: True 3 | MODEL: 4 | TYPE: intern_image 5 | DROP_PATH_RATE: 0.1 6 | INTERN_IMAGE: 7 | CORE_OP: 'DCNv3' 8 | DEPTHS: [4, 4, 18, 4] 9 | GROUPS: [4, 8, 16, 32] 10 | CHANNELS: 64 11 | OFFSET_SCALE: 1.0 12 | MLP_RATIO: 4.0 13 | TRAIN: 14 | EMA: 15 | ENABLE: True 16 | DECAY: 0.9999 17 | BASE_LR: 5e-4 18 | -------------------------------------------------------------------------------- /classification/configs/internimage_xl_22kto1k_384.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 384 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.2 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [5, 5, 24, 5] 15 | GROUPS: [12, 24, 48, 96] 16 | CHANNELS: 192 17 | LAYER_SCALE: 1e-5 18 | OFFSET_SCALE: 2.0 19 | MLP_RATIO: 4.0 20 | POST_NORM: True 21 | PRETRAINED: 'pretrained/internimage_xl_22k_192to384.pth' 22 | TRAIN: 23 | EMA: 24 | ENABLE: true 25 | DECAY: 0.9999 26 | EPOCHS: 20 27 | WARMUP_EPOCHS: 2 28 | WEIGHT_DECAY: 0.05 29 | BASE_LR: 2e-05 # 512 30 | WARMUP_LR: .0 31 | MIN_LR: .0 32 | LR_LAYER_DECAY: true 33 | LR_LAYER_DECAY_RATIO: 0.9 34 | USE_CHECKPOINT: true 35 | OPTIMIZER: 36 | DCN_LR_MUL: 0.1 37 | AMP_OPT_LEVEL: O0 38 | EVAL_FREQ: 1 39 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_b_1k_224.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_ON_MEMORY: True 3 | MODEL: 4 | TYPE: intern_image 5 | DROP_PATH_RATE: 0.5 6 | INTERN_IMAGE: 7 | CORE_OP: 'DCNv3' 8 | DEPTHS: [4, 
4, 21, 4] 9 | GROUPS: [7, 14, 28, 56] 10 | CHANNELS: 112 11 | LAYER_SCALE: 1e-5 12 | OFFSET_SCALE: 1.0 13 | MLP_RATIO: 4.0 14 | POST_NORM: True 15 | TRAIN: 16 | EMA: 17 | ENABLE: True 18 | DECAY: 0.9999 19 | BASE_LR: 5e-4 20 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_g_22kto1k_512.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 512 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.4 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [2, 2, 48, 4] 15 | GROUPS: [16, 32, 64, 128] 16 | CHANNELS: 512 17 | DW_KERNEL_SIZE: 5 18 | LAYER_SCALE: None 19 | OFFSET_SCALE: 1.0 20 | MLP_RATIO: 4.0 21 | POST_NORM: True 22 | LEVEL2_POST_NORM: True 23 | LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29, 35, 41, 47] 24 | CENTER_FEATURE_SCALE: True 25 | USE_CLIP_PROJECTOR: True 26 | PRETRAINED: 'pretrained/internimage_g_pretrainto22k_384.pth' 27 | TRAIN: 28 | EMA: 29 | ENABLE: true 30 | DECAY: 0.9999 31 | EPOCHS: 20 32 | WARMUP_EPOCHS: 2 33 | WEIGHT_DECAY: 0.05 34 | BASE_LR: 2e-05 # 512 35 | WARMUP_LR: .0 36 | MIN_LR: .0 37 | USE_CHECKPOINT: true 38 | OPTIMIZER: 39 | DCN_LR_MUL: 0.1 40 | AMP_OPT_LEVEL: O0 41 | EVAL_FREQ: 1 42 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_h_22kto1k_640.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 640 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.2 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [6, 6, 32, 6] 15 | GROUPS: [10, 20, 40, 80] 16 | CHANNELS: 320 17 | DW_KERNEL_SIZE: 5 18 | LAYER_SCALE: None 19 | OFFSET_SCALE: 1.0 20 | MLP_RATIO: 4.0 21 | POST_NORM: False 22 | RES_POST_NORM: True 23 | LEVEL2_POST_NORM: True 24 | LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29] 25 | CENTER_FEATURE_SCALE: True 26 | USE_CLIP_PROJECTOR: True 27 | PRETRAINED: 'pretrained/internimage_h_jointto22k_384.pth' 28 | TRAIN: 29 | EMA: 30 | ENABLE: true 31 | DECAY: 0.9999 32 | EPOCHS: 20 33 | WARMUP_EPOCHS: 2 34 | WEIGHT_DECAY: 0.05 35 | BASE_LR: 2e-05 # 512 36 | WARMUP_LR: .0 37 | MIN_LR: .0 38 | USE_CHECKPOINT: true 39 | OPTIMIZER: 40 | USE_ZERO: True 41 | DCN_LR_MUL: 0.1 42 | AMP_OPT_LEVEL: O0 43 | EVAL_FREQ: 1 44 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_l_22kto1k_384.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 384 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.1 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [5, 5, 22, 5] 15 | GROUPS: [10, 20, 40, 80] 16 | CHANNELS: 160 17 | LAYER_SCALE: 1e-5 18 | OFFSET_SCALE: 2.0 19 | MLP_RATIO: 4.0 20 | POST_NORM: True 21 | PRETRAINED: 'pretrained/internimage_l_22k_192to384.pth' 22 | TRAIN: 23 | EMA: 24 | ENABLE: true 25 | DECAY: 0.9999 26 | EPOCHS: 20 27 | WARMUP_EPOCHS: 2 28 | WEIGHT_DECAY: 0.05 29 | BASE_LR: 2e-05 # 512 30 | WARMUP_LR: .0 31 | MIN_LR: .0 32 | USE_CHECKPOINT: true 33 | OPTIMIZER: 34 | DCN_LR_MUL: 0.1 35 | AMP_OPT_LEVEL: O0 
36 | EVAL_FREQ: 1 37 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_s_1k_224.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_ON_MEMORY: True 3 | MODEL: 4 | TYPE: intern_image 5 | DROP_PATH_RATE: 0.4 6 | INTERN_IMAGE: 7 | CORE_OP: 'DCNv3' 8 | DEPTHS: [4, 4, 21, 4] 9 | GROUPS: [5, 10, 20, 40] 10 | CHANNELS: 80 11 | LAYER_SCALE: 1e-5 12 | OFFSET_SCALE: 1.0 13 | MLP_RATIO: 4.0 14 | POST_NORM: True 15 | TRAIN: 16 | EMA: 17 | ENABLE: True 18 | DECAY: 0.9999 19 | BASE_LR: 5e-4 20 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_t_1k_224.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_ON_MEMORY: True 3 | MODEL: 4 | TYPE: intern_image 5 | DROP_PATH_RATE: 0.1 6 | INTERN_IMAGE: 7 | CORE_OP: 'DCNv3' 8 | DEPTHS: [4, 4, 18, 4] 9 | GROUPS: [4, 8, 16, 32] 10 | CHANNELS: 64 11 | OFFSET_SCALE: 1.0 12 | MLP_RATIO: 4.0 13 | TRAIN: 14 | EMA: 15 | ENABLE: True 16 | DECAY: 0.9999 17 | BASE_LR: 5e-4 18 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_xl_22kto1k_384.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 384 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.2 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [5, 5, 24, 5] 15 | GROUPS: [12, 24, 48, 96] 16 | CHANNELS: 192 17 | LAYER_SCALE: 1e-5 18 | OFFSET_SCALE: 2.0 19 | MLP_RATIO: 4.0 20 | POST_NORM: True 21 | PRETRAINED: 'pretrained/internimage_xl_22k_192to384.pth' 22 | TRAIN: 23 | EMA: 24 | ENABLE: true 25 | DECAY: 0.9999 26 | EPOCHS: 20 27 | WARMUP_EPOCHS: 2 28 | WEIGHT_DECAY: 0.05 29 | BASE_LR: 2e-05 # 512 30 | WARMUP_LR: .0 31 | MIN_LR: .0 32 | USE_CHECKPOINT: true 33 | OPTIMIZER: 34 | DCN_LR_MUL: 0.1 35 | AMP_OPT_LEVEL: O0 36 | EVAL_FREQ: 1 37 | -------------------------------------------------------------------------------- /classification/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .build import build_loader, build_loader2 8 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_g_jointto22k_384/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_g_jointto22k_384", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": true, 13 | "channels": 512, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 2, 18 | 2, 19 | 48, 20 | 4 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 
0.0, 25 | "dw_kernel_size": 5, 26 | "groups": [ 27 | 16, 28 | 32, 29 | 64, 30 | 128 31 | ], 32 | "layer_scale": null, 33 | "level2_post_norm": true, 34 | "level2_post_norm_block_ids": [ 35 | 5, 36 | 11, 37 | 17, 38 | 23, 39 | 29, 40 | 35, 41 | 41, 42 | 47 43 | ], 44 | "mlp_ratio": 4.0, 45 | "model_type": "internimage", 46 | "norm_layer": "LN", 47 | "num_classes": 21841, 48 | "offset_scale": 1.0, 49 | "post_norm": true, 50 | "remove_center": false, 51 | "res_post_norm": false, 52 | "torch_dtype": "float32", 53 | "transformers_version": "4.37.2", 54 | "use_clip_projector": true, 55 | "with_cp": false 56 | } 57 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_g_jointto22k_384/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 384, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 384 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_h_jointto22k_384/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_h_jointto22k_384", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": true, 13 | "channels": 320, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 6, 18 | 6, 19 | 32, 20 | 6 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": 5, 26 | "groups": [ 27 | 10, 28 | 20, 29 | 40, 30 | 80 31 | ], 32 | "layer_scale": null, 33 | "level2_post_norm": true, 34 | "level2_post_norm_block_ids": [ 35 | 5, 36 | 11, 37 | 17, 38 | 23, 39 | 29 40 | ], 41 | "mlp_ratio": 4.0, 42 | "model_type": "internimage", 43 | "norm_layer": "LN", 44 | "num_classes": 21841, 45 | "offset_scale": 1.0, 46 | "post_norm": false, 47 | "remove_center": false, 48 | "res_post_norm": true, 49 | "torch_dtype": "float32", 50 | "transformers_version": "4.37.2", 51 | "use_clip_projector": true, 52 | "with_cp": false 53 | } 54 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_h_jointto22k_384/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 384, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 384 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_l_22k_384/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": 
"OpenGVLab/internimage_l_22k_384", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 160, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 5, 18 | 5, 19 | 22, 20 | 5 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 10, 28 | 20, 29 | 40, 30 | 80 31 | ], 32 | "layer_scale": 1e-05, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 21841, 39 | "offset_scale": 2.0, 40 | "post_norm": true, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_l_22k_384/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 384, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 384 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_xl_22k_384/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_xl_22k_384", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 192, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 5, 18 | 5, 19 | 24, 20 | 5 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 12, 28 | 24, 29 | 48, 30 | 96 31 | ], 32 | "layer_scale": 1e-05, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 21841, 39 | "offset_scale": 2.0, 40 | "post_norm": true, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_xl_22k_384/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 384, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": 
"CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 384 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_b_1k_224/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_b_1k_224", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 112, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 4, 18 | 4, 19 | 21, 20 | 4 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 7, 28 | 14, 29 | 28, 30 | 56 31 | ], 32 | "layer_scale": 1e-05, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 1000, 39 | "offset_scale": 1.0, 40 | "post_norm": true, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_b_1k_224/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 224, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 224 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_g_22kto1k_512/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_g_22kto1k_512", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": true, 13 | "channels": 512, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 2, 18 | 2, 19 | 48, 20 | 4 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": 5, 26 | "groups": [ 27 | 16, 28 | 32, 29 | 64, 30 | 128 31 | ], 32 | "layer_scale": null, 33 | "level2_post_norm": true, 34 | "level2_post_norm_block_ids": [ 35 | 5, 36 | 11, 37 | 17, 38 | 23, 39 | 29, 40 | 35, 41 | 41, 42 | 47 43 | ], 44 | "mlp_ratio": 4.0, 45 | "model_type": "internimage", 46 | "norm_layer": "LN", 47 | "num_classes": 1000, 48 | "offset_scale": 1.0, 49 | "post_norm": true, 50 | "remove_center": false, 51 | "res_post_norm": false, 52 | 
"torch_dtype": "float32", 53 | "transformers_version": "4.37.2", 54 | "use_clip_projector": true, 55 | "with_cp": false 56 | } 57 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_g_22kto1k_512/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 512, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 512 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_h_22kto1k_640/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_h_22kto1k_640", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": true, 13 | "channels": 320, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 6, 18 | 6, 19 | 32, 20 | 6 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": 5, 26 | "groups": [ 27 | 10, 28 | 20, 29 | 40, 30 | 80 31 | ], 32 | "layer_scale": null, 33 | "level2_post_norm": true, 34 | "level2_post_norm_block_ids": [ 35 | 5, 36 | 11, 37 | 17, 38 | 23, 39 | 29 40 | ], 41 | "mlp_ratio": 4.0, 42 | "model_type": "internimage", 43 | "norm_layer": "LN", 44 | "num_classes": 1000, 45 | "offset_scale": 1.0, 46 | "post_norm": false, 47 | "remove_center": false, 48 | "res_post_norm": true, 49 | "torch_dtype": "float32", 50 | "transformers_version": "4.37.2", 51 | "use_clip_projector": true, 52 | "with_cp": false 53 | } 54 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_h_22kto1k_640/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 640, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 640 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_l_22kto1k_384/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_l_22kto1k_384", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 160, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 5, 18 | 5, 19 | 22, 20 | 
5 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 10, 28 | 20, 29 | 40, 30 | 80 31 | ], 32 | "layer_scale": 1e-05, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 1000, 39 | "offset_scale": 2.0, 40 | "post_norm": true, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_l_22kto1k_384/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 384, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 384 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_s_1k_224/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_s_1k_224", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 80, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 4, 18 | 4, 19 | 21, 20 | 4 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 5, 28 | 10, 29 | 20, 30 | 40 31 | ], 32 | "layer_scale": 1e-05, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 1000, 39 | "offset_scale": 1.0, 40 | "post_norm": true, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_s_1k_224/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 224, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 224 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_t_1k_224/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_t_1k_224", 3 | "act_layer": "GELU", 4 | 
"architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 64, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 4, 18 | 4, 19 | 18, 20 | 4 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 4, 28 | 8, 29 | 16, 30 | 32 31 | ], 32 | "layer_scale": null, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 1000, 39 | "offset_scale": 1.0, 40 | "post_norm": false, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_t_1k_224/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 224, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 224 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_xl_22kto1k_384/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_xl_22kto1k_384", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 192, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 5, 18 | 5, 19 | 24, 20 | 5 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 12, 28 | 24, 29 | 48, 30 | 96 31 | ], 32 | "layer_scale": 1e-05, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 1000, 39 | "offset_scale": 2.0, 40 | "post_norm": true, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_xl_22kto1k_384/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 384, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 
0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 384 19 | } 20 | -------------------------------------------------------------------------------- /classification/logger.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | import functools 8 | import logging 9 | import os 10 | import sys 11 | 12 | from termcolor import colored 13 | 14 | 15 | @functools.lru_cache() 16 | def create_logger(output_dir, dist_rank=0, name=''): 17 | # create logger 18 | logger = logging.getLogger(name) 19 | logger.setLevel(logging.DEBUG) 20 | logger.propagate = False 21 | 22 | # create formatter 23 | fmt = '[%(asctime)s %(name)s] (%(filename)s %(lineno)d): %(levelname)s %(message)s' 24 | color_fmt = colored('[%(asctime)s %(name)s]', 'green') + \ 25 | colored('(%(filename)s %(lineno)d)', 'yellow') + \ 26 | ': %(levelname)s %(message)s' 27 | 28 | # create console handlers for master process 29 | if dist_rank == 0: 30 | console_handler = logging.StreamHandler(sys.stdout) 31 | console_handler.setLevel(logging.DEBUG) 32 | console_handler.setFormatter( 33 | logging.Formatter(fmt=color_fmt, datefmt='%Y-%m-%d %H:%M:%S')) 34 | logger.addHandler(console_handler) 35 | 36 | # create file handlers 37 | file_handler = logging.FileHandler(os.path.join( 38 | output_dir, f'log_rank{dist_rank}.txt'), 39 | mode='a') 40 | file_handler.setLevel(logging.DEBUG) 41 | file_handler.setFormatter( 42 | logging.Formatter(fmt=fmt, datefmt='%Y-%m-%d %H:%M:%S')) 43 | logger.addHandler(file_handler) 44 | 45 | return logger 46 | -------------------------------------------------------------------------------- /classification/meta_data/train.txt.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/classification/meta_data/train.txt.zip -------------------------------------------------------------------------------- /classification/meta_data/val.txt.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/classification/meta_data/val.txt.zip -------------------------------------------------------------------------------- /classification/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .build import build_model 8 | -------------------------------------------------------------------------------- /classification/ops_dcnv3/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3_func import DCNv3Function, dcnv3_core_pytorch 8 | 
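Editor's note: the `config.json`/`preprocessor_config.json` pairs above are plain Hugging Face checkpoints whose `auto_map` entries point at custom `configuration_internimage`/`modeling_internimage` code, so loading them requires `trust_remote_code=True`. Below is a minimal loading sketch, assuming the checkpoints are published on the Hub under the `_name_or_path` IDs shown and that the remote modeling code returns standard classifier outputs with a `logits` field; the image path is a placeholder.

```python
import torch
from PIL import Image
from transformers import AutoModelForImageClassification, CLIPFeatureExtractor

name = 'OpenGVLab/internimage_t_1k_224'  # any of the _name_or_path IDs above

# Reads preprocessor_config.json (feature_extractor_type: CLIPFeatureExtractor),
# which applies the 224/384/512/640 resize+center-crop and ImageNet mean/std.
processor = CLIPFeatureExtractor.from_pretrained(name)
# trust_remote_code pulls in the custom InternImage config/model classes
# named in the auto_map above.
model = AutoModelForImageClassification.from_pretrained(
    name, trust_remote_code=True)
model.eval()

image = Image.open('example.jpg')  # placeholder input
inputs = processor(images=image, return_tensors='pt')
with torch.no_grad():
    logits = model(**inputs).logits  # assumed output field
print(logits.argmax(-1).item())  # predicted ImageNet-1K class index
```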
-------------------------------------------------------------------------------- /classification/ops_dcnv3/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -------------------------------------------------------- 3 | # InternImage 4 | # Copyright (c) 2022 OpenGVLab 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | python setup.py build install 9 | -------------------------------------------------------------------------------- /classification/ops_dcnv3/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3 import DCNv3, DCNv3_pytorch 8 | -------------------------------------------------------------------------------- /classification/ops_dcnv3/src/cpu/dcnv3_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #pragma once 13 | #include <torch/extension.h> 14 | 15 | at::Tensor dcnv3_cpu_forward(const at::Tensor &input, const at::Tensor &offset, 16 | const at::Tensor &mask, const int kernel_h, 17 | const int kernel_w, const int stride_h, 18 | const int stride_w, const int pad_h, 19 | const int pad_w, const int dilation_h, 20 | const int dilation_w, const int group, 21 | const int group_channels, const float offset_scale, 22 | const int im2col_step); 23 | 24 | std::vector<at::Tensor> 25 | dcnv3_cpu_backward(const at::Tensor &input, const at::Tensor &offset, 26 | const at::Tensor &mask, const int kernel_h, 27 | const int kernel_w, const int stride_h, const int stride_w, 28 | const int pad_h, const int pad_w, const int dilation_h, 29 | const int dilation_w, const int group, 30 | const int group_channels, const float offset_scale, 31 | const at::Tensor &grad_output, const int im2col_step); 32 | -------------------------------------------------------------------------------- /classification/ops_dcnv3/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*!
2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #include "dcnv3.h" 13 | 14 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 15 | m.def("dcnv3_forward", &dcnv3_forward, "dcnv3_forward"); 16 | m.def("dcnv3_backward", &dcnv3_backward, "dcnv3_backward"); 17 | } 18 | -------------------------------------------------------------------------------- /classification/train_in1k.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-8} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-12} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | 13 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 14 | srun -p ${PARTITION} \ 15 | --job-name=${JOB_NAME} \ 16 | --gres=gpu:${GPUS_PER_NODE} \ 17 | --ntasks=${GPUS} \ 18 | --ntasks-per-node=${GPUS_PER_NODE} \ 19 | --cpus-per-task=${CPUS_PER_TASK} \ 20 | --kill-on-bad-exit=1 \ 21 | --quotatype=reserved \ 22 | ${SRUN_ARGS} \ 23 | python -u main.py \ 24 | --cfg ${CONFIG} \ 25 | --accumulation-steps 1 \ 26 | --local-rank 0 \ 27 | --data-path data/imagenet \ 28 | --output work_dirs ${@:4} 29 | -------------------------------------------------------------------------------- /classification/train_in1k_deepspeed.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-8} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-12} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | 13 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 14 | srun -p ${PARTITION} \ 15 | --job-name=${JOB_NAME} \ 16 | --gres=gpu:${GPUS_PER_NODE} \ 17 | --ntasks=${GPUS} \ 18 | --ntasks-per-node=${GPUS_PER_NODE} \ 19 | --cpus-per-task=${CPUS_PER_TASK} \ 20 | --kill-on-bad-exit=1 \ 21 | --quotatype=spot \ 22 | ${SRUN_ARGS} \ 23 | python -u main_deepspeed.py \ 24 | --cfg ${CONFIG} \ 25 | --local-rank 0 \ 26 | --data-path data/imagenet \ 27 | --output work_dirs_deepspeed ${@:4} 28 | -------------------------------------------------------------------------------- /classification/train_inat18.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-8} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-12} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | 13 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 14 | srun -p ${PARTITION} \ 15 | --job-name=${JOB_NAME} \ 16 | --gres=gpu:${GPUS_PER_NODE} \ 17 | --ntasks=${GPUS} \ 18 | --ntasks-per-node=${GPUS_PER_NODE} \ 19 | --cpus-per-task=${CPUS_PER_TASK} \ 20 | --kill-on-bad-exit=1 \ 21 | --quotatype=reserved \ 22 | ${SRUN_ARGS} \ 23 | python -u main.py \ 24 | --cfg ${CONFIG} \ 25 | --accumulation-steps 1 \ 26 | --local-rank 0 \ 27 | --data-path data/inat2018 \ 28 | --output work_dirs ${@:4} 29 | 
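Editor's note: the three Slurm launchers above share one calling convention: positional `PARTITION JOB_NAME CONFIG` arguments, resource sizing via the `GPUS`/`GPUS_PER_NODE`/`CPUS_PER_TASK` environment variables, and everything from the fourth argument onward forwarded to `main.py` through `${@:4}`. A hypothetical invocation follows; the partition name and config path are placeholders, not taken from this dump.

```bash
# 16 GPUs across two 8-GPU nodes on a Slurm partition named "gpu_part".
GPUS=16 GPUS_PER_NODE=8 CPUS_PER_TASK=12 \
    bash train_in1k.sh gpu_part internimage_t path/to/internimage_t_1k_224.yaml
```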
-------------------------------------------------------------------------------- /detection/configs/_base_/datasets/lvis_v0.5_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | _base_ = 'coco_instance.py' 3 | dataset_type = 'LVISV05Dataset' 4 | data_root = 'data/lvis_v0.5/' 5 | data = dict( 6 | samples_per_gpu=2, 7 | workers_per_gpu=2, 8 | train=dict( 9 | _delete_=True, 10 | type='ClassBalancedDataset', 11 | oversample_thr=1e-3, 12 | dataset=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'annotations/lvis_v0.5_train.json', 15 | img_prefix=data_root + 'train2017/')), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 19 | img_prefix=data_root + 'val2017/'), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 23 | img_prefix=data_root + 'val2017/')) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /detection/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=20) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=24) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = 
dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[27, 33]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=36) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_6x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=2000, 9 | warmup_ratio=0.001, 10 | step=[62, 68]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=72) 12 | -------------------------------------------------------------------------------- /detection/configs/lvis/README.md: -------------------------------------------------------------------------------- 1 | # LVIS 2 | 3 | ## Introduction 4 | 5 | LVIS is a dataset for long-tail instance segmentation. It has annotations for over 1000 object categories in 164k images. 6 | 7 | ## Model Zoo 8 | 9 | ### DINO + CB-InternImage 10 | 11 | Here we report the box AP on the minival set and the val set, respectively. 12 | 13 | | backbone | pretrain | minival (ss) | val (ss/ms) | #param | Config | Download | 14 | | :--------------: | :--------: | :----------: | :---------: | :----: | :-------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------: | 15 | | CB-InternImage-H | Objects365 | 65.8 | 62.3 / 63.2 | 2.18B | [config](./dino_4scale_cbinternimage_h_objects365_lvis_minival_ss.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_lvis.pth) | 16 | -------------------------------------------------------------------------------- /detection/configs/openimages/README.md: -------------------------------------------------------------------------------- 1 | # OpenImages 2 | 3 | ## Introduction 4 | 5 | OpenImages V6 is a large-scale dataset consisting of 9 million training images, 41,620 validation samples, and 125,456 test samples. It is partially annotated, with 9,600 trainable classes.
6 | 7 | ## Model Zoo 8 | 9 | ### DINO + CB-InternImage 10 | 11 | | backbone | pretrain | mAP (ss) | #param | Config | Download | 12 | | :--------------: | :--------: | :------: | :----: | :-----------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------: | 13 | | CB-InternImage-H | Objects365 | 74.1 | 2.18B | [config](./dino_4scale_cbinternimage_h_objects365_openimages_ss.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_openimages.pth) | 14 | -------------------------------------------------------------------------------- /detection/deploy/configs/_base_/backends/tensorrt-fp16.py: -------------------------------------------------------------------------------- 1 | backend_config = dict( 2 | type='tensorrt', common_config=dict(fp16_mode=True, max_workspace_size=0)) 3 | -------------------------------------------------------------------------------- /detection/deploy/configs/_base_/backends/tensorrt.py: -------------------------------------------------------------------------------- 1 | backend_config = dict( 2 | type='tensorrt', common_config=dict(fp16_mode=False, max_workspace_size=0)) 3 | -------------------------------------------------------------------------------- /detection/deploy/configs/_base_/onnx_config.py: -------------------------------------------------------------------------------- 1 | onnx_config = dict( 2 | type='onnx', 3 | export_params=True, 4 | keep_initializers_as_inputs=False, 5 | opset_version=11, 6 | save_file='end2end.onnx', 7 | input_names=['input'], 8 | output_names=['output'], 9 | input_shape=None, 10 | optimize=True) 11 | -------------------------------------------------------------------------------- /detection/deploy/configs/mmdet/_base_/base_dynamic.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./base_static.py'] 2 | onnx_config = dict( 3 | dynamic_axes={ 4 | 'input': { 5 | 0: 'batch', 6 | 2: 'height', 7 | 3: 'width' 8 | }, 9 | 'dets': { 10 | 0: 'batch', 11 | 1: 'num_dets', 12 | }, 13 | 'labels': { 14 | 0: 'batch', 15 | 1: 'num_dets', 16 | }, 17 | }, ) 18 | -------------------------------------------------------------------------------- /detection/deploy/configs/mmdet/_base_/base_instance-seg_dynamic.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./base_instance-seg_static.py'] 2 | onnx_config = dict( 3 | dynamic_axes={ 4 | 'input': { 5 | 0: 'batch', 6 | 2: 'height', 7 | 3: 'width' 8 | }, 9 | 'dets': { 10 | 0: 'batch', 11 | 1: 'num_dets', 12 | }, 13 | 'labels': { 14 | 0: 'batch', 15 | 1: 'num_dets', 16 | }, 17 | 'masks': { 18 | 0: 'batch', 19 | 1: 'num_dets', 20 | 2: 'height', 21 | 3: 'width' 22 | }, 23 | }) 24 | -------------------------------------------------------------------------------- /detection/deploy/configs/mmdet/_base_/base_instance-seg_static.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./base_static.py'] 2 | 3 | onnx_config = dict(output_names=['dets', 'labels', 'masks']) 4 | codebase_config = dict(post_processing=dict(export_postprocess_mask=False)) 5 | -------------------------------------------------------------------------------- /detection/deploy/configs/mmdet/_base_/base_static.py: -------------------------------------------------------------------------------- 1 | _base_ = 
['../../_base_/onnx_config.py'] 2 | 3 | onnx_config = dict(output_names=['dets', 'labels'], input_shape=None) 4 | codebase_config = dict( 5 | type='mmdet', 6 | task='ObjectDetection', 7 | model_type='end2end', 8 | post_processing=dict( 9 | score_threshold=0.05, 10 | confidence_threshold=0.005, # for YOLOv3 11 | iou_threshold=0.5, 12 | max_output_boxes_per_class=200, 13 | pre_top_k=5000, 14 | keep_top_k=100, 15 | background_label_id=-1, 16 | )) 17 | -------------------------------------------------------------------------------- /detection/deploy/configs/mmdet/instance-seg/instance-seg_tensorrt_dynamic-320x320-1344x1344.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/base_instance-seg_dynamic.py', 3 | '../../_base_/backends/tensorrt.py' 4 | ] 5 | 6 | backend_config = dict( 7 | common_config=dict(max_workspace_size=1 << 30), 8 | model_inputs=[ 9 | dict( 10 | input_shapes=dict( 11 | input=dict( 12 | min_shape=[1, 3, 320, 320], 13 | opt_shape=[1, 3, 800, 1344], 14 | max_shape=[1, 3, 1344, 1344]))) 15 | ]) 16 | -------------------------------------------------------------------------------- /detection/deploy/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/detection/deploy/demo.jpg -------------------------------------------------------------------------------- /detection/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29511} 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 10 | -------------------------------------------------------------------------------- /detection/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /detection/mmdet_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .datasets import * # noqa: F401,F403 8 | from .models import * # noqa: F401,F403 9 | -------------------------------------------------------------------------------- /detection/mmdet_custom/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .crowd_human import CrowdHumanDataset 8 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/__init__.py:
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .backbones import * # noqa: F401,F403 8 | from .dense_heads import * # noqa: F401,F403 9 | from .detectors import * # noqa: F401,F403 10 | from .necks import * # noqa: F401,F403 11 | from .utils import * # noqa: F401,F403 12 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .cbnet import CBInternImage 8 | from .intern_image import InternImage 9 | 10 | __all__ = ['InternImage', 'CBInternImage'] 11 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .cbdino_head import CBDINOHead 8 | from .deformable_detr_head import DeformableDETRHead 9 | from .detr_head import DETRHead 10 | from .dino_head import DINOHead 11 | 12 | __all__ = ['DeformableDETRHead', 'DETRHead', 'DINOHead', 'CBDINOHead'] 13 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .cbnet_dino import CBDINO 8 | from .dino import DINO 9 | 10 | __all__ = ['DINO', 'CBDINO'] 11 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/detectors/dino.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmdet.models.builder import DETECTORS 3 | from mmdet.models.detectors.detr import DETR 4 | 5 | 6 | @DETECTORS.register_module() 7 | class DINO(DETR): 8 | 9 | def __init__(self, *args, **kwargs): 10 | # Deliberately skip DETR.__init__ and call its parent's constructor instead. 11 | super(DETR, self).__init__(*args, **kwargs) 12 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .cbnet_channel_mapper import CBChannelMapper 2 | 3 | __all__ = ['CBChannelMapper'] 4 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/necks/cbnet_channel_mapper.py: -------------------------------------------------------------------------------- 1 | from mmdet.models.builder import NECKS 2 | from mmdet.models.necks import ChannelMapper 3 | 4 | 5 | @NECKS.register_module() 6 | class CBChannelMapper(ChannelMapper): 7 | 8 | def __init__(self, cb_idx=1, **kwargs): 9 | super(CBChannelMapper, self).__init__(**kwargs) 10 | self.cb_idx = cb_idx 11 | 12 | def forward(self, inputs): 13 | if not isinstance(inputs[0], (list, tuple)): 14 | inputs = [inputs] 15 | 16 | if self.training: 17 | outs = [] 18 | # CBNet training: map the features of every backbone branch. 19 | for x in inputs: 20 | out = super().forward(x) 21 | outs.append(out) 22 | return outs 23 | else: 24 | out = super().forward(inputs[self.cb_idx]) # inference: use only the cb_idx branch 25 | return out 26 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .query_denoising import build_dn_generator 2 | from .transformer import DinoTransformer, DinoTransformerDecoder 3 | 4 | __all__ = ['build_dn_generator', 'DinoTransformer', 'DinoTransformerDecoder'] 5 | -------------------------------------------------------------------------------- /detection/ops_dcnv3/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3_func import DCNv3Function, dcnv3_core_pytorch 8 | -------------------------------------------------------------------------------- /detection/ops_dcnv3/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -------------------------------------------------------- 3 | # InternImage 4 | # Copyright (c) 2022 OpenGVLab 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | python setup.py build install 9 | -------------------------------------------------------------------------------- /detection/ops_dcnv3/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3 import DCNv3, DCNv3_pytorch 8 | -------------------------------------------------------------------------------- /detection/ops_dcnv3/src/cpu/dcnv3_cpu.h:
-------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #pragma once 13 | #include <torch/extension.h> 14 | 15 | at::Tensor dcnv3_cpu_forward(const at::Tensor &input, const at::Tensor &offset, 16 | const at::Tensor &mask, const int kernel_h, 17 | const int kernel_w, const int stride_h, 18 | const int stride_w, const int pad_h, 19 | const int pad_w, const int dilation_h, 20 | const int dilation_w, const int group, 21 | const int group_channels, const float offset_scale, 22 | const int im2col_step); 23 | 24 | std::vector<at::Tensor> 25 | dcnv3_cpu_backward(const at::Tensor &input, const at::Tensor &offset, 26 | const at::Tensor &mask, const int kernel_h, 27 | const int kernel_w, const int stride_h, const int stride_w, 28 | const int pad_h, const int pad_w, const int dilation_h, 29 | const int dilation_w, const int group, 30 | const int group_channels, const float offset_scale, 31 | const at::Tensor &grad_output, const int im2col_step); 32 | -------------------------------------------------------------------------------- /detection/ops_dcnv3/src/cuda/dcnv3_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #pragma once 13 | #include <torch/extension.h> 14 | 15 | at::Tensor dcnv3_cuda_forward(const at::Tensor &input, const at::Tensor &offset, 16 | const at::Tensor &mask, const int kernel_h, 17 | const int kernel_w, const int stride_h, 18 | const int stride_w, const int pad_h, 19 | const int pad_w, const int dilation_h, 20 | const int dilation_w, const int group, 21 | const int group_channels, 22 | const float offset_scale, const int im2col_step); 23 | 24 | std::vector<at::Tensor> 25 | dcnv3_cuda_backward(const at::Tensor &input, const at::Tensor &offset, 26 | const at::Tensor &mask, const int kernel_h, 27 | const int kernel_w, const int stride_h, const int stride_w, 28 | const int pad_h, const int pad_w, const int dilation_h, 29 | const int dilation_w, const int group, 30 | const int group_channels, const float offset_scale, 31 | const at::Tensor &grad_output, const int im2col_step); 32 | -------------------------------------------------------------------------------- /detection/ops_dcnv3/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*!
2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #include "dcnv3.h" 13 | 14 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 15 | m.def("dcnv3_forward", &dcnv3_forward, "dcnv3_forward"); 16 | m.def("dcnv3_backward", &dcnv3_backward, "dcnv3_backward"); 17 | } 18 | -------------------------------------------------------------------------------- /detection/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | --quotatype=spot \ 24 | ${SRUN_ARGS} \ 25 | python -u test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 26 | -------------------------------------------------------------------------------- /detection/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-10} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | --quotatype=spot \ 24 | ${SRUN_ARGS} \ 25 | python -u train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 26 | -------------------------------------------------------------------------------- /detection/tools/evaluate/__init__.py: -------------------------------------------------------------------------------- 1 | from .compute_APMR import compute_APMR 2 | from .compute_JI import compute_JI_with_ignore 3 | -------------------------------------------------------------------------------- /docs/figs/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/docs/figs/arch.png -------------------------------------------------------------------------------- /docs/figs/intern_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/docs/figs/intern_pipeline.png -------------------------------------------------------------------------------- /docs/figs/intern_pipeline_en.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/docs/figs/intern_pipeline_en.png -------------------------------------------------------------------------------- /docs/figs/log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/docs/figs/log.png -------------------------------------------------------------------------------- /docs/figs/network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/docs/figs/network.png -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/cityscapes_1024x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (1024, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/cityscapes_extra_1024x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes_extra.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (1024, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | 
dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/pascal_voc12_aug.py: -------------------------------------------------------------------------------- 1 | _base_ = './pascal_voc12.py' 2 | # dataset settings 3 | data = dict( 4 | train=dict( 5 | ann_dir=['SegmentationClass', 'SegmentationClassAug'], 6 | split=[ 7 | 'ImageSets/Segmentation/train.txt', 8 | 'ImageSets/Segmentation/aug.txt' 9 | ])) 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/potsdam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/segmentation/configs/_base_/datasets/potsdam.py -------------------------------------------------------------------------------- /segmentation/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # yapf:disable 2 | log_config = dict( 3 | interval=50, 4 | hooks=[ 5 | dict(type='TextLoggerHook', by_epoch=False), 6 | # dict(type='TensorboardLoggerHook') 7 | ]) 8 | # yapf:enable 9 | dist_params = dict(backend='nccl') 10 | log_level = 'INFO' 11 | load_from = None 12 | resume_from = None 13 | workflow = [('train', 1)] 14 | cudnn_benchmark = True 15 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/segformer_mit-b0.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained=None, 6 | backbone=dict( 7 | type='MixVisionTransformer', 8 | in_channels=3, 9 | embed_dims=32, 10 | num_stages=4, 11 | num_layers=[2, 2, 2, 2], 12 | num_heads=[1, 2, 5, 8], 13 | patch_sizes=[7, 3, 3, 3], 14 | sr_ratios=[8, 4, 2, 1], 15 | out_indices=(0, 1, 2, 3), 16 | mlp_ratio=4, 17 | qkv_bias=True, 18 | drop_rate=0.0, 19 | attn_drop_rate=0.0, 20 | drop_path_rate=0.1), 21 | decode_head=dict( 22 | type='SegformerHead', 23 | in_channels=[32, 64, 160, 256], 24 | in_index=[0, 1, 2, 3], 25 | channels=256, 26 | dropout_ratio=0.1, 27 | num_classes=19, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict( 31 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 32 | # model training and testing settings 33 | train_cfg=dict(), 34 | test_cfg=dict(mode='whole')) 35 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/upernet_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 1, 1), 12 | strides=(1, 2, 2, 2), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='UPerHead', 19 | in_channels=[256, 512, 1024, 
2048], 20 | in_index=[0, 1, 2, 3], 21 | pool_scales=(1, 2, 3, 6), 22 | channels=512, 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=160000) 8 | checkpoint_config = dict(by_epoch=False, interval=16000) 9 | evaluation = dict(interval=16000, metric='mIoU', pre_eval=True) 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=20000) 8 | checkpoint_config = dict(by_epoch=False, interval=2000) 9 | evaluation = dict(interval=2000, metric='mIoU', pre_eval=True) 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_320k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=320000) 8 | checkpoint_config = dict(by_epoch=False, interval=32000) 9 | evaluation = dict(interval=32000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=40000) 8 | checkpoint_config = dict(by_epoch=False, interval=4000) 9 | evaluation = dict(interval=4000, metric='mIoU', pre_eval=True) 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | 
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=80000) 8 | checkpoint_config = dict(by_epoch=False, interval=8000) 9 | evaluation = dict(interval=8000, metric='mIoU', pre_eval=True) 10 | -------------------------------------------------------------------------------- /segmentation/configs/coco_stuff10k/README.md: -------------------------------------------------------------------------------- 1 | # COCO-Stuff-10K 2 | 3 | 4 | 5 | ## Introduction 6 | 7 | COCO-Stuff-10K is a dataset designed to enhance scene understanding tasks in computer vision by providing pixel-level annotations for both "things" (discrete objects with well-defined shapes, like cars and people) and "stuff" (amorphous background regions, such as grass and sky). This dataset augments 10,000 images from the original COCO dataset, offering detailed labels across 182 classes—91 "thing" classes and 91 "stuff" classes. 8 | 9 | ## Model Zoo 10 | 11 | ### Mask2Former + InternImage 12 | 13 | | backbone | resolution | mIoU (ss/ms) | #param | FLOPs | Config | Download | 14 | | :-----------: | :--------: | :----------: | :----: | :---: | :-------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | 15 | | InternImage-H | 512x512 | 59.2 / 59.6 | 1.28B | 1528G | [config](./mask2former_internimage_h_512_40k_cocostuff164k_to_10k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask2former_internimage_h_512_40k_cocostuff164k_to_10k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/mask2former_internimage_h_512_40k_cocostuff164k_to_10k.log.json) | 16 | -------------------------------------------------------------------------------- /segmentation/configs/nyu_depth_v2/README.md: -------------------------------------------------------------------------------- 1 | # NYU-Depth-V2 2 | 3 | 4 | 5 | ## Introduction 6 | 7 | The NYU Depth V2 dataset is a comprehensive collection of indoor scene data captured using a Microsoft Kinect device. It is widely utilized in computer vision research, particularly for tasks such as depth estimation and semantic segmentation. 
8 | 9 | ## Model Zoo 10 | 11 | ### Mask2Former + InternImage 12 | 13 | | backbone | resolution | mIoU (ss/ms) | #param | FLOPs | Config | Download | 14 | | :-----------: | :--------: | :----------: | :----: | :---: | :--------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | 15 | | InternImage-H | 480x480 | 67.1 / 68.1 | 1.07B | 867G | [config](./mask2former_internimage_h_480_40k_nyu.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask2former_internimage_h_480_40k_nyu.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/mask2former_internimage_h_480_40k_nyu.log.json) | 16 | -------------------------------------------------------------------------------- /segmentation/configs/pascal_context/README.md: -------------------------------------------------------------------------------- 1 | # Pascal Context 59 2 | 3 | 4 | 5 | ## Introduction 6 | 7 | The PASCAL Context dataset is an extension of the PASCAL VOC 2010 dataset, providing comprehensive pixel-wise annotations for over 400 classes, including the original 20 object categories and additional background classes. Due to the sparsity of many object categories, a subset of the 59 most frequent classes is commonly used for tasks like semantic segmentation. 8 | 9 | ## Model Zoo 10 | 11 | ### Mask2Former + InternImage 12 | 13 | | backbone | resolution | mIoU (ss/ms) | #param | FLOPs | Config | Download | 14 | | :-----------: | :--------: | :----------: | :----: | :---: | :----------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | 15 | | InternImage-H | 480x480 | 69.7 / 70.3 | 1.07B | 867G | [config](./mask2former_internimage_h_480_40k_pascal_context_59.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask2former_internimage_h_480_40k_pascal_context_59.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/mask2former_internimage_h_480_40k_pascal_context_59.log.json) | 16 | -------------------------------------------------------------------------------- /segmentation/deploy/configs/_base_/backends/tensorrt.py: -------------------------------------------------------------------------------- 1 | backend_config = dict( 2 | type='tensorrt', common_config=dict(fp16_mode=False, max_workspace_size=0)) 3 | -------------------------------------------------------------------------------- /segmentation/deploy/configs/_base_/onnx_config.py: -------------------------------------------------------------------------------- 1 | onnx_config = dict( 2 | type='onnx', 3 | export_params=True, 4 | keep_initializers_as_inputs=False, 5 | opset_version=11, 6 | save_file='end2end.onnx', 7 | input_names=['input'], 8 | output_names=['output'], 9 | input_shape=None, 10 | optimize=True) 11 | -------------------------------------------------------------------------------- /segmentation/deploy/configs/mmseg/segmentation_static.py: -------------------------------------------------------------------------------- 1 | _base_ = ['../_base_/onnx_config.py'] 2 | codebase_config = dict(type='mmseg', task='Segmentation', with_argmax=True) 3 | 
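Editor's note: these deploy configs are ordinary mmcv config files composed through `_base_`; a child file inherits `onnx_config`, `backend_config`, and `codebase_config` from its bases and deep-merges any overrides key by key. A small sketch of inspecting the merged result with mmcv 1.x, run from the repo root; it loads the static 512x512 TensorRT config that appears just below.

```python
from mmcv import Config

# _base_ chain: segmentation_static.py -> onnx_config.py, plus backends/tensorrt.py.
cfg = Config.fromfile(
    'segmentation/deploy/configs/mmseg/segmentation_tensorrt_static-512x512.py')

print(cfg.codebase_config.task)     # 'Segmentation' (from segmentation_static.py)
print(cfg.onnx_config.input_shape)  # [512, 512], overriding the base's None
# Dict overrides merge key-wise: fp16_mode stays False from the base backend
# config while max_workspace_size is raised to 1 GiB by the child.
print(cfg.backend_config.common_config)
```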
-------------------------------------------------------------------------------- /segmentation/deploy/configs/mmseg/segmentation_tensorrt_static-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./segmentation_static.py', '../_base_/backends/tensorrt.py'] 2 | 3 | onnx_config = dict(input_shape=[512, 512]) 4 | backend_config = dict( 5 | common_config=dict(max_workspace_size=1 << 30), 6 | model_inputs=[ 7 | dict( 8 | input_shapes=dict( 9 | input=dict( 10 | min_shape=[1, 3, 512, 512], 11 | opt_shape=[1, 3, 512, 512], 12 | max_shape=[1, 3, 512, 512]))) 13 | ]) 14 | -------------------------------------------------------------------------------- /segmentation/deploy/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/segmentation/deploy/demo.png -------------------------------------------------------------------------------- /segmentation/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29510} 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 10 | -------------------------------------------------------------------------------- /segmentation/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29300} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /segmentation/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | # -*- coding: utf-8 -*- 8 | from .custom_layer_decay_optimizer_constructor import \ 9 | CustomLayerDecayOptimizerConstructor 10 | 11 | __all__ = ['CustomLayerDecayOptimizerConstructor',] 12 | -------------------------------------------------------------------------------- /segmentation/mmseg_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .core import * # noqa: F401,F403 8 | from .datasets import * # noqa: F401,F403 9 | from .models import * # noqa: F401,F403 10 | -------------------------------------------------------------------------------- /segmentation/mmseg_custom/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Shanghai AI Lab. All rights reserved. 
--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from mmseg.core.evaluation import *  # noqa: F401, F403
from mmseg.core.seg import *  # noqa: F401, F403

from .anchor import *  # noqa: F401,F403
from .box import *  # noqa: F401,F403
from .evaluation import *  # noqa: F401,F403
from .mask import *  # noqa: F401,F403
from .utils import *  # noqa: F401, F403

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/anchor/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .point_generator import MlvlPointGenerator  # noqa: F401,F403

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/anchor/builder.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import warnings

from mmcv.utils import Registry, build_from_cfg

PRIOR_GENERATORS = Registry('Generator for anchors and points')

ANCHOR_GENERATORS = PRIOR_GENERATORS


def build_prior_generator(cfg, default_args=None):
    return build_from_cfg(cfg, PRIOR_GENERATORS, default_args)


def build_anchor_generator(cfg, default_args=None):
    warnings.warn('``build_anchor_generator`` is deprecated, please use '
                  '``build_prior_generator`` instead')
    return build_prior_generator(cfg, default_args=default_args)

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/box/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .builder import *  # noqa: F401,F403
from .samplers import MaskPseudoSampler  # noqa: F401,F403

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/box/builder.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.utils import Registry, build_from_cfg

BBOX_SAMPLERS = Registry('bbox_sampler')
BBOX_CODERS = Registry('bbox_coder')


def build_sampler(cfg, **default_args):
    """Builder of box sampler."""
    return build_from_cfg(cfg, BBOX_SAMPLERS, default_args)


def build_bbox_coder(cfg, **default_args):
    """Builder of box coder."""
    return build_from_cfg(cfg, BBOX_CODERS, default_args)

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/box/samplers/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .mask_pseudo_sampler import MaskPseudoSampler  # noqa: F401,F403
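
The builders above all follow the same registry pattern: a component registers under its type name and is instantiated from a config dict. A minimal usage sketch, assuming `MlvlPointGenerator` registers itself in `PRIOR_GENERATORS` under its class name (as its mmdet counterpart does):

# Sketch only: building a prior generator from a config dict.
# Importing mmseg_custom triggers the registration side effects.
from mmseg_custom.core.anchor.builder import build_prior_generator

prior_cfg = dict(type='MlvlPointGenerator', strides=[4, 8, 16, 32])
prior_generator = build_prior_generator(prior_cfg)
# build_anchor_generator(prior_cfg) returns the same object, with a
# deprecation warning.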
--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .panoptic_utils import INSTANCE_OFFSET  # noqa: F401,F403

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/evaluation/panoptic_utils.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# A custom value used to distinguish instance IDs from category IDs; it must
# be greater than the number of categories.
# For a pixel in the panoptic result map:
#   pan_id = ins_id * INSTANCE_OFFSET + cat_id
INSTANCE_OFFSET = 1000

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/mask/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .utils import mask2bbox  # noqa: F401,F403

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .dist_utils import (DistOptimizerHook, all_reduce_dict, allreduce_grads,
                         reduce_mean)
from .misc import add_prefix, multi_apply

__all__ = [
    'add_prefix', 'multi_apply', 'DistOptimizerHook', 'allreduce_grads',
    'all_reduce_dict', 'reduce_mean'
]

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/utils/misc.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from functools import partial  # required by multi_apply below


def multi_apply(func, *args, **kwargs):
    """Apply function to a list of arguments.

    Note:
        This function applies ``func`` to multiple inputs and maps the
        multiple outputs of ``func`` into different lists. Each list
        contains the same type of outputs corresponding to different
        inputs.

    Args:
        func (Function): A function that will be applied to a list of
            arguments.

    Returns:
        tuple(list): A tuple containing multiple lists, where each list
            contains one kind of the results returned by the function.
    """
    pfunc = partial(func, **kwargs) if kwargs else func
    map_results = map(pfunc, *args)
    return tuple(map(list, zip(*map_results)))


def add_prefix(inputs, prefix):
    """Add prefix for dict.

    Args:
        inputs (dict): The input dict with str keys.
        prefix (str): The prefix to add.

    Returns:
        dict: The dict with keys updated with ``prefix``.
    """
    outputs = dict()
    for name, value in inputs.items():
        outputs[f'{prefix}.{name}'] = value

    return outputs
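
A quick sketch of what the two helpers above do: `multi_apply` fans one function out over a sequence of inputs and regroups the per-output results, and `add_prefix` namespaces a loss dict:

# Sketch only: illustrating multi_apply and add_prefix from misc.py above.
def square_and_cube(x):
    return x * x, x ** 3

squares, cubes = multi_apply(square_and_cube, [1, 2, 3])
# squares == [1, 4, 9]; cubes == [1, 8, 27]

print(add_prefix({'loss_mask': 0.5}, 'decode'))
# {'decode.loss_mask': 0.5}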
--------------------------------------------------------------------------------
/segmentation/mmseg_custom/datasets/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .dataset_wrappers import ConcatDataset
from .mapillary import MapillaryDataset  # noqa: F401,F403
from .nyu_depth_v2 import NYUDepthV2Dataset  # noqa: F401,F403
from .pipelines import *  # noqa: F401,F403

__all__ = ['MapillaryDataset', 'NYUDepthV2Dataset', 'ConcatDataset']

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .formatting import DefaultFormatBundle, ToMask
from .transform import MapillaryHack, PadShortSide, SETR_Resize

__all__ = [
    'DefaultFormatBundle', 'ToMask', 'SETR_Resize',
    'PadShortSide', 'MapillaryHack'
]

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/__init__.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

from .backbones import *  # noqa: F401,F403
from .decode_heads import *  # noqa: F401,F403
from .losses import *  # noqa: F401,F403
from .plugins import *  # noqa: F401,F403
from .segmentors import *  # noqa: F401,F403
from .utils import *  # noqa: F401,F403

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/backbones/__init__.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

from .intern_image import InternImage

__all__ = ['InternImage']

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/builder.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.utils import Registry

TRANSFORMER = Registry('Transformer')
MASK_ASSIGNERS = Registry('mask_assigner')
MATCH_COST = Registry('match_cost')


def build_match_cost(cfg):
    """Build Match Cost."""
    return MATCH_COST.build(cfg)


def build_assigner(cfg):
    """Build Assigner."""
    return MASK_ASSIGNERS.build(cfg)


def build_transformer(cfg):
    """Build Transformer."""
    return TRANSFORMER.build(cfg)
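
The three registries above use mmcv's standard `Registry.build` path: a class registers under its type name and is then instantiated from a config dict. A self-contained sketch; the `ToyCost` class is hypothetical, purely for illustration:

# Sketch only: registering and building through MATCH_COST above.
from mmseg_custom.models.builder import MATCH_COST, build_match_cost


@MATCH_COST.register_module()
class ToyCost:  # hypothetical cost, for illustration only
    def __init__(self, weight=1.0):
        self.weight = weight


cost = build_match_cost(dict(type='ToyCost', weight=2.0))
assert cost.weight == 2.0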
--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/decode_heads/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .mask2former_head import Mask2FormerHead
from .maskformer_head import MaskFormerHead

__all__ = [
    'MaskFormerHead',
    'Mask2FormerHead',
]

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/losses/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
                                 cross_entropy, mask_cross_entropy)
from .dice_loss import DiceLoss
from .focal_loss import FocalLoss
from .match_costs import (ClassificationCost, CrossEntropyLossCost, DiceCost,
                          MaskFocalLossCost)

__all__ = [
    'cross_entropy', 'binary_cross_entropy', 'mask_cross_entropy',
    'CrossEntropyLoss', 'DiceLoss', 'FocalLoss', 'ClassificationCost',
    'MaskFocalLossCost', 'DiceCost', 'CrossEntropyLossCost'
]

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/plugins/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .msdeformattn_pixel_decoder import MSDeformAttnPixelDecoder
from .pixel_decoder import PixelDecoder, TransformerEncoderPixelDecoder

__all__ = [
    'PixelDecoder', 'TransformerEncoderPixelDecoder',
    'MSDeformAttnPixelDecoder'
]

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/segmentors/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .encoder_decoder_mask2former import EncoderDecoderMask2Former
from .encoder_decoder_mask2former_aug import EncoderDecoderMask2FormerAug

__all__ = ['EncoderDecoderMask2Former', 'EncoderDecoderMask2FormerAug']
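
The match costs exported from `losses` above are typically combined inside a Hungarian-matching assigner config, using the `MaskHungarianAssigner` exported from `models/utils` below. A hedged sketch; the argument names mirror mmdet's Mask2Former configs, and the weights are placeholders rather than values from this repo:

# Sketch only: a Mask2Former-style matching config built from the exported
# costs. Weights are placeholders.
assigner = dict(
    type='MaskHungarianAssigner',
    cls_cost=dict(type='ClassificationCost', weight=2.0),
    mask_cost=dict(type='CrossEntropyLossCost', weight=5.0, use_sigmoid=True),
    dice_cost=dict(type='DiceCost', weight=5.0, pred_act=True, eps=1.0))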
--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .assigner import MaskHungarianAssigner
from .point_sample import get_uncertain_point_coords_with_randomness
from .positional_encoding import (LearnedPositionalEncoding,
                                  SinePositionalEncoding)
from .transformer import (DetrTransformerDecoder, DetrTransformerDecoderLayer,
                          DynamicConv, Transformer)

__all__ = [
    'DetrTransformerDecoderLayer', 'DetrTransformerDecoder', 'DynamicConv',
    'Transformer', 'LearnedPositionalEncoding', 'SinePositionalEncoding',
    'MaskHungarianAssigner', 'get_uncertain_point_coords_with_randomness'
]

--------------------------------------------------------------------------------
/segmentation/ops_dcnv3/functions/__init__.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

from .dcnv3_func import DCNv3Function, dcnv3_core_pytorch

--------------------------------------------------------------------------------
/segmentation/ops_dcnv3/make.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

python setup.py build install

--------------------------------------------------------------------------------
/segmentation/ops_dcnv3/modules/__init__.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

from .dcnv3 import DCNv3, DCNv3_pytorch
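
A hedged usage sketch for the DCNv3 modules exported above. The constructor arguments are assumptions inferred from the operator signature in `dcnv3_cpu.h` below, and the channels-last input layout follows InternImage's convention; treat this as illustrative, not the module's definitive interface:

# Sketch only: running the pure-PyTorch reference implementation of DCNv3.
# Argument names are assumptions based on the C++/CUDA operator signature.
import torch

from ops_dcnv3.modules import DCNv3_pytorch

dcn = DCNv3_pytorch(channels=64, kernel_size=3, stride=1, pad=1,
                    dilation=1, group=4, offset_scale=1.0)
x = torch.randn(2, 56, 56, 64)  # channels-last input: (N, H, W, C)
y = dcn(x)                      # same spatial size with stride=1, pad=1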
--------------------------------------------------------------------------------
/segmentation/ops_dcnv3/src/cpu/dcnv3_cpu.h:
--------------------------------------------------------------------------------
/*!
**************************************************************************************************
* InternImage
* Copyright (c) 2022 OpenGVLab
* Licensed under The MIT License [see LICENSE for details]
**************************************************************************************************
* Modified from
* https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
**************************************************************************************************
*/

#pragma once
#include <torch/extension.h>

at::Tensor dcnv3_cpu_forward(const at::Tensor &input, const at::Tensor &offset,
                             const at::Tensor &mask, const int kernel_h,
                             const int kernel_w, const int stride_h,
                             const int stride_w, const int pad_h,
                             const int pad_w, const int dilation_h,
                             const int dilation_w, const int group,
                             const int group_channels, const float offset_scale,
                             const int im2col_step);

std::vector<at::Tensor>
dcnv3_cpu_backward(const at::Tensor &input, const at::Tensor &offset,
                   const at::Tensor &mask, const int kernel_h,
                   const int kernel_w, const int stride_h, const int stride_w,
                   const int pad_h, const int pad_w, const int dilation_h,
                   const int dilation_w, const int group,
                   const int group_channels, const float offset_scale,
                   const at::Tensor &grad_output, const int im2col_step);

--------------------------------------------------------------------------------
/segmentation/ops_dcnv3/src/vision.cpp:
--------------------------------------------------------------------------------
/*!
**************************************************************************************************
* InternImage
* Copyright (c) 2022 OpenGVLab
* Licensed under The MIT License [see LICENSE for details]
**************************************************************************************************
* Modified from
* https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
**************************************************************************************************
*/

#include "dcnv3.h"

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("dcnv3_forward", &dcnv3_forward, "dcnv3_forward");
    m.def("dcnv3_backward", &dcnv3_backward, "dcnv3_backward");
}

--------------------------------------------------------------------------------
/segmentation/slurm_test.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

PARTITION=$1
JOB_NAME=$2
CONFIG=$3
CHECKPOINT=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
PY_ARGS=${@:5}
SRUN_ARGS=${SRUN_ARGS:-""}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --kill-on-bad-exit=1 \
    --quotatype=auto \
    ${SRUN_ARGS} \
    python -u test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}

--------------------------------------------------------------------------------
/segmentation/slurm_train.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

PARTITION=$1
JOB_NAME=$2
CONFIG=$3
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
SRUN_ARGS=${SRUN_ARGS:-""}
PY_ARGS=${@:4}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --quotatype=spot \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u train.py ${CONFIG} --launcher="slurm" ${PY_ARGS}

--------------------------------------------------------------------------------
/tensorrt/modulated_deform_conv_v3/trt_deform_conv_v3_kernel.hpp:
--------------------------------------------------------------------------------
#ifndef TRT_DEFORM_CONV_V3_KERNEL_HPP
#define TRT_DEFORM_CONV_V3_KERNEL_HPP
#include <cuda_runtime.h>

#include "common_cuda_helper.hpp"

template <typename scalar_t>
void DeformConvv3ForwardCUDAKernelLauncher(const scalar_t* input, const scalar_t* offset,
                                           const scalar_t* mask, scalar_t* output, void* workspace,
                                           int batch, int channels, int height, int width,
                                           int channels_out, int kernel_w, int kernel_h,
                                           int stride_w, int stride_h, int pad_w, int pad_h,
                                           int dilation_w, int dilation_h, int group,
                                           int group_channel, float offset_scale, int im2col_step,
                                           cudaStream_t stream);

#endif  // TRT_DEFORM_CONV_V3_KERNEL_HPP
--------------------------------------------------------------------------------