├── .flake8 ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── README_CN.md ├── autonomous_driving ├── Online-HD-Map-Construction │ ├── LICENSE │ ├── README.md │ ├── src │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── _base_ │ │ │ │ ├── datasets │ │ │ │ │ ├── coco_instance.py │ │ │ │ │ ├── kitti-3d-3class.py │ │ │ │ │ ├── kitti-3d-car.py │ │ │ │ │ ├── lyft-3d.py │ │ │ │ │ ├── nuim_instance.py │ │ │ │ │ ├── nus-3d.py │ │ │ │ │ ├── nus-mono3d.py │ │ │ │ │ ├── range100_lyft-3d.py │ │ │ │ │ ├── s3dis_seg-3d-13class.py │ │ │ │ │ ├── scannet-3d-18class.py │ │ │ │ │ ├── scannet_seg-3d-20class.py │ │ │ │ │ ├── sunrgbd-3d-10class.py │ │ │ │ │ ├── waymoD5-3d-3class.py │ │ │ │ │ └── waymoD5-3d-car.py │ │ │ │ ├── default_runtime.py │ │ │ │ ├── models │ │ │ │ │ ├── 3dssd.py │ │ │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ │ │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ │ │ │ ├── fcos3d.py │ │ │ │ │ ├── groupfree3d.py │ │ │ │ │ ├── h3dnet.py │ │ │ │ │ ├── hv_pointpillars_fpn_lyft.py │ │ │ │ │ ├── hv_pointpillars_fpn_nus.py │ │ │ │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ │ │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ │ │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ │ │ │ ├── hv_second_secfpn_kitti.py │ │ │ │ │ ├── hv_second_secfpn_waymo.py │ │ │ │ │ ├── imvotenet_image.py │ │ │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ │ │ ├── paconv_cuda_ssg.py │ │ │ │ │ ├── paconv_ssg.py │ │ │ │ │ ├── parta2.py │ │ │ │ │ ├── pointnet2_msg.py │ │ │ │ │ ├── pointnet2_ssg.py │ │ │ │ │ └── votenet.py │ │ │ │ └── schedules │ │ │ │ │ ├── cosine.py │ │ │ │ │ ├── cyclic_20e.py │ │ │ │ │ ├── cyclic_40e.py │ │ │ │ │ ├── mmdet_schedule_1x.py │ │ │ │ │ ├── schedule_2x.py │ │ │ │ │ ├── schedule_3x.py │ │ │ │ │ ├── seg_cosine_150e.py │ │ │ │ │ ├── seg_cosine_200e.py │ │ │ │ │ └── seg_cosine_50e.py │ │ │ ├── vectormapnet.py │ │ │ └── vectormapnet_intern.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── argo_dataset.py │ │ │ ├── base_dataset.py │ │ │ ├── evaluation │ │ │ │ ├── AP.py │ │ │ │ ├── __init__.py │ │ │ │ ├── distance.py │ │ │ │ └── vector_eval.py │ │ │ └── pipelines │ │ │ │ ├── __init__.py │ │ │ │ ├── formating.py │ │ │ │ ├── loading.py │ │ │ │ ├── poly_bbox.py │ │ │ │ ├── transform.py │ │ │ │ └── vectorize.py │ │ └── models │ │ │ ├── __init__.py │ │ │ ├── assigner │ │ │ ├── __init__.py │ │ │ ├── assigner.py │ │ │ └── match_cost.py │ │ │ ├── augmentation │ │ │ ├── __init__.py │ │ │ └── sythesis_det.py │ │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ ├── internimage.py │ │ │ └── ipm_backbone.py │ │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── base_map_head.py │ │ │ ├── detgen_utils │ │ │ │ ├── __init__.py │ │ │ │ ├── causal_trans.py │ │ │ │ └── utils.py │ │ │ ├── detr_bbox.py │ │ │ ├── detr_head.py │ │ │ ├── dg_head.py │ │ │ ├── map_element_detector.py │ │ │ └── polyline_generator.py │ │ │ ├── losses │ │ │ ├── __init__.py │ │ │ └── detr_loss.py │ │ │ ├── mapers │ │ │ ├── __init__.py │ │ │ ├── base_mapper.py │ │ │ └── vectormapnet.py │ │ │ └── transformer_utils │ │ │ ├── __init__.py │ │ │ ├── base_transformer.py │ │ │ ├── deformable_transformer.py │ │ │ └── fp16_dattn.py │ └── tools │ │ ├── dist_test.sh │ │ ├── dist_train.sh │ │ ├── evaluate_submission.py │ │ ├── mmdet_test.py │ │ ├── mmdet_train.py │ │ ├── test.py │ │ ├── train.py │ │ └── visualization │ │ ├── renderer.py │ │ └── visualize.py ├── README.md ├── occupancy_prediction │ ├── CITATION.cff │ ├── CODE_OF_CONDUCT.md │ ├── LICENSE │ ├── README.md │ ├── docs │ │ └── 
getting_started.md │ ├── projects │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── _base_ │ │ │ │ ├── datasets │ │ │ │ │ ├── coco_instance.py │ │ │ │ │ ├── kitti-3d-3class.py │ │ │ │ │ ├── kitti-3d-car.py │ │ │ │ │ ├── lyft-3d.py │ │ │ │ │ ├── nuim_instance.py │ │ │ │ │ ├── nus-3d.py │ │ │ │ │ ├── nus-mono3d.py │ │ │ │ │ ├── range100_lyft-3d.py │ │ │ │ │ ├── s3dis-3d-5class.py │ │ │ │ │ ├── s3dis_seg-3d-13class.py │ │ │ │ │ ├── scannet-3d-18class.py │ │ │ │ │ ├── scannet_seg-3d-20class.py │ │ │ │ │ ├── sunrgbd-3d-10class.py │ │ │ │ │ ├── waymoD5-3d-3class.py │ │ │ │ │ └── waymoD5-3d-car.py │ │ │ │ ├── default_runtime.py │ │ │ │ ├── models │ │ │ │ │ ├── 3dssd.py │ │ │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ │ │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ │ │ │ ├── fcos3d.py │ │ │ │ │ ├── groupfree3d.py │ │ │ │ │ ├── h3dnet.py │ │ │ │ │ ├── hv_pointpillars_fpn_lyft.py │ │ │ │ │ ├── hv_pointpillars_fpn_nus.py │ │ │ │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ │ │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ │ │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ │ │ │ ├── hv_second_secfpn_kitti.py │ │ │ │ │ ├── hv_second_secfpn_waymo.py │ │ │ │ │ ├── imvotenet_image.py │ │ │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ │ │ ├── paconv_cuda_ssg.py │ │ │ │ │ ├── paconv_ssg.py │ │ │ │ │ ├── parta2.py │ │ │ │ │ ├── pointnet2_msg.py │ │ │ │ │ ├── pointnet2_ssg.py │ │ │ │ │ └── votenet.py │ │ │ │ └── schedules │ │ │ │ │ ├── cosine.py │ │ │ │ │ ├── cyclic_20e.py │ │ │ │ │ ├── cyclic_40e.py │ │ │ │ │ ├── mmdet_schedule_1x.py │ │ │ │ │ ├── schedule_2x.py │ │ │ │ │ ├── schedule_3x.py │ │ │ │ │ ├── seg_cosine_150e.py │ │ │ │ │ ├── seg_cosine_200e.py │ │ │ │ │ └── seg_cosine_50e.py │ │ │ ├── bevformer │ │ │ │ ├── .ipynb_checkpoints │ │ │ │ │ └── bevformer_small_occ-checkpoint.py │ │ │ │ ├── bevformer_base_occ.py │ │ │ │ ├── bevformer_intern-s_occ.py │ │ │ │ └── bevformer_small_occ.py │ │ │ └── datasets │ │ │ │ └── custom_nus-3d.py │ │ └── mmdet3d_plugin │ │ │ ├── __init__.py │ │ │ ├── bevformer │ │ │ ├── __init__.py │ │ │ ├── apis │ │ │ │ ├── __init__.py │ │ │ │ ├── mmdet_train.py │ │ │ │ ├── test.py │ │ │ │ └── train.py │ │ │ ├── backbones │ │ │ │ ├── __init__.py │ │ │ │ ├── custom_layer_decay_optimizer_constructor.py │ │ │ │ ├── internimage.py │ │ │ │ └── ops_dcnv3 │ │ │ │ │ ├── functions │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── dcnv3_func.py │ │ │ │ │ ├── make.sh │ │ │ │ │ ├── modules │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── dcnv3.py │ │ │ │ │ ├── setup.py │ │ │ │ │ ├── src │ │ │ │ │ ├── cpu │ │ │ │ │ │ ├── dcnv3_cpu.cpp │ │ │ │ │ │ └── dcnv3_cpu.h │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── dcnv3_cuda.cu │ │ │ │ │ │ ├── dcnv3_cuda.h │ │ │ │ │ │ └── dcnv3_im2col_cuda.cuh │ │ │ │ │ ├── dcnv3.h │ │ │ │ │ └── vision.cpp │ │ │ │ │ └── test.py │ │ │ ├── dense_heads │ │ │ │ ├── __init__.py │ │ │ │ └── bevformer_occ_head.py │ │ │ ├── detectors │ │ │ │ ├── __init__.py │ │ │ │ └── bevformer_occ.py │ │ │ ├── hooks │ │ │ │ ├── __init__.py │ │ │ │ └── custom_hooks.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── custom_base_transformer_layer.py │ │ │ │ ├── decoder.py │ │ │ │ ├── encoder.py │ │ │ │ ├── multi_scale_deformable_attn_function.py │ │ │ │ ├── spatial_cross_attention.py │ │ │ │ ├── temporal_self_attention.py │ │ │ │ ├── transformer.py │ │ │ │ └── transformer_occ.py │ │ │ └── runner │ │ │ │ ├── __init__.py │ │ │ │ └── epoch_based_runner.py │ │ │ ├── core │ │ │ ├── bbox │ │ │ │ ├── assigners │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── hungarian_assigner_3d.py │ │ │ │ ├── 
coders │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── nms_free_coder.py │ │ │ │ ├── match_costs │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── match_cost.py │ │ │ │ └── util.py │ │ │ └── evaluation │ │ │ │ ├── __init__.py │ │ │ │ └── eval_hooks.py │ │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── nuscenes_dataset.py │ │ │ ├── nuscenes_occ.py │ │ │ ├── nuscnes_eval.py │ │ │ ├── occ_metrics.py │ │ │ ├── pipelines │ │ │ │ ├── __init__.py │ │ │ │ ├── formating.py │ │ │ │ ├── loading.py │ │ │ │ └── transform_3d.py │ │ │ └── samplers │ │ │ │ ├── __init__.py │ │ │ │ ├── distributed_sampler.py │ │ │ │ ├── group_sampler.py │ │ │ │ └── sampler.py │ │ │ └── models │ │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ └── vovnet.py │ │ │ ├── hooks │ │ │ ├── __init__.py │ │ │ └── hooks.py │ │ │ ├── opt │ │ │ ├── __init__.py │ │ │ └── adamw.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── bricks.py │ │ │ ├── grid_mask.py │ │ │ ├── position_embedding.py │ │ │ ├── positional_encoding.py │ │ │ └── visual.py │ ├── tools │ │ ├── .ipynb_checkpoints │ │ │ └── train-checkpoint.py │ │ ├── analysis_tools │ │ │ ├── __init__.py │ │ │ ├── analyze_logs.py │ │ │ ├── benchmark.py │ │ │ ├── get_params.py │ │ │ └── visual.py │ │ ├── create_data.py │ │ ├── data_converter │ │ │ ├── __init__.py │ │ │ ├── create_gt_database.py │ │ │ ├── nuscenes_converter.py │ │ │ └── nuscenes_occ_converter.py │ │ ├── dist_test.sh │ │ ├── dist_train.sh │ │ ├── fp16 │ │ │ ├── dist_train.sh │ │ │ └── train.py │ │ ├── misc │ │ │ ├── browse_dataset.py │ │ │ ├── fuse_conv_bn.py │ │ │ ├── print_config.py │ │ │ └── visualize_results.py │ │ ├── model_converters │ │ │ ├── convert_votenet_checkpoints.py │ │ │ ├── publish_model.py │ │ │ └── regnet2mmdet.py │ │ ├── slurm_train.sh │ │ ├── test.py │ │ └── train.py │ └── utils │ │ └── vis.py └── openlane-v2 │ ├── .gitignore │ ├── CITATION │ ├── CODE_OF_CONDUCT │ ├── LICENSE │ ├── README-zh-hans.md │ ├── README.md │ ├── data │ ├── OpenLane-V2 │ │ ├── data_dict_sample.json │ │ ├── data_dict_subset_A.json │ │ ├── openlanev2.md5 │ │ └── preprocess.py │ └── README.md │ ├── docs │ ├── annotation.md │ ├── devkit.md │ ├── metrics.md │ ├── statistics.md │ └── submission.md │ ├── imgs │ ├── lane.gif │ ├── poster.gif │ ├── topology.gif │ └── traffic_element.gif │ ├── openlanev2 │ ├── __init__.py │ ├── dataset │ │ ├── __init__.py │ │ ├── collection.py │ │ └── frame.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── distance.py │ │ ├── evaluate.py │ │ └── f_score.py │ ├── io │ │ ├── __init__.py │ │ └── io.py │ ├── preprocessing │ │ ├── __init__.py │ │ ├── check.py │ │ └── collect.py │ ├── utils.py │ └── visualization │ │ ├── __init__.py │ │ ├── bev.py │ │ ├── pv.py │ │ └── utils.py │ ├── plugin │ └── mmdet3d │ │ ├── baseline │ │ ├── __init__.py │ │ ├── core │ │ │ ├── __init__.py │ │ │ └── bbox │ │ │ │ ├── __init__.py │ │ │ │ ├── assigners.py │ │ │ │ └── match_costs.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── openlane_v2_dataset.py │ │ │ └── pipelines │ │ │ │ ├── __init__.py │ │ │ │ ├── formating.py │ │ │ │ ├── loading.py │ │ │ │ └── transforms.py │ │ └── models │ │ │ ├── __init__.py │ │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ ├── intern_image.py │ │ │ └── ops_dcnv3 │ │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── dcnv3_func.py │ │ │ │ ├── make.sh │ │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── dcnv3.py │ │ │ │ ├── setup.py │ │ │ │ ├── src │ │ │ │ ├── cpu │ │ │ │ │ ├── dcnv3_cpu.cpp │ │ │ │ │ └── dcnv3_cpu.h │ │ │ │ ├── cuda │ │ │ │ │ ├── dcnv3_cuda.cu │ │ │ │ │ ├── dcnv3_cuda.h │ │ │ │ │ 
└── dcnv3_im2col_cuda.cuh │ │ │ │ ├── dcnv3.h │ │ │ │ └── vision.cpp │ │ │ │ └── test.py │ │ │ ├── detectors │ │ │ ├── __init__.py │ │ │ ├── baseline.py │ │ │ └── road_bev.py │ │ │ ├── heads │ │ │ ├── __init__.py │ │ │ ├── custom_detr_head.py │ │ │ ├── lc_deformable_detr_head.py │ │ │ ├── relationship_head.py │ │ │ ├── te_deformable_detr_head.py │ │ │ └── topology_head.py │ │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── bevformer_constructer.py │ │ │ ├── custom_base_transformer_layer.py │ │ │ ├── decoder.py │ │ │ ├── encoder.py │ │ │ ├── multi_scale_deformable_attn_function.py │ │ │ ├── spatial_cross_attention.py │ │ │ ├── temporal_self_attention.py │ │ │ └── transformer.py │ │ │ └── necks │ │ │ ├── __init__.py │ │ │ ├── custom_fpn.py │ │ │ └── custom_ipm_view_transformer.py │ │ └── configs │ │ ├── baseline.py │ │ ├── baseline_large.py │ │ └── internimage-s.py │ ├── requirements.txt │ ├── setup.py │ ├── tools │ ├── analysis_tools │ │ ├── analyze_logs.py │ │ ├── benchmark.py │ │ └── get_flops.py │ ├── create_data.py │ ├── create_data.sh │ ├── data_converter │ │ ├── __init__.py │ │ ├── create_gt_database.py │ │ ├── indoor_converter.py │ │ ├── kitti_converter.py │ │ ├── kitti_data_utils.py │ │ ├── lyft_converter.py │ │ ├── lyft_data_fixer.py │ │ ├── nuimage_converter.py │ │ ├── nuscenes_converter.py │ │ ├── s3dis_data_utils.py │ │ ├── scannet_data_utils.py │ │ ├── sunrgbd_data_utils.py │ │ └── waymo_converter.py │ ├── deployment │ │ ├── mmdet3d2torchserve.py │ │ ├── mmdet3d_handler.py │ │ └── test_torchserver.py │ ├── dist_test.sh │ ├── dist_train.sh │ ├── misc │ │ ├── browse_dataset.py │ │ ├── fuse_conv_bn.py │ │ ├── print_config.py │ │ └── visualize_results.py │ ├── model_converters │ │ ├── convert_h3dnet_checkpoints.py │ │ ├── convert_votenet_checkpoints.py │ │ ├── publish_model.py │ │ └── regnet2mmdet.py │ ├── slurm_test.sh │ ├── slurm_train.sh │ ├── test.py │ ├── train.py │ ├── update_data_coords.py │ └── update_data_coords.sh │ └── tutorial.ipynb ├── classification ├── README.md ├── config.py ├── configs │ ├── accelerate │ │ ├── deepspeed │ │ │ ├── ds_config_zero1.json │ │ │ ├── ds_config_zero1_wo_loss_scale.json │ │ │ ├── ds_config_zero3_offload.json │ │ │ └── ds_config_zero3_offload_wo_loss_scale.json │ │ ├── dist_8gpus_ddp_fp16.yaml │ │ ├── dist_8gpus_zero1.yaml │ │ ├── dist_8gpus_zero1_wo_loss_scale.yaml │ │ ├── dist_8gpus_zero3_offload.yaml │ │ └── dist_8gpus_zero3_offload_wo_loss_scale.yaml │ ├── inaturalist2018 │ │ └── internimage_h_22ktoinat18_384.yaml │ ├── internimage_b_1k_224.yaml │ ├── internimage_g_22kto1k_512.yaml │ ├── internimage_h_22kto1k_384.yaml │ ├── internimage_h_22kto1k_640.yaml │ ├── internimage_l_22kto1k_384.yaml │ ├── internimage_s_1k_224.yaml │ ├── internimage_t_1k_224.yaml │ ├── internimage_xl_22kto1k_384.yaml │ └── without_lr_decay │ │ ├── internimage_b_1k_224.yaml │ │ ├── internimage_g_22kto1k_512.yaml │ │ ├── internimage_h_22kto1k_640.yaml │ │ ├── internimage_l_22kto1k_384.yaml │ │ ├── internimage_s_1k_224.yaml │ │ ├── internimage_t_1k_224.yaml │ │ └── internimage_xl_22kto1k_384.yaml ├── dataset │ ├── __init__.py │ ├── build.py │ ├── cached_image_folder.py │ ├── samplers.py │ └── zipreader.py ├── ddp_hooks.py ├── ema_deepspeed.py ├── export.py ├── extract_feature.py ├── huggingface │ ├── 22k_model │ │ ├── internimage_g_jointto22k_384 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── 
internimage_h_jointto22k_384 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── internimage_l_22k_384 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ └── internimage_xl_22k_384 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ ├── README.md │ ├── convert.py │ ├── in1k_model │ │ ├── internimage_b_1k_224 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── internimage_g_22kto1k_512 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── internimage_h_22kto1k_640 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── internimage_l_22kto1k_384 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── internimage_s_1k_224 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ ├── internimage_t_1k_224 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ │ └── internimage_xl_22kto1k_384 │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── configuration_internimage.py │ │ │ ├── dcnv3.py │ │ │ ├── dcnv3_func.py │ │ │ ├── modeling_internimage.py │ │ │ └── preprocessor_config.json │ └── test.py ├── logger.py ├── lr_scheduler.py ├── main.py ├── main_accelerate.py ├── main_deepspeed.py ├── meta_data │ ├── 22k_class_to_idx.json │ ├── map22kto1k.txt │ ├── train.txt.zip │ └── val.txt.zip ├── models │ ├── __init__.py │ ├── build.py │ ├── intern_image.py │ └── intern_image_meta_former.py ├── ops_dcnv3 │ ├── functions │ │ ├── __init__.py │ │ └── dcnv3_func.py │ ├── make.sh │ ├── modules │ │ ├── __init__.py │ │ └── dcnv3.py │ ├── setup.py │ ├── src │ │ ├── cpu │ │ │ ├── dcnv3_cpu.cpp │ │ │ └── dcnv3_cpu.h │ │ ├── cuda │ │ │ ├── dcnv3_cuda.cu │ │ │ ├── dcnv3_cuda.h │ │ │ └── dcnv3_im2col_cuda.cuh │ │ ├── dcnv3.h │ │ └── vision.cpp │ └── test.py ├── optimizer.py ├── train_in1k.sh ├── train_in1k_deepspeed.sh ├── train_inat18.sh └── utils.py ├── detection ├── README.md ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── cityscapes_detection.py │ │ │ ├── cityscapes_instance.py │ │ │ ├── coco_detection.py │ │ │ ├── coco_instance.py │ │ │ ├── coco_panoptic.py │ │ │ ├── crowd_human.py │ │ │ ├── deepfashion.py │ │ │ ├── lvis_v0.5_instance.py │ │ │ ├── lvis_v1_instance.py │ │ │ ├── lvis_v1_instance_minival.py │ │ │ ├── obj365_detection.py │ │ │ ├── openimages_detection.py │ │ │ ├── voc0712.py │ │ │ └── wider_face.py │ │ ├── 
default_runtime.py │ │ ├── models │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ ├── cascade_mask_rcnn_r50_fpn_crowdhuman.py │ │ │ ├── cascade_rcnn_r50_fpn.py │ │ │ ├── fast_rcnn_r50_fpn.py │ │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ │ ├── faster_rcnn_r50_fpn.py │ │ │ ├── mask_rcnn_convnext_fpn.py │ │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ ├── retinanet_r50_fpn.py │ │ │ ├── rpn_r50_caffe_c4.py │ │ │ ├── rpn_r50_fpn.py │ │ │ └── ssd300.py │ │ └── schedules │ │ │ ├── schedule_1x.py │ │ │ ├── schedule_20e.py │ │ │ ├── schedule_2x.py │ │ │ ├── schedule_3x.py │ │ │ └── schedule_6x.py │ ├── coco │ │ ├── README.md │ │ ├── cascade_internimage_l_fpn_1x_coco.py │ │ ├── cascade_internimage_l_fpn_3x_coco.py │ │ ├── cascade_internimage_xl_fpn_1x_coco.py │ │ ├── cascade_internimage_xl_fpn_3x_coco.py │ │ ├── dino_4scale_cbinternimage_h_objects365_coco_ss.py │ │ ├── dino_4scale_internimage_g_objects365_coco_ss.py │ │ ├── dino_4scale_internimage_h_objects365_coco_ss.py │ │ ├── dino_4scale_internimage_l_1x_coco_0.1x_backbone_lr.py │ │ ├── dino_4scale_internimage_l_1x_coco_layer_wise_lr.py │ │ ├── dino_4scale_internimage_t_1x_coco_layer_wise_lr.py │ │ ├── mask_rcnn_internimage_b_fpn_1x_coco.py │ │ ├── mask_rcnn_internimage_b_fpn_3x_coco.py │ │ ├── mask_rcnn_internimage_s_fpn_1x_coco.py │ │ ├── mask_rcnn_internimage_s_fpn_3x_coco.py │ │ ├── mask_rcnn_internimage_t_fpn_1x_coco.py │ │ ├── mask_rcnn_internimage_t_fpn_1x_coco_with_dcnv4.py │ │ └── mask_rcnn_internimage_t_fpn_3x_coco.py │ ├── crowd_human │ │ ├── README.md │ │ └── cascade_internimage_xl_fpn_3x_crowd_human.py │ ├── lvis │ │ ├── README.md │ │ ├── dino_4scale_cbinternimage_h_objects365_lvis_minival_ss.py │ │ └── dino_4scale_cbinternimage_h_objects365_lvis_val_ss.py │ ├── openimages │ │ ├── README.md │ │ └── dino_4scale_cbinternimage_h_objects365_openimages_ss.py │ └── voc │ │ ├── README.md │ │ ├── dino_4scale_cbinternimage_h_objects365_voc07.py │ │ └── dino_4scale_cbinternimage_h_objects365_voc12.py ├── deploy.py ├── deploy │ ├── configs │ │ ├── _base_ │ │ │ ├── backends │ │ │ │ ├── tensorrt-fp16.py │ │ │ │ └── tensorrt.py │ │ │ └── onnx_config.py │ │ └── mmdet │ │ │ ├── _base_ │ │ │ ├── base_dynamic.py │ │ │ ├── base_instance-seg_dynamic.py │ │ │ ├── base_instance-seg_static.py │ │ │ └── base_static.py │ │ │ └── instance-seg │ │ │ └── instance-seg_tensorrt_dynamic-320x320-1344x1344.py │ └── demo.jpg ├── dist_test.sh ├── dist_train.sh ├── get_flops.py ├── image_demo.py ├── mmcv_custom │ ├── __init__.py │ ├── custom_layer_decay_optimizer_constructor.py │ └── efficient_ffn.py ├── mmdet_custom │ ├── __init__.py │ ├── datasets │ │ ├── __init__.py │ │ └── crowd_human.py │ └── models │ │ ├── __init__.py │ │ ├── backbones │ │ ├── __init__.py │ │ ├── cbnet.py │ │ └── intern_image.py │ │ ├── dense_heads │ │ ├── __init__.py │ │ ├── cbdino_head.py │ │ ├── deformable_detr_head.py │ │ ├── detr_head.py │ │ └── dino_head.py │ │ ├── detectors │ │ ├── __init__.py │ │ ├── cbnet_dino.py │ │ └── dino.py │ │ ├── necks │ │ ├── __init__.py │ │ └── cbnet_channel_mapper.py │ │ └── utils │ │ ├── __init__.py │ │ ├── query_denoising.py │ │ └── transformer.py ├── ops_dcnv3 │ ├── functions │ │ ├── __init__.py │ │ └── dcnv3_func.py │ ├── make.sh │ ├── modules │ │ ├── __init__.py │ │ └── dcnv3.py │ ├── setup.py │ ├── src │ │ ├── cpu │ │ │ ├── dcnv3_cpu.cpp │ │ │ └── dcnv3_cpu.h │ │ ├── cuda │ │ │ ├── dcnv3_cuda.cu │ │ │ ├── dcnv3_cuda.h │ │ │ └── dcnv3_im2col_cuda.cuh │ │ ├── dcnv3.h │ │ └── vision.cpp │ └── test.py 
├── slurm_test.sh ├── slurm_train.sh ├── test.py ├── tools │ ├── create_crowd_anno.py │ ├── download_dataset.py │ └── evaluate │ │ └── __init__.py └── train.py ├── docs └── figs │ ├── arch.png │ ├── intern_pipeline.png │ ├── intern_pipeline_en.png │ ├── log.png │ └── network.png ├── sam ├── engine.py └── main_zero_shot_instance_seg.py ├── segmentation ├── README.md ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── ade20k.py │ │ │ ├── chase_db1.py │ │ │ ├── cityscapes.py │ │ │ ├── cityscapes_1024x1024.py │ │ │ ├── cityscapes_extra.py │ │ │ ├── cityscapes_extra_1024x1024.py │ │ │ ├── coco-stuff10k.py │ │ │ ├── coco-stuff164k.py │ │ │ ├── drive.py │ │ │ ├── hrf.py │ │ │ ├── loveda.py │ │ │ ├── mapillary.py │ │ │ ├── mapillary_1024x1024.py │ │ │ ├── mapillary_896x896.py │ │ │ ├── nyu_depth_v2.py │ │ │ ├── pascal_context.py │ │ │ ├── pascal_context_59.py │ │ │ ├── pascal_voc12.py │ │ │ ├── pascal_voc12_aug.py │ │ │ ├── potsdam.py │ │ │ └── stare.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── mask2former_beit.py │ │ │ ├── segformer_mit-b0.py │ │ │ └── upernet_r50.py │ │ └── schedules │ │ │ ├── schedule_160k.py │ │ │ ├── schedule_20k.py │ │ │ ├── schedule_320k.py │ │ │ ├── schedule_40k.py │ │ │ └── schedule_80k.py │ ├── ade20k │ │ ├── README.md │ │ ├── mask2former_internimage_h_896_80k_cocostuff2ade20k_ms.py │ │ ├── mask2former_internimage_h_896_80k_cocostuff2ade20k_ss.py │ │ ├── upernet_internimage_b_512_160k_ade20k.py │ │ ├── upernet_internimage_g_896_160k_ade20k.py │ │ ├── upernet_internimage_h_896_160k_ade20k.py │ │ ├── upernet_internimage_l_640_160k_ade20k.py │ │ ├── upernet_internimage_s_512_160k_ade20k.py │ │ ├── upernet_internimage_t_512_160k_ade20k.py │ │ └── upernet_internimage_xl_640_160k_ade20k.py │ ├── cityscapes │ │ ├── README.md │ │ ├── mask2former_internimage_h_1024x1024_80k_mapillary2cityscapes.py │ │ ├── segformer_internimage_l_512x1024_160k_mapillary2cityscapes.py │ │ ├── segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.py │ │ ├── upernet_internimage_b_512x1024_160k_cityscapes.py │ │ ├── upernet_internimage_l_512x1024_160k_cityscapes.py │ │ ├── upernet_internimage_l_512x1024_160k_mapillary2cityscapes.py │ │ ├── upernet_internimage_s_512x1024_160k_cityscapes.py │ │ ├── upernet_internimage_t_512x1024_160k_cityscapes.py │ │ ├── upernet_internimage_xl_512x1024_160k_cityscapes.py │ │ └── upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.py │ ├── coco_stuff10k │ │ ├── README.md │ │ └── mask2former_internimage_h_512_40k_cocostuff164k_to_10k.py │ ├── coco_stuff164k │ │ ├── README.md │ │ └── mask2former_internimage_h_896_80k_cocostuff164k.py │ ├── mapillary │ │ ├── README.md │ │ ├── mask2former_internimage_h_896x896_80k_mapillary.py │ │ ├── segformer_internimage_l_512x1024_80k_mapillary.py │ │ ├── segformer_internimage_xl_512x1024_80k_mapillary.py │ │ ├── upernet_internimage_l_512x1024_80k_mapillary.py │ │ └── upernet_internimage_xl_512x1024_80k_mapillary.py │ ├── nyu_depth_v2 │ │ ├── README.md │ │ └── mask2former_internimage_h_480_40k_nyu.py │ └── pascal_context │ │ ├── README.md │ │ └── mask2former_internimage_h_480_40k_pascal_context_59.py ├── deploy.py ├── deploy │ ├── configs │ │ ├── _base_ │ │ │ ├── backends │ │ │ │ └── tensorrt.py │ │ │ └── onnx_config.py │ │ └── mmseg │ │ │ ├── segmentation_static.py │ │ │ └── segmentation_tensorrt_static-512x512.py │ └── demo.png ├── dist_test.sh ├── dist_train.sh ├── get_flops.py ├── image_demo.py ├── mmcv_custom │ ├── __init__.py │ └── custom_layer_decay_optimizer_constructor.py ├── mmseg_custom │ ├── __init__.py 
│ ├── core │ │ ├── __init__.py │ │ ├── anchor │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── point_generator.py │ │ ├── box │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── samplers │ │ │ │ ├── __init__.py │ │ │ │ ├── base_sampler.py │ │ │ │ ├── mask_pseudo_sampler.py │ │ │ │ ├── mask_sampling_result.py │ │ │ │ └── sampling_result.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ └── panoptic_utils.py │ │ ├── mask │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── dist_utils.py │ │ │ └── misc.py │ ├── datasets │ │ ├── __init__.py │ │ ├── dataset_wrappers.py │ │ ├── mapillary.py │ │ ├── nyu_depth_v2.py │ │ └── pipelines │ │ │ ├── __init__.py │ │ │ ├── formatting.py │ │ │ └── transform.py │ └── models │ │ ├── __init__.py │ │ ├── backbones │ │ ├── __init__.py │ │ └── intern_image.py │ │ ├── builder.py │ │ ├── decode_heads │ │ ├── __init__.py │ │ ├── mask2former_head.py │ │ └── maskformer_head.py │ │ ├── losses │ │ ├── __init__.py │ │ ├── cross_entropy_loss.py │ │ ├── dice_loss.py │ │ ├── focal_loss.py │ │ ├── match_costs.py │ │ └── match_loss.py │ │ ├── plugins │ │ ├── __init__.py │ │ ├── msdeformattn_pixel_decoder.py │ │ └── pixel_decoder.py │ │ ├── segmentors │ │ ├── __init__.py │ │ ├── encoder_decoder_mask2former.py │ │ └── encoder_decoder_mask2former_aug.py │ │ └── utils │ │ ├── __init__.py │ │ ├── assigner.py │ │ ├── point_sample.py │ │ ├── positional_encoding.py │ │ └── transformer.py ├── ops_dcnv3 │ ├── functions │ │ ├── __init__.py │ │ └── dcnv3_func.py │ ├── make.sh │ ├── modules │ │ ├── __init__.py │ │ └── dcnv3.py │ ├── setup.py │ ├── src │ │ ├── cpu │ │ │ ├── dcnv3_cpu.cpp │ │ │ └── dcnv3_cpu.h │ │ ├── cuda │ │ │ ├── dcnv3_cuda.cu │ │ │ ├── dcnv3_cuda.h │ │ │ └── dcnv3_im2col_cuda.cuh │ │ ├── dcnv3.h │ │ └── vision.cpp │ └── test.py ├── slurm_test.sh ├── slurm_train.sh ├── test.py └── train.py └── tensorrt └── modulated_deform_conv_v3 ├── trt_deform_conv_v3.cpp ├── trt_deform_conv_v3.hpp ├── trt_deform_conv_v3_kernel.cu └── trt_deform_conv_v3_kernel.hpp /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E501, E502, F403, C901, W504, W605, E251, E122, E126, E127, E722, W503, E128, E741, E731, E701, E712 3 | select = E1, E3, E502, E7, E9, W1, W5, W6 4 | max-line-length = 180 5 | exclude=*.egg/*,build,dist,detection/configs/* 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .DS_Store 3 | __pycache__/ 4 | classification/convertor/ 5 | segmentation/convertor/ 6 | detection/convertor/ 7 | checkpoint_dir/ 8 | demo/ 9 | pretrained/ 10 | upload.py 11 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line-length = 180 3 | multi_line_output = 0 4 | extra_standard_library = setuptools 5 | known_third_party = PIL,asynctest,cityscapesscripts,cv2,gather_models,matplotlib,mmcv,numpy,onnx,onnxruntime,pycocotools,pytest,pytorch_sphinx_theme,requests,scipy,seaborn,six,terminaltables,torch,ts,yaml 6 | no_lines_before = STDLIB,LOCALFOLDER 7 | default_section = THIRDPARTY 8 | 9 | [yapf] 10 | BASED_ON_STYLE = pep8 11 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 12 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 13 | 14 | [codespell] 15 | skip = *.ipynb 16 | quiet-level = 3 17 | ignore-words-list = 
patten,nd,ty,mot,hist,formating,winn,gool,datas,wan,confids,TOOD,tood -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: ^internvl_chat_llava/ 2 | repos: 3 | - repo: https://github.com/PyCQA/flake8 4 | rev: 5.0.4 5 | hooks: 6 | - id: flake8 7 | - repo: https://github.com/PyCQA/isort 8 | rev: 5.11.5 9 | hooks: 10 | - id: isort 11 | - repo: https://github.com/pre-commit/pre-commit-hooks 12 | rev: v4.3.0 13 | hooks: 14 | - id: trailing-whitespace 15 | - id: check-yaml 16 | - id: end-of-file-fixer 17 | - id: requirements-txt-fixer 18 | - id: double-quote-string-fixer 19 | - id: check-merge-conflict 20 | - id: fix-encoding-pragma 21 | args: ["--remove"] 22 | - id: mixed-line-ending 23 | args: ["--fix=lf"] 24 | - repo: https://github.com/executablebooks/mdformat 25 | rev: 0.7.9 26 | hooks: 27 | - id: mdformat 28 | args: ["--number"] 29 | additional_dependencies: 30 | - mdformat-openmmlab 31 | - mdformat_frontmatter 32 | - linkify-it-py 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 OpenGVLab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 
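# A quick worked check of that coupling (a sanity-check sketch, assuming the
# 0.25 m pillar size inherited from the base hv_pointpillars_fpn_nus.py config):
# BEV grid cells per axis = (100 - (-100)) / 0.25 = 800, matching the
# output_shape=[800, 800] of pts_middle_encoder below; the lyft variant above
# likewise gives (80 - (-80)) / 0.25 = 640 for its output_shape=[640, 640].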
8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/models/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | 
class_weight=None, # should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is changed during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/cyclic_20e.py: -------------------------------------------------------------------------------- 1 | # For the nuScenes dataset, we usually evaluate the model at the end of training. 2 | # Since the models are trained for 24 epochs by default, we set the evaluation 3 | # interval to 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on the nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # This schedule is usually used by models trained on the KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4), 5 | # the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the official AdamW optimizer implemented by PyTorch.
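# A worked reading of those numbers (a sketch, assuming mmcv's cyclic policy
# interprets target_ratio as (peak_ratio, final_ratio) relative to the initial
# lr): peak lr = 0.0018 * 10 = 1.8e-2 is reached after the step_ratio_up phase,
# and the lr then anneals toward 0.0018 * 1e-4 = 1.8e-7 by the end of training.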
9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although the max_epochs is 40, this schedule is usually used with 29 | # RepeatDataset with repeat ratio N, thus the actual max epoch 30 | # number could be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on the nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor datasets, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | #
optimizer 2 | # This schedule is mainly used on the S3DIS dataset for segmentation 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the ScanNet dataset for segmentation 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the S3DIS dataset for segmentation 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/datasets/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/datasets/evaluation/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/datasets/evaluation/distance.py: -------------------------------------------------------------------------------- 1 | from numpy.typing import NDArray 2 | from scipy.spatial import distance 3 | 4 | 5 | def chamfer_distance(line1: NDArray, line2: NDArray) -> float: 6 | ''' Calculate chamfer distance between two lines. Make sure the 7 | lines are interpolated. 8 | 9 | Args: 10 | line1 (array): coordinates of line1 11 | line2 (array): coordinates of line2 12 | 13 | Returns: 14 | distance (float): chamfer distance 15 | ''' 16 | 17 | dist_matrix = distance.cdist(line1, line2, 'euclidean') 18 | dist12 = dist_matrix.min(-1).sum() / len(line1) 19 | dist21 = dist_matrix.min(-2).sum() / len(line2) 20 | 21 | return (dist12 + dist21) / 2 22 | 23 | 24 | def frechet_distance(line1: NDArray, line2: NDArray) -> float: 25 | ''' Calculate frechet distance between two lines.
Make sure the 26 | lines are interpolated. 27 | 28 | Args: 29 | line1 (array): coordinates of line1 30 | line2 (array): coordinates of line2 31 | 32 | Returns: 33 | distance (float): frechet distance 34 | ''' 35 | 36 | raise NotImplementedError 37 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .formating import FormatBundleMap 2 | from .loading import LoadMultiViewImagesFromFiles 3 | from .poly_bbox import PolygonizeLocalMapBbox 4 | from .transform import Normalize3D, PadMultiViewImages, ResizeMultiViewImages 5 | from .vectorize import VectorizeMap 6 | 7 | # for argoverse 8 | 9 | __all__ = [ 10 | 'LoadMultiViewImagesFromFiles', 11 | 'FormatBundleMap', 'Normalize3D', 'ResizeMultiViewImages', 'PadMultiViewImages', 12 | 'VectorizeMap', 'PolygonizeLocalMapBbox' 13 | ] 14 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/assigner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/assigner/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/augmentation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/augmentation/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .ipm_backbone import IPMEncoder 2 | 3 | __all__ = [ 4 | 'IPMEncoder' 5 | ] 6 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/heads/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/heads/base_map_head.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch.nn as nn 4 | from mmcv.runner import auto_fp16 5 | from mmcv.utils import print_log 6 | from mmdet.utils import get_root_logger 7 | 8 | 9 | class BaseMapHead(nn.Module, metaclass=ABCMeta): 10 | """Base class for mappers.""" 11 | 12 | def __init__(self): 13 | 
super(BaseMapHead, self).__init__() 14 | self.fp16_enabled = False 15 | 16 | def init_weights(self, pretrained=None): 17 | """Initialize the weights in detector. 18 | Args: 19 | pretrained (str, optional): Path to pre-trained weights. 20 | Defaults to None. 21 | """ 22 | if pretrained is not None: 23 | logger = get_root_logger() 24 | print_log(f'load model from: {pretrained}', logger=logger) 25 | 26 | @auto_fp16(apply_to=('img',)) 27 | def forward(self, *args, **kwargs): 28 | pass 29 | 30 | @abstractmethod 31 | def loss(self, pred, gt): 32 | ''' 33 | Compute loss 34 | Output: 35 | dict( 36 | loss: torch.Tensor 37 | log_vars: dict( 38 | str: float, 39 | ) 40 | num_samples: int 41 | ) 42 | ''' 43 | return 44 | 45 | @abstractmethod 46 | def post_process(self, pred): 47 | ''' 48 | convert model predictions to vectorized outputs 49 | the output format should be consistent with the evaluation function 50 | ''' 51 | return 52 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/heads/detgen_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/heads/detgen_utils/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/losses/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/mapers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/mapers/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/transformer_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/Online-HD-Map-Construction/src/models/transformer_utils/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/src/models/transformer_utils/base_transformer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn.bricks.registry import TRANSFORMER_LAYER_SEQUENCE 3 | 4 | 5 | @TRANSFORMER_LAYER_SEQUENCE.register_module() 6 | class PlaceHolderEncoder(nn.Module): 7 | 8 | def __init__(self, *args, embed_dims=None, **kwargs): 9 | super(PlaceHolderEncoder, self).__init__() 10 | self.embed_dims = embed_dims 11 | 12 | def forward(self, *args, query=None, **kwargs): 13 | return query 14 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /autonomous_driving/Online-HD-Map-Construction/tools/evaluate_submission.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.append(os.path.abspath('.')) 5 | import argparse 6 | 7 | from src.datasets.evaluation.vector_eval import VectorEvaluate 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Evaluate a submission file') 13 | 14 | parser.add_argument('submission', 15 | help='submission file in pickle or json format to be evaluated') 16 | 17 | parser.add_argument('gt', 18 | help='gt annotation file') 19 | 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def main(args): 25 | evaluator = VectorEvaluate(args.gt, n_workers=0) 26 | results = evaluator.evaluate(args.submission) 27 | 28 | print(results) 29 | 30 | 31 | if __name__ == '__main__': 32 | args = parse_args() 33 | main(args) 34 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - name: "OpenOccupancy Benchmark Contributors" 5 | title: "OpenOccupancy: 3D Occupancy Benchmark for Scene Perception in Autonomous Driving" 6 | date-released: 2023-02-10 7 | url: "https://github.com/CVPR2023-Occupancy-Prediction-Challenge/CVPR2023-Occupancy-Prediction-Challenge" 8 | license: Apache-2.0 9 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/occupancy_prediction/projects/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 
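# (Same sanity check as in the Online-HD-Map-Construction copy of this config,
# again assuming the base config's 0.25 m pillar size: (100 - (-100)) / 0.25 =
# 800, hence the output_shape=[800, 800] of pts_middle_encoder below.)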
8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/models/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | class_weight=None, 
# should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is changed during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/cyclic_20e.py: -------------------------------------------------------------------------------- 1 | # For the nuScenes dataset, we usually evaluate the model at the end of training. 2 | # Since the models are trained for 24 epochs by default, we set the evaluation 3 | # interval to 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on the nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # This schedule is usually used by models trained on the KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4), 5 | # the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the official AdamW optimizer implemented by PyTorch.
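# Worked numbers (editorial note, derived from the settings below): with
# lr = 0.0018 and target_ratio = (10, 1e-4), the learning rate ramps up to
# 0.0018 * 10 = 0.018 over the first 40% of iterations (step_ratio_up = 0.4),
# then anneals down to 0.0018 * 1e-4 = 1.8e-7 over the remaining 60%.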
9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although the max_epochs is 40, this schedule is usually used with 29 | # RepeatDataset with repeat ratio N, thus the actual max epoch 30 | # number could be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on the nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor datasets, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | #
optimizer 2 | # This schedule is mainly used on the S3DIS dataset for the segmentation task 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the ScanNet dataset for the segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the S3DIS dataset for the segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer import * 2 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D 3 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder 4 | from .core.bbox.match_costs import BBox3DL1Cost 5 | from .core.evaluation.eval_hooks import CustomDistEvalHook 6 | from .datasets.pipelines import (CustomCollect3D, NormalizeMultiviewImage, 7 | PadMultiViewImage, 8 | PhotoMetricDistortionMultiViewImage) 9 | from .models.backbones.vovnet import VoVNet 10 | from .models.opt.adamw import AdamW2 11 | from .models.utils import * 12 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * 2 | from .dense_heads import * 3 | from .detectors import * 4 | from .hooks import * 5 | from .modules import * 6 | from .runner import * 7 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .mmdet_train import custom_train_detector 2 | from .train import custom_train_model 3 | 4 | # from .test import custom_multi_gpu_test 5 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/backbones/__init__.py:
-------------------------------------------------------------------------------- 1 | from .custom_layer_decay_optimizer_constructor import \ 2 | CustomLayerDecayOptimizerConstructor 3 | from .internimage import InternImage 4 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/backbones/ops_dcnv3/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3_func import DCNv3Function, dcnv3_core_pytorch 8 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/backbones/ops_dcnv3/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -------------------------------------------------------- 3 | # InternImage 4 | # Copyright (c) 2022 OpenGVLab 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | python setup.py build install 9 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/backbones/ops_dcnv3/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3 import DCNv3, DCNv3_pytorch 8 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/backbones/ops_dcnv3/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*! 
2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #include "dcnv3.h" 13 | 14 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 15 | m.def("dcnv3_forward", &dcnv3_forward, "dcnv3_forward"); 16 | m.def("dcnv3_backward", &dcnv3_backward, "dcnv3_backward"); 17 | } 18 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer_occ_head import BEVFormerOccHead 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer_occ import BEVFormerOcc 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom_hooks import TransferWeight 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | 3 | 4 | @HOOKS.register_module() 5 | class TransferWeight(Hook): 6 | 7 | def __init__(self, every_n_inters=1): 8 | self.every_n_inters = every_n_inters 9 | 10 | def after_train_iter(self, runner): 11 | if self.every_n_inner_iters(runner, self.every_n_inters): 12 | runner.eval_model.load_state_dict(runner.model.state_dict()) 13 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .decoder import DetectionTransformerDecoder 2 | from .encoder import BEVFormerEncoder, BEVFormerLayer 3 | from .spatial_cross_attention import (MSDeformableAttention3D, 4 | SpatialCrossAttention) 5 | from .temporal_self_attention import TemporalSelfAttention 6 | from .transformer import PerceptionTransformer 7 | from .transformer_occ import TransformerOcc 8 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/bevformer/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .epoch_based_runner import EpochBasedRunner_video 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | 
from .hungarian_assigner_3d import HungarianAssigner3D 2 | 3 | __all__ = ['HungarianAssigner3D'] 4 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_free_coder import NMSFreeCoder 2 | 3 | __all__ = ['NMSFreeCoder'] 4 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost 3 | 4 | __all__ = ['build_match_cost', 'BBox3DL1Cost'] 5 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 3 | 4 | 5 | @MATCH_COST.register_module() 6 | class BBox3DL1Cost(object): 7 | """BBox3DL1Cost. 8 | Args: 9 | weight (int | float, optional): loss_weight 10 | """ 11 | 12 | def __init__(self, weight=1.): 13 | self.weight = weight 14 | 15 | def __call__(self, bbox_pred, gt_bboxes): 16 | """ 17 | Args: 18 | bbox_pred (Tensor): Predicted boxes in the normalized 3D 19 | representation produced by normalize_bbox. Shape 20 | [num_query, box_dim]. 21 | gt_bboxes (Tensor): Ground truth boxes in the same normalized 22 | representation. Shape [num_gt, box_dim]. 23 | Returns: 24 | torch.Tensor: bbox_cost value with weight 25 | """ 26 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1) 27 | return bbox_cost * self.weight 28 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def normalize_bbox(bboxes, pc_range): 5 | cx = bboxes[..., 0:1] 6 | cy = bboxes[..., 1:2] 7 | cz = bboxes[..., 2:3] 8 | w = bboxes[..., 3:4].log() 9 | l = bboxes[..., 4:5].log() 10 | h = bboxes[..., 5:6].log() 11 | 12 | rot = bboxes[..., 6:7] 13 | if bboxes.size(-1) > 7: 14 | vx = bboxes[..., 7:8] 15 | vy = bboxes[..., 8:9] 16 | normalized_bboxes = torch.cat( 17 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1 18 | ) 19 | else: 20 | normalized_bboxes = torch.cat( 21 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1 22 | ) 23 | return normalized_bboxes 24 | 25 | 26 | def denormalize_bbox(normalized_bboxes, pc_range): 27 | # rotation 28 | rot_sine = normalized_bboxes[..., 6:7] 29 | 30 | rot_cosine = normalized_bboxes[..., 7:8] 31 | rot = torch.atan2(rot_sine, rot_cosine) 32 | 33 | # center in the bev 34 | cx = normalized_bboxes[..., 0:1] 35 | cy = normalized_bboxes[..., 1:2] 36 | cz = normalized_bboxes[..., 4:5] 37 | 38 | # size 39 | w = normalized_bboxes[..., 2:3] 40 | l = normalized_bboxes[..., 3:4] 41 | h = normalized_bboxes[..., 5:6] 42 | 43 | w = w.exp() 44 | l = l.exp() 45 | h = h.exp() 46 | if normalized_bboxes.size(-1) > 8: 47 | # velocity 48 | vx = normalized_bboxes[:, 8:9] 49 | vy = normalized_bboxes[:, 9:10] 50 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 51 | else: 52 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l,
h, rot], dim=-1) 53 | return denormalized_bboxes 54 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks import CustomDistEvalHook 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import custom_build_dataset 2 | from .nuscenes_dataset import CustomNuScenesDataset 3 | from .nuscenes_occ import NuSceneOcc 4 | 5 | __all__ = [ 6 | 'CustomNuScenesDataset' 7 | ] 8 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .formating import CustomDefaultFormatBundle3D 2 | from .loading import LoadOccGTFromFile 3 | from .transform_3d import (CustomCollect3D, NormalizeMultiviewImage, 4 | PadMultiViewImage, 5 | PhotoMetricDistortionMultiViewImage, 6 | RandomScaleImageMultiViewImage) 7 | 8 | __all__ = [ 9 | 'PadMultiViewImage', 'NormalizeMultiviewImage', 10 | 'PhotoMetricDistortionMultiViewImage', 'CustomDefaultFormatBundle3D', 'CustomCollect3D', 11 | 'RandomScaleImageMultiViewImage' 12 | ] 13 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.parallel import DataContainer as DC 3 | from mmdet3d.datasets.pipelines import DefaultFormatBundle3D 4 | from mmdet.datasets.builder import PIPELINES 5 | from mmdet.datasets.pipelines import to_tensor 6 | 7 | 8 | @PIPELINES.register_module() 9 | class CustomDefaultFormatBundle3D(DefaultFormatBundle3D): 10 | """Default formatting bundle. 11 | It simplifies the pipeline of formatting common fields for voxels, 12 | including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and 13 | "gt_semantic_seg". 14 | These fields are formatted as follows. 15 | - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) 16 | - proposals: (1)to tensor, (2)to DataContainer 17 | - gt_bboxes: (1)to tensor, (2)to DataContainer 18 | - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer 19 | - gt_labels: (1)to tensor, (2)to DataContainer 20 | """ 21 | 22 | def __call__(self, results): 23 | """Call function to transform and format common fields in results. 24 | Args: 25 | results (dict): Result dict contains the data to convert. 26 | Returns: 27 | dict: The result dict contains the data that is formatted with 28 | default bundle. 
29 | """ 30 | # Format 3D data 31 | results = super(CustomDefaultFormatBundle3D, self).__call__(results) 32 | results['gt_map_masks'] = DC( 33 | to_tensor(results['gt_map_masks']), stack=True) 34 | 35 | return results 36 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from mmdet.datasets.builder import PIPELINES 5 | 6 | 7 | @PIPELINES.register_module() 8 | class LoadOccGTFromFile(object): 9 | """Load multi channel images from a list of separate channel files. 10 | 11 | Expects results['img_filename'] to be a list of filenames. 12 | note that we read image in BGR style to align with opencv.imread 13 | Args: 14 | to_float32 (bool): Whether to convert the img to float32. 15 | Defaults to False. 16 | color_type (str): Color type of the file. Defaults to 'unchanged'. 17 | """ 18 | 19 | def __init__( 20 | self, 21 | data_root, 22 | ): 23 | self.data_root = data_root 24 | 25 | def __call__(self, results): 26 | # print(results.keys()) 27 | occ_gt_path = results['occ_gt_path'] 28 | occ_gt_path = os.path.join(self.data_root, occ_gt_path) 29 | 30 | occ_labels = np.load(occ_gt_path) 31 | semantics = occ_labels['semantics'] 32 | mask_lidar = occ_labels['mask_lidar'] 33 | mask_camera = occ_labels['mask_camera'] 34 | 35 | results['voxel_semantics'] = semantics 36 | results['mask_lidar'] = mask_lidar 37 | results['mask_camera'] = mask_camera 38 | 39 | return results 40 | 41 | def __repr__(self): 42 | """str: Return a string that describes the module.""" 43 | return "{} (data_root={}')".format( 44 | self.__class__.__name__, self.data_root) 45 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed_sampler import DistributedSampler 2 | from .group_sampler import DistributedGroupSampler 3 | from .sampler import SAMPLER, build_sampler 4 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | 6 | from .sampler import SAMPLER 7 | 8 | 9 | @SAMPLER.register_module() 10 | class DistributedSampler(_DistributedSampler): 11 | 12 | def __init__(self, 13 | dataset=None, 14 | num_replicas=None, 15 | rank=None, 16 | shuffle=True, 17 | seed=0): 18 | super().__init__( 19 | dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) 20 | # for the compatibility from PyTorch 1.3+ 21 | self.seed = seed if seed is not None else 0 22 | 23 | def __iter__(self): 24 | # deterministically shuffle based on epoch 25 | if self.shuffle: 26 | assert False 27 | else: 28 | indices = torch.arange(len(self.dataset)).tolist() 29 | 30 | # add extra samples to make it evenly divisible 31 | # in case that indices is shorter than half of total_size 32 | indices = (indices * 33 | math.ceil(self.total_size / len(indices)))[:self.total_size] 34 | assert len(indices) == self.total_size 35 | 36 | # subsample 37 | per_replicas = self.total_size // 
self.num_replicas 38 | # indices = indices[self.rank:self.total_size:self.num_replicas] 39 | indices = indices[self.rank * per_replicas:(self.rank + 1) * per_replicas] 40 | assert len(indices) == self.num_samples 41 | 42 | return iter(indices) 43 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/datasets/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils.registry import Registry, build_from_cfg 2 | 3 | SAMPLER = Registry('sampler') 4 | 5 | 6 | def build_sampler(cfg, default_args): 7 | return build_from_cfg(cfg, SAMPLER, default_args) 8 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .vovnet import VoVNet 2 | 3 | __all__ = ['VoVNet'] 4 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .hooks import GradChecker 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/hooks/hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | 3 | 4 | @HOOKS.register_module() 5 | class GradChecker(Hook): 6 | 7 | def after_train_iter(self, runner): 8 | for key, val in runner.model.named_parameters(): 9 | if val.grad is None and val.requires_grad: 10 | print('WARNING: {key}\'s parameters are not used!'.format(key=key)) 11 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/opt/__init__.py: -------------------------------------------------------------------------------- 1 | from .adamw import AdamW2 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .bricks import run_time 2 | from .grid_mask import GridMask 3 | from .position_embedding import RelPositionEmbedding 4 | from .positional_encoding import LearnedPositionalEncoding3D 5 | from .visual import save_tensor 6 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/utils/bricks.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import defaultdict 3 | 4 | import torch 5 | 6 | time_maps = defaultdict(lambda: 0.) 7 | count_maps = defaultdict(lambda: 0.)
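# Usage sketch (editorial note): run_time is a decorator factory; wrapping a
# function as @run_time('backbone') prints the function's cumulative average
# wall-clock time per call, with torch.cuda.synchronize() before and after
# the call so that asynchronous CUDA work is included in the measurement.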
8 | 9 | 10 | def run_time(name): 11 | def middle(fn): 12 | def wrapper(*args, **kwargs): 13 | torch.cuda.synchronize() 14 | start = time.time() 15 | res = fn(*args, **kwargs) 16 | torch.cuda.synchronize() 17 | time_maps['%s : %s' % (name, fn.__name__)] += time.time() - start 18 | count_maps['%s : %s' % (name, fn.__name__)] += 1 19 | print('%s : %s takes up %f ' % (name, fn.__name__, time_maps['%s : %s' % (name, fn.__name__)] / count_maps[ 20 | '%s : %s' % (name, fn.__name__)])) 21 | return res 22 | 23 | return wrapper 24 | 25 | return middle 26 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/utils/position_embedding.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class RelPositionEmbedding(nn.Module): 8 | def __init__(self, num_pos_feats=64, pos_norm=True): 9 | super().__init__() 10 | self.num_pos_feats = num_pos_feats 11 | self.fc = nn.Linear(4, self.num_pos_feats, bias=False) 12 | # nn.init.orthogonal_(self.fc.weight) 13 | # self.fc.weight.requires_grad = False 14 | self.pos_norm = pos_norm 15 | if self.pos_norm: 16 | self.norm = nn.LayerNorm(self.num_pos_feats) 17 | 18 | def forward(self, tensor): 19 | # mask = nesttensor.mask 20 | B, C, H, W = tensor.shape 21 | # print('tensor.shape', tensor.shape) 22 | y_range = (torch.arange(H) / float(H - 1)).to(tensor.device) 23 | # y_axis = torch.stack((y_range, 1-y_range),dim=1) 24 | y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1) 25 | y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2) 26 | 27 | x_range = (torch.arange(W) / float(W - 1)).to(tensor.device) 28 | # x_axis =torch.stack((x_range,1-x_range),dim=1) 29 | x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1) 30 | x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2) 31 | x_pos = torch.cat((y_axis, x_axis), dim=1) 32 | x_pos = self.fc(x_pos) 33 | 34 | if self.pos_norm: 35 | x_pos = self.norm(x_pos) 36 | # print('xpos,', x_pos.max(),x_pos.min()) 37 | return x_pos 38 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/models/utils/visual.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import matplotlib.pyplot as plt 3 | import torch 4 | import torchvision 5 | from torchvision.utils import make_grid 6 | 7 | 8 | def convert_color(img_path): 9 | plt.figure() 10 | img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 11 | plt.imsave(img_path, img, cmap=plt.get_cmap('viridis')) 12 | plt.close() 13 | 14 | 15 | def save_tensor(tensor, path, pad_value=254.0, ): 16 | print('save_tensor', path) 17 | tensor = tensor.to(torch.float).detach().cpu() 18 | if tensor.type() == 'torch.BoolTensor': 19 | tensor = tensor * 255 20 | if len(tensor.shape) == 3: 21 | tensor = tensor.unsqueeze(1) 22 | tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy() 23 | torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path) 24 | convert_color(path) 25 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/analysis_tools/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/occupancy_prediction/tools/analysis_tools/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/analysis_tools/get_params.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | file_path = './ckpts/bevformer_v4.pth' 4 | model = torch.load(file_path, map_location='cpu') 5 | all = 0 6 | for key in list(model['state_dict'].keys()): 7 | all += model['state_dict'][key].nelement() 8 | print(all) 9 | 10 | # smaller 63374123 11 | # v4 69140395 12 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29503} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --eval bbox 11 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/train.py \ 18 | $CONFIG \ 19 | --deterministic \ 20 | --launcher pytorch ${@:3} 21 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/fp16/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28508} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 10 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | 4 | from mmcv import Config, DictAction 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser(description='Print the whole config') 9 | parser.add_argument('config', help='config file path') 10 | parser.add_argument( 11 | '--options', nargs='+', action=DictAction, help='arguments in dict') 12 | args = parser.parse_args() 13 | 14 | return args 15 | 16 | 17 | def main(): 18 | args = parse_args() 19 | 20 | cfg = Config.fromfile(args.config) 21 | if args.options is not None: 22 | cfg.merge_from_dict(args.options) 23 | print(f'Config:\n{cfg.pretty_text}') 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | import mmcv 5 | from mmcv import Config 6 | from mmdet3d.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='MMDet3D visualize the results') 12 | parser.add_argument('config', help='test config file path') 13 | parser.add_argument('--result', help='results file in pickle format') 14 | parser.add_argument( 15 | '--show-dir', help='directory where visualize results will be saved') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | if args.result is not None and \ 25 | not args.result.endswith(('.pkl', '.pickle')): 26 | raise ValueError('The results file must be a pkl file.') 27 | 28 | cfg = Config.fromfile(args.config) 29 | cfg.data.test.test_mode = True 30 | 31 | # build the dataset 32 | dataset = build_dataset(cfg.data.test) 33 | results = mmcv.load(args.result) 34 | 35 | if getattr(dataset, 'show', None) is not None: 36 | # data loading pipeline for showing 37 | eval_pipeline = cfg.get('eval_pipeline', {}) 38 | if eval_pipeline: 39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 40 | else: 41 | dataset.show(results, args.show_dir) # use default pipeline 42 | else: 43 | raise NotImplementedError( 44 | 'Show is not implemented for dataset {}!'.format( 45 | type(dataset).__name__)) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | 5 | import torch 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser( 10 | description='Process a checkpoint to be published') 11 | parser.add_argument('in_file', help='input checkpoint filename') 12 | parser.add_argument('out_file', help='output checkpoint filename') 13 | args = parser.parse_args() 14 | return args 15 | 16 | 17 | def process_checkpoint(in_file, out_file): 18 | checkpoint = torch.load(in_file, map_location='cpu') 19 | # remove optimizer for smaller file size 20 | if 'optimizer' in checkpoint: 21 | del checkpoint['optimizer'] 22 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 23 | # add the code here. 
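    # For example, a hypothetical scrub (not part of the original script):
    #     checkpoint.get('meta', {}).pop('env_info', None)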
24 | torch.save(checkpoint, out_file) 25 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 26 | final_file = out_file[:-len('.pth')] + '-{}.pth'.format(sha[:8]) 27 | subprocess.Popen(['mv', out_file, final_file]) 28 | 29 | 30 | def main(): 31 | args = parse_args() 32 | process_checkpoint(args.in_file, args.out_file) 33 | 34 | 35 | if __name__ == '__main__': 36 | main() 37 | -------------------------------------------------------------------------------- /autonomous_driving/occupancy_prediction/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:4} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/CITATION: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - name: "OpenLane-V2 Dataset Contributors" 5 | title: "OpenLane-V2: The world's First Perception and Reasoning Benchmark for Scene Structure in Autonomous Driving" 6 | date-released: 2023-01-19 7 | url: "https://github.com/OpenDriveLab/OpenLane-V2" 8 | type: dataset 9 | license: Apache-2.0 10 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/data/OpenLane-V2/openlanev2.md5: -------------------------------------------------------------------------------- 1 | 21c607fa5a1930275b7f1409b25042a0 OpenLane-V2_sample.tar 2 | 8ade7daeec1b64f8ab91a50c81d812f6 OpenLane-V2_subset_A_image_0.tar 3 | c78e776f79e2394d2d5d95b7b5985e0f OpenLane-V2_subset_A_image_1.tar 4 | 4bf09079144aa54cb4dcd5ff6e00cf79 OpenLane-V2_subset_A_image_2.tar 5 | fd9e64345445975f462213b209632aee OpenLane-V2_subset_A_image_3.tar 6 | ae07e48c88ea2c3f6afbdf5ff71e9821 OpenLane-V2_subset_A_image_4.tar 7 | df62c1f6e6b3fb2a2a0868c78ab19c92 OpenLane-V2_subset_A_image_5.tar 8 | 7bff1ce30329235f8e0f25f6f6653b8f OpenLane-V2_subset_A_image_6.tar 9 | c73af4a7aef2692b96e4e00795120504 OpenLane-V2_subset_A_image_7.tar 10 | fb2f61e7309e0b48e2697e085a66a259 OpenLane-V2_subset_A_image_8.tar 11 | 95bf28ccf22583d20434d75800be065d OpenLane-V2_subset_A_info.tar 12 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/data/OpenLane-V2/preprocess.py: -------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # Binaries and/or source for the following packages or projects 3 | # are presented under one or more of the following open source licenses: 4 | # preprocess.py The OpenLane-V2 Dataset Authors Apache License, Version 2.0 5 | # 6 | # Contact wanghuijie@pjlab.org.cn if you have any issue. 7 | # 8 | # Copyright (c) 2023 The OpenLane-v2 Dataset Authors. All Rights Reserved.
9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, software 17 | # distributed under the License is distributed on an "AS IS" BASIS, 18 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | # See the License for the specific language governing permissions and 20 | # limitations under the License. 21 | # ============================================================================== 22 | 23 | from openlanev2.io import io 24 | from openlanev2.preprocessing import collect 25 | 26 | root_path = './OpenLane-V2' 27 | for file in io.os_listdir(root_path): 28 | if file.endswith('json'): 29 | subset = file.split('.')[0] 30 | for split, segments in io.json_load(f'{root_path}/{file}').items(): 31 | point_interval = 1 if split == 'train' else 20 32 | collect(root_path, {split: segments}, f'{subset}_{split}', point_interval=point_interval) 33 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/docs/annotation.md: -------------------------------------------------------------------------------- 1 | # Annotation 2 | 3 | ## Criterion 4 | 5 | The road structure cognition task takes the surrounding-view images of the ego vehicle as input, reconstructs a 6 | high-precision map around the ego vehicle, and recognizes the directions the ego vehicle may take. Concretely, given 7 | the surrounding-view images (and the HD map, where available), the expected outputs are the lane centerlines, the 8 | traffic elements, the topology among lane centerlines, and the correspondence between lane centerlines and traffic 9 | elements. Below are examples of visualizing annotations and relationships between different elements on 2D images.
10 | 11 | ![image](https://user-images.githubusercontent.com/47048022/209953048-f8ded0da-6005-45b7-8e3d-501dbd422058.png) 12 | ![image](https://user-images.githubusercontent.com/47048022/209954207-7b8a1b5a-8243-41d5-91fe-f2de5949107e.png) 13 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/docs/statistics.md: -------------------------------------------------------------------------------- 1 | # Statistics 2 | 3 | ## `subset_A` 4 | 5 | ### Temporal Consistency 6 | 7 | ![image](https://user-images.githubusercontent.com/29263416/228440318-f24136e5-7a26-4b28-bb74-6a448c900756.png) 8 | 9 | ### Instance Distribution 10 | 11 | ![image](https://user-images.githubusercontent.com/29263416/228441160-19d399c8-548c-4bef-8909-06ffcd0c027b.png) 12 | 13 | ### Centerline Property 14 | 15 | ![image](https://user-images.githubusercontent.com/29263416/228442761-5895e5b4-6d3a-4b90-8dbb-4ecfab98190e.png) 16 | 17 | ### Topology Distribution 18 | 19 | ![image](https://user-images.githubusercontent.com/29263416/228443434-a74085dc-28f8-400d-99a2-f0c67b49bf66.png) 20 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/docs/submission.md: -------------------------------------------------------------------------------- 1 | # Submission 2 | 3 | ## Format 4 | 5 | Submitted results must be stored in a pickle file: a dict that carries meta information and, under 'results', maps the 6 | identifier of each frame to its [formatted predictions](../data/README.md#annotations): 7 | 8 | ``` 9 | { 10 | 'method': -- name of the method 11 | 'authors': -- list of str, authors 12 | 'e-mail': -- e-mail address 13 | 'institution / company': -- institution or company 14 | 'country / region': -- country or region, checked by iso3166* 15 | 'results': { 16 | [identifier]: { -- identifier of the frame, (split, segment_id, timestamp) 17 | 'lane_centerline': ... 18 | 'traffic_element': ... 19 | 'topology_lclc': ... 20 | 'topology_lcte': ... 21 | }, 22 | ... 23 | } 24 | } 25 | ``` 26 | 27 | \*: For validation, `from iso3166 import countries; countries.get(str)` can be used. 28 | 29 | ## Steps 30 | 31 | 1. Create a team on [EvalAI](https://eval.ai/web/challenges/challenge-page/1925). 32 | 2. Click the 'Participate' tab, then choose a team for participation. 33 | 3. Choose the phase 'Test Phase (CVPR 2023 Autonomous Driving Challenge)' and upload the file formatted as described 34 | above. 35 | 4. Check whether the submitted file is valid, as indicated by the 'Status' field under the 'My Submissions' tab. A valid 36 | submission would provide performance scores.
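As a minimal sketch of assembling such a file (editorial example: all field values below are placeholders, and the per-frame payloads must follow the formats linked above):

```
import pickle

submission = {
    'method': 'my-method',                    # placeholder
    'authors': ['A. Author'],                 # placeholder
    'e-mail': 'a.author@example.com',         # placeholder
    'institution / company': 'Example Lab',   # placeholder
    'country / region': 'US',                 # must pass iso3166 validation
    'results': {
        ('val', 'segment_id', 315969904296756000): {  # placeholder identifier
            'lane_centerline': ...,  # formatted predictions, see the link above
            'traffic_element': ...,
            'topology_lclc': ...,
            'topology_lcte': ...,
        },
    },
}

with open('submission.pkl', 'wb') as f:
    pickle.dump(submission, f)
```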
37 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/imgs/lane.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/imgs/lane.gif -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/imgs/poster.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/imgs/poster.gif -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/imgs/topology.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/imgs/topology.gif -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/imgs/traffic_element.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/imgs/traffic_element.gif -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/openlanev2/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/openlanev2/dataset/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/openlanev2/evaluation/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/openlanev2/io/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/openlanev2/preprocessing/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/utils.py: 
-------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # Binaries and/or source for the following packages or projects 3 | # are presented under one or more of the following open source licenses: 4 | # utils.py The OpenLane-V2 Dataset Authors Apache License, Version 2.0 5 | # 6 | # Contact wanghuijie@pjlab.org.cn if you have any issue. 7 | # 8 | # Copyright (c) 2023 The OpenLane-v2 Dataset Authors. All Rights Reserved. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, software 17 | # distributed under the License is distributed on an "AS IS" BASIS, 18 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | # See the License for the specific language governing permissions and 20 | # limitations under the License. 21 | # ============================================================================== 22 | 23 | TRAFFIC_ELEMENT_ATTRIBUTE = { 24 | 'unknown': 0, 25 | 'red': 1, 26 | 'green': 2, 27 | 'yellow': 3, 28 | 'go_straight': 4, 29 | 'turn_left': 5, 30 | 'turn_right': 6, 31 | 'no_left_turn': 7, 32 | 'no_right_turn': 8, 33 | 'u_turn': 9, 34 | 'no_u_turn': 10, 35 | 'slight_left': 11, 36 | 'slight_right': 12, 37 | } 38 | 39 | 40 | def format_metric(metric): 41 | for key, val in metric.items(): 42 | print(f'{key} - {val["score"]}') 43 | for k, v in val.items(): 44 | if 'score' not in k: 45 | print(f' {k} - {v}') 46 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/openlanev2/visualization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/openlanev2/visualization/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/core/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/core/bbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/core/bbox/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/core/bbox/match_costs.py: 
-------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # Binaries and/or source for the following packages or projects 3 | # are presented under one or more of the following open source licenses: 4 | # match_costs.py The OpenLane-V2 Dataset Authors Apache License, Version 2.0 5 | # 6 | # Contact wanghuijie@pjlab.org.cn if you have any issue. 7 | # 8 | # Copyright (c) 2023 The OpenLane-v2 Dataset Authors. All Rights Reserved. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, software 17 | # distributed under the License is distributed on an "AS IS" BASIS, 18 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | # See the License for the specific language governing permissions and 20 | # limitations under the License. 21 | # ============================================================================== 22 | 23 | import torch 24 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 25 | 26 | 27 | @MATCH_COST.register_module() 28 | class LaneL1Cost: 29 | r""" 30 | Notes 31 | ----- 32 | Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/match_costs/match_cost.py#L11. 33 | 34 | """ 35 | 36 | def __init__(self, weight=1.): 37 | self.weight = weight 38 | 39 | def __call__(self, lane_pred, gt_lanes): 40 | lane_cost = torch.cdist(lane_pred, gt_lanes, p=1) 41 | return lane_cost * self.weight 42 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/datasets/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/datasets/pipelines/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .intern_image import InternImage 2 | 3 | __all__ = ['InternImage'] 4 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/backbones/ops_dcnv3/functions/__init__.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/backbones/ops_dcnv3/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -------------------------------------------------------- 3 | # InternImage 4 | # Copyright (c) 2022 OpenGVLab 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | python setup.py build install 9 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/backbones/ops_dcnv3/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/backbones/ops_dcnv3/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #include "dcnv3.h" 13 | 14 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 15 | m.def("dcnv3_forward", &dcnv3_forward, "dcnv3_forward"); 16 | m.def("dcnv3_backward", &dcnv3_backward, "dcnv3_backward"); 17 | } 18 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/detectors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/detectors/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/heads/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/modules/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/modules/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/necks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/autonomous_driving/openlane-v2/plugin/mmdet3d/baseline/models/necks/__init__.py -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/requirements.txt: -------------------------------------------------------------------------------- 1 | chardet 2 | iso3166 3 | jupyter 4 | matplotlib 5 | ninja 6 | numpy >=1.22.0, <1.24.0 7 | opencv-python 8 | openmim 9 | ortools ==9.2.9972 10 | scikit-learn 11 | scipy ==1.8.0 12 | similaritymeasures 13 | tqdm 14 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/setup.py: -------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # Binaries and/or source for the following packages or projects 3 | # are presented under one or more of the following open source licenses: 4 | # setup.py The OpenLane-V2 Dataset Authors Apache License, Version 2.0 5 | # 6 | # Contact wanghuijie@pjlab.org.cn if you have any issue. 7 | # 8 | # Copyright (c) 2023 The OpenLane-v2 Dataset Authors. All Rights Reserved. 9 | # 10 | # Licensed under the Apache License, Version 2.0 (the "License"); 11 | # you may not use this file except in compliance with the License. 12 | # You may obtain a copy of the License at 13 | # 14 | # http://www.apache.org/licenses/LICENSE-2.0 15 | # 16 | # Unless required by applicable law or agreed to in writing, software 17 | # distributed under the License is distributed on an "AS IS" BASIS, 18 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | # See the License for the specific language governing permissions and 20 | # limitations under the License. 
21 | # ============================================================================== 22 | 23 | from setuptools import find_packages, setup 24 | 25 | setup( 26 | name='openlanev2', 27 | version='0.1.0', 28 | author='The OpenLane-V2 Dataset Authors', 29 | author_email='wanghuijie@pjlab.org.cn', 30 | description='The official devkit of the OpenLane-V2 dataset.', 31 | url='https://github.com/OpenDriveLab/OpenLane-V2', 32 | packages=find_packages(), 33 | license='Apache License 2.0', 34 | ) 35 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/create_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | export PYTHONPATH=`pwd`:$PYTHONPATH 5 | 6 | PARTITION=$1 7 | JOB_NAME=$2 8 | DATASET=$3 9 | GPUS=${GPUS:-1} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-1} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | JOB_NAME=create_data 13 | 14 | srun -p ${PARTITION} \ 15 | --job-name=${JOB_NAME} \ 16 | --gres=gpu:${GPUS_PER_NODE} \ 17 | --ntasks=${GPUS} \ 18 | --ntasks-per-node=${GPUS_PER_NODE} \ 19 | --kill-on-bad-exit=1 \ 20 | ${SRUN_ARGS} \ 21 | python -u tools/create_data.py ${DATASET} \ 22 | --root-path ./data/${DATASET} \ 23 | --out-dir ./data/${DATASET} \ 24 | --extra-tag ${DATASET} 25 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/data_converter/lyft_data_fixer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | 5 | import numpy as np 6 | 7 | 8 | def fix_lyft(root_folder='./data/lyft', version='v1.01'): 9 | # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa 10 | lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin' 11 | root_folder = os.path.join(root_folder, f'{version}-train') 12 | lidar_path = os.path.join(root_folder, lidar_path) 13 | assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \ 14 | f'dataset and make sure {lidar_path} is present.' 
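# Context for the repair below (descriptive note, not in the original file):
# the affected scan's raw buffer holds a float32 count that is not a
# multiple of 5 (x, y, z, intensity, ring index in the Lyft/nuScenes point
# layout), so the reshape([-1, 5]) probe raises ValueError, and appending
# two trailing values restores a valid layout.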
15 | points = np.fromfile(lidar_path, dtype=np.float32, count=-1) 16 | try: 17 | points.reshape([-1, 5]) 18 | print(f'This fix is not required for version {version}.') 19 | except ValueError: 20 | new_points = np.array(list(points) + [100.0, 1.0], dtype='float32') 21 | new_points.tofile(lidar_path) 22 | print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.') 23 | 24 | 25 | parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser') 26 | parser.add_argument( 27 | '--root-folder', 28 | type=str, 29 | default='./data/lyft', 30 | help='specify the root path of Lyft dataset') 31 | parser.add_argument( 32 | '--version', 33 | type=str, 34 | default='v1.01', 35 | help='specify Lyft dataset version') 36 | args = parser.parse_args() 37 | 38 | if __name__ == '__main__': 39 | fix_lyft(root_folder=args.root_folder, version=args.version) 40 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29500} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/test.py \ 19 | $CONFIG \ 20 | $CHECKPOINT \ 21 | --launcher pytorch \ 22 | ${@:4} 23 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/train.py \ 18 | $CONFIG \ 19 | --seed 0 \ 20 | --launcher pytorch ${@:3} 21 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | from mmcv import Config, DictAction 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser(description='Print the whole config') 9 | parser.add_argument('config', help='config file path') 10 | parser.add_argument( 11 | '--options', nargs='+', action=DictAction, help='arguments in dict') 12 | args = parser.parse_args() 13 | 14 | return args 15 | 16 | 17 | def main(): 18 | args = parse_args() 19 | 20 | cfg = Config.fromfile(args.config) 21 | if args.options is not None: 22 | cfg.merge_from_dict(args.options) 23 | print(f'Config:\n{cfg.pretty_text}') 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. 
All rights reserved. 2 | import argparse 3 | 4 | import mmcv 5 | from mmcv import Config 6 | from mmdet3d.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='MMDet3D visualize the results') 12 | parser.add_argument('config', help='test config file path') 13 | parser.add_argument('--result', help='results file in pickle format') 14 | parser.add_argument( 15 | '--show-dir', help='directory where visualize results will be saved') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | if args.result is not None and \ 25 | not args.result.endswith(('.pkl', '.pickle')): 26 | raise ValueError('The results file must be a pkl file.') 27 | 28 | cfg = Config.fromfile(args.config) 29 | cfg.data.test.test_mode = True 30 | 31 | # build the dataset 32 | dataset = build_dataset(cfg.data.test) 33 | results = mmcv.load(args.result) 34 | 35 | if getattr(dataset, 'show', None) is not None: 36 | # data loading pipeline for showing 37 | eval_pipeline = cfg.get('eval_pipeline', {}) 38 | if eval_pipeline: 39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 40 | else: 41 | dataset.show(results, args.show_dir) # use default pipeline 42 | else: 43 | raise NotImplementedError( 44 | 'Show is not implemented for dataset {}!'.format( 45 | type(dataset).__name__)) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | 5 | import torch 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser( 10 | description='Process a checkpoint to be published') 11 | parser.add_argument('in_file', help='input checkpoint filename') 12 | parser.add_argument('out_file', help='output checkpoint filename') 13 | args = parser.parse_args() 14 | return args 15 | 16 | 17 | def process_checkpoint(in_file, out_file): 18 | checkpoint = torch.load(in_file, map_location='cpu') 19 | # remove optimizer for smaller file size 20 | if 'optimizer' in checkpoint: 21 | del checkpoint['optimizer'] 22 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 23 | # add the code here. 
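# A hedged sketch of what such scrubbing could look like; the meta keys
# named here are hypothetical examples, not fields this tool is known to
# write:
#     for key in ('hook_msgs', 'config'):
#         checkpoint.get('meta', {}).pop(key, None)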
24 | torch.save(checkpoint, out_file) 25 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 26 | final_file = (out_file[:-4] if out_file.endswith('.pth') else out_file) + '-{}.pth'.format(sha[:8]) 27 | subprocess.Popen(['mv', out_file, final_file]) 28 | 29 | 30 | def main(): 31 | args = parse_args() 32 | process_checkpoint(args.in_file, args.out_file) 33 | 34 | 35 | if __name__ == '__main__': 36 | main() 37 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /autonomous_driving/openlane-v2/tools/update_data_coords.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | export PYTHONPATH=`pwd`:$PYTHONPATH 5 | 6 | PARTITION=$1 7 | DATASET=$2 8 | GPUS=${GPUS:-1} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-1} 10 | SRUN_ARGS=${SRUN_ARGS:-""} 11 | JOB_NAME=update_data_coords 12 | 13 | srun -p ${PARTITION} \ 14 | --job-name=${JOB_NAME} \ 15 | --gres=gpu:${GPUS_PER_NODE} \ 16 | --ntasks=${GPUS} \ 17 | --ntasks-per-node=${GPUS_PER_NODE} \ 18 | --kill-on-bad-exit=1 \ 19 | ${SRUN_ARGS} \ 20 | python -u tools/update_data_coords.py ${DATASET} \ 21 | --root-dir ./data/${DATASET} \ 22 | --out-dir ./data/${DATASET} 23 | -------------------------------------------------------------------------------- /classification/configs/accelerate/deepspeed/ds_config_zero1.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": true, 4 | "auto_cast": true 5 | }, 6 | "zero_optimization": { 7 | "stage": 1, 8 | "offload_optimizer": { 9 | "device": "none" 10 | }, 11 | "offload_param": { 12 | "device": "none" 13 | } 14 | }, 15 | "gradient_accumulation_steps": 4, 16 | "gradient_clipping": 5.0, 17 | "steps_per_print": "inf", 18 | "train_batch_size": "auto", 19 |
"train_micro_batch_size_per_gpu": "auto" 20 | } 21 | -------------------------------------------------------------------------------- /classification/configs/accelerate/deepspeed/ds_config_zero1_wo_loss_scale.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": true, 4 | "auto_cast": true, 5 | "loss_scale": 1 6 | }, 7 | "zero_optimization": { 8 | "stage": 1, 9 | "offload_optimizer": { 10 | "device": "none" 11 | }, 12 | "offload_param": { 13 | "device": "none" 14 | } 15 | }, 16 | "gradient_accumulation_steps": 4, 17 | "gradient_clipping": 5.0, 18 | "steps_per_print": "inf", 19 | "train_batch_size": "auto", 20 | "train_micro_batch_size_per_gpu": "auto" 21 | } 22 | -------------------------------------------------------------------------------- /classification/configs/accelerate/deepspeed/ds_config_zero3_offload.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": true, 4 | "auto_cast": true 5 | }, 6 | "zero_optimization": { 7 | "stage": 3, 8 | "offload_optimizer": { 9 | "device": "cpu" 10 | }, 11 | "offload_param": { 12 | "device": "cpu" 13 | } 14 | }, 15 | "gradient_accumulation_steps": 4, 16 | "gradient_clipping": 5.0, 17 | "steps_per_print": "inf", 18 | "train_batch_size": "auto", 19 | "train_micro_batch_size_per_gpu": "auto" 20 | } 21 | -------------------------------------------------------------------------------- /classification/configs/accelerate/deepspeed/ds_config_zero3_offload_wo_loss_scale.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": true, 4 | "auto_cast": true, 5 | "loss_scale": 1 6 | }, 7 | "zero_optimization": { 8 | "stage": 3, 9 | "offload_optimizer": { 10 | "device": "cpu" 11 | }, 12 | "offload_param": { 13 | "device": "cpu" 14 | } 15 | }, 16 | "gradient_accumulation_steps": 4, 17 | "gradient_clipping": 5.0, 18 | "steps_per_print": "inf", 19 | "train_batch_size": "auto", 20 | "train_micro_batch_size_per_gpu": "auto" 21 | } 22 | -------------------------------------------------------------------------------- /classification/configs/accelerate/dist_8gpus_ddp_fp16.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: {} 3 | distributed_type: MULTI_GPU 4 | downcast_bf16: 'no' 5 | fsdp_config: {} 6 | machine_rank: 0 7 | main_process_ip: null 8 | main_process_port: 11922 9 | main_training_function: main 10 | mixed_precision: 'fp16' 11 | num_machines: 1 12 | num_processes: 8 13 | use_cpu: false 14 | -------------------------------------------------------------------------------- /classification/configs/accelerate/dist_8gpus_zero1.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: 3 | deepspeed_config_file: configs/accelerate/deepspeed/ds_config_zero1.json 4 | zero3_init_flag: false 5 | distributed_type: DEEPSPEED 6 | downcast_bf16: 'no' 7 | machine_rank: 0 8 | main_training_function: main 9 | num_machines: 1 10 | num_processes: 8 11 | rdzv_backend: static 12 | use_cpu: false 13 | -------------------------------------------------------------------------------- /classification/configs/accelerate/dist_8gpus_zero1_wo_loss_scale.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: 3 | 
deepspeed_config_file: configs/accelerate/deepspeed/ds_config_zero1_wo_loss_scale.json 4 | zero3_init_flag: false 5 | distributed_type: DEEPSPEED 6 | downcast_bf16: 'no' 7 | machine_rank: 0 8 | main_training_function: main 9 | num_machines: 1 10 | num_processes: 8 11 | rdzv_backend: static 12 | use_cpu: false 13 | -------------------------------------------------------------------------------- /classification/configs/accelerate/dist_8gpus_zero3_offload.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: 3 | deepspeed_config_file: configs/accelerate/deepspeed/ds_config_zero3_offload.json 4 | zero3_init_flag: false 5 | distributed_type: DEEPSPEED 6 | downcast_bf16: 'no' 7 | machine_rank: 0 8 | main_training_function: main 9 | num_machines: 1 10 | num_processes: 8 11 | rdzv_backend: static 12 | use_cpu: false 13 | -------------------------------------------------------------------------------- /classification/configs/accelerate/dist_8gpus_zero3_offload_wo_loss_scale.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: 3 | deepspeed_config_file: configs/accelerate/deepspeed/ds_config_zero3_offload_wo_loss_scale.json 4 | zero3_init_flag: false 5 | distributed_type: DEEPSPEED 6 | downcast_bf16: 'no' 7 | machine_rank: 0 8 | main_training_function: main 9 | num_machines: 1 10 | num_processes: 8 11 | rdzv_backend: static 12 | use_cpu: false 13 | -------------------------------------------------------------------------------- /classification/configs/inaturalist2018/internimage_h_22ktoinat18_384.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 384 3 | IMG_ON_MEMORY: False 4 | DATASET: inat18 5 | AUG: 6 | MIXUP: 0.0 7 | CUTMIX: 0.0 8 | REPROB: 0.0 9 | MODEL: 10 | TYPE: intern_image_meta_former 11 | DROP_PATH_RATE: 0.6 12 | LABEL_SMOOTHING: 0.3 13 | INTERN_IMAGE: 14 | CORE_OP: 'DCNv3' 15 | DEPTHS: [6, 6, 32, 6] 16 | GROUPS: [10, 20, 40, 80] 17 | CHANNELS: 320 18 | DW_KERNEL_SIZE: 5 19 | LAYER_SCALE: None 20 | OFFSET_SCALE: 1.0 21 | MLP_RATIO: 4.0 22 | POST_NORM: False 23 | RES_POST_NORM: True 24 | LEVEL2_POST_NORM: True 25 | LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29] 26 | CENTER_FEATURE_SCALE: True 27 | USE_CLIP_PROJECTOR: True 28 | PRETRAINED: 'pretrained/internimage_h_jointto22k_384.pth' 29 | TRAIN: 30 | EMA: 31 | ENABLE: false 32 | DECAY: 0.9999 33 | EPOCHS: 100 34 | WARMUP_EPOCHS: 0 35 | WEIGHT_DECAY: 0.05 36 | BASE_LR: 2e-05 # 512 37 | WARMUP_LR: .0 38 | MIN_LR: .0 39 | USE_CHECKPOINT: true 40 | RAND_INIT_FT_HEAD: true 41 | AMP_OPT_LEVEL: O0 42 | EVAL_FREQ: 1 43 | -------------------------------------------------------------------------------- /classification/configs/internimage_b_1k_224.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_ON_MEMORY: True 3 | MODEL: 4 | TYPE: intern_image 5 | DROP_PATH_RATE: 0.5 6 | INTERN_IMAGE: 7 | CORE_OP: 'DCNv3' 8 | DEPTHS: [4, 4, 21, 4] 9 | GROUPS: [7, 14, 28, 56] 10 | CHANNELS: 112 11 | LAYER_SCALE: 1e-5 12 | OFFSET_SCALE: 1.0 13 | MLP_RATIO: 4.0 14 | POST_NORM: True 15 | TRAIN: 16 | EMA: 17 | ENABLE: True 18 | DECAY: 0.9999 19 | BASE_LR: 5e-4 20 | -------------------------------------------------------------------------------- /classification/configs/internimage_g_22kto1k_512.yaml:
-------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 512 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.4 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [2, 2, 48, 4] 15 | GROUPS: [16, 32, 64, 128] 16 | CHANNELS: 512 17 | DW_KERNEL_SIZE: 5 18 | LAYER_SCALE: None 19 | OFFSET_SCALE: 1.0 20 | MLP_RATIO: 4.0 21 | POST_NORM: True 22 | LEVEL2_POST_NORM: True 23 | LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29, 35, 41, 47] 24 | CENTER_FEATURE_SCALE: True 25 | USE_CLIP_PROJECTOR: True 26 | PRETRAINED: 'pretrained/internimage_g_pretrainto22k_384.pth' 27 | TRAIN: 28 | EMA: 29 | ENABLE: true 30 | DECAY: 0.9999 31 | EPOCHS: 20 32 | WARMUP_EPOCHS: 2 33 | WEIGHT_DECAY: 0.05 34 | BASE_LR: 2e-05 # 512 35 | WARMUP_LR: .0 36 | MIN_LR: .0 37 | LR_LAYER_DECAY: true 38 | LR_LAYER_DECAY_RATIO: 0.9 39 | USE_CHECKPOINT: true 40 | OPTIMIZER: 41 | DCN_LR_MUL: 0.1 42 | AMP_OPT_LEVEL: O0 43 | EVAL_FREQ: 1 44 | -------------------------------------------------------------------------------- /classification/configs/internimage_h_22kto1k_384.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 384 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.2 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [6, 6, 32, 6] 15 | GROUPS: [10, 20, 40, 80] 16 | CHANNELS: 320 17 | DW_KERNEL_SIZE: 5 18 | LAYER_SCALE: None 19 | OFFSET_SCALE: 1.0 20 | MLP_RATIO: 4.0 21 | POST_NORM: False 22 | RES_POST_NORM: True 23 | LEVEL2_POST_NORM: True 24 | LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29] 25 | CENTER_FEATURE_SCALE: True 26 | USE_CLIP_PROJECTOR: True 27 | PRETRAINED: 'pretrained/internimage_h_jointto22k_384.pth' 28 | TRAIN: 29 | EMA: 30 | ENABLE: true 31 | DECAY: 0.9999 32 | EPOCHS: 20 33 | WARMUP_EPOCHS: 2 34 | WEIGHT_DECAY: 0.05 35 | BASE_LR: 2e-05 # 512 36 | WARMUP_LR: .0 37 | MIN_LR: .0 38 | LR_LAYER_DECAY: true 39 | LR_LAYER_DECAY_RATIO: 0.9 40 | USE_CHECKPOINT: true 41 | OPTIMIZER: 42 | DCN_LR_MUL: 0.1 43 | AMP_OPT_LEVEL: O0 44 | EVAL_FREQ: 1 45 | -------------------------------------------------------------------------------- /classification/configs/internimage_h_22kto1k_640.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 640 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.2 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [6, 6, 32, 6] 15 | GROUPS: [10, 20, 40, 80] 16 | CHANNELS: 320 17 | DW_KERNEL_SIZE: 5 18 | LAYER_SCALE: None 19 | OFFSET_SCALE: 1.0 20 | MLP_RATIO: 4.0 21 | POST_NORM: False 22 | RES_POST_NORM: True 23 | LEVEL2_POST_NORM: True 24 | LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29] 25 | CENTER_FEATURE_SCALE: True 26 | USE_CLIP_PROJECTOR: True 27 | PRETRAINED: 'pretrained/internimage_h_jointto22k_384.pth' 28 | TRAIN: 29 | EMA: 30 | ENABLE: true 31 | DECAY: 0.9999 32 | EPOCHS: 20 33 | WARMUP_EPOCHS: 2 34 | WEIGHT_DECAY: 0.05 35 | BASE_LR: 2e-05 # 512 36 | WARMUP_LR: .0 37 | MIN_LR: .0 38 | LR_LAYER_DECAY: true 39 | LR_LAYER_DECAY_RATIO: 0.9 40 | USE_CHECKPOINT: true 41 | OPTIMIZER: 42 | USE_ZERO: True 43 | DCN_LR_MUL: 0.1 44 | AMP_OPT_LEVEL: O0 45 | EVAL_FREQ: 1 46 | 
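# A rough worked example of the linear LR-scaling rule that the "# 512"
# annotation on BASE_LR above appears to imply (an assumption about the
# surrounding config system, not something stated in this file): the base
# value is defined for a total batch size of 512 and scaled linearly, e.g.
#   effective_lr = BASE_LR * total_batch_size / 512
#   8 GPUs x 16 images per GPU = 128  ->  2e-05 * 128 / 512 = 5e-06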
-------------------------------------------------------------------------------- /classification/configs/internimage_l_22kto1k_384.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 384 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.1 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [5, 5, 22, 5] 15 | GROUPS: [10, 20, 40, 80] 16 | CHANNELS: 160 17 | LAYER_SCALE: 1e-5 18 | OFFSET_SCALE: 2.0 19 | MLP_RATIO: 4.0 20 | POST_NORM: True 21 | PRETRAINED: 'pretrained/internimage_l_22k_192to384.pth' 22 | TRAIN: 23 | EMA: 24 | ENABLE: true 25 | DECAY: 0.9999 26 | EPOCHS: 20 27 | WARMUP_EPOCHS: 2 28 | WEIGHT_DECAY: 0.05 29 | BASE_LR: 2e-05 # 512 30 | WARMUP_LR: .0 31 | MIN_LR: .0 32 | LR_LAYER_DECAY: true 33 | LR_LAYER_DECAY_RATIO: 0.9 34 | USE_CHECKPOINT: true 35 | OPTIMIZER: 36 | DCN_LR_MUL: 0.1 37 | AMP_OPT_LEVEL: O0 38 | EVAL_FREQ: 1 39 | -------------------------------------------------------------------------------- /classification/configs/internimage_s_1k_224.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_ON_MEMORY: True 3 | MODEL: 4 | TYPE: intern_image 5 | DROP_PATH_RATE: 0.4 6 | INTERN_IMAGE: 7 | CORE_OP: 'DCNv3' 8 | DEPTHS: [4, 4, 21, 4] 9 | GROUPS: [5, 10, 20, 40] 10 | CHANNELS: 80 11 | LAYER_SCALE: 1e-5 12 | OFFSET_SCALE: 1.0 13 | MLP_RATIO: 4.0 14 | POST_NORM: True 15 | TRAIN: 16 | EMA: 17 | ENABLE: True 18 | DECAY: 0.9999 19 | BASE_LR: 5e-4 20 | -------------------------------------------------------------------------------- /classification/configs/internimage_t_1k_224.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_ON_MEMORY: True 3 | MODEL: 4 | TYPE: intern_image 5 | DROP_PATH_RATE: 0.1 6 | INTERN_IMAGE: 7 | CORE_OP: 'DCNv3' 8 | DEPTHS: [4, 4, 18, 4] 9 | GROUPS: [4, 8, 16, 32] 10 | CHANNELS: 64 11 | OFFSET_SCALE: 1.0 12 | MLP_RATIO: 4.0 13 | TRAIN: 14 | EMA: 15 | ENABLE: True 16 | DECAY: 0.9999 17 | BASE_LR: 5e-4 18 | -------------------------------------------------------------------------------- /classification/configs/internimage_xl_22kto1k_384.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 384 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.2 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [5, 5, 24, 5] 15 | GROUPS: [12, 24, 48, 96] 16 | CHANNELS: 192 17 | LAYER_SCALE: 1e-5 18 | OFFSET_SCALE: 2.0 19 | MLP_RATIO: 4.0 20 | POST_NORM: True 21 | PRETRAINED: 'pretrained/internimage_xl_22k_192to384.pth' 22 | TRAIN: 23 | EMA: 24 | ENABLE: true 25 | DECAY: 0.9999 26 | EPOCHS: 20 27 | WARMUP_EPOCHS: 2 28 | WEIGHT_DECAY: 0.05 29 | BASE_LR: 2e-05 # 512 30 | WARMUP_LR: .0 31 | MIN_LR: .0 32 | LR_LAYER_DECAY: true 33 | LR_LAYER_DECAY_RATIO: 0.9 34 | USE_CHECKPOINT: true 35 | OPTIMIZER: 36 | DCN_LR_MUL: 0.1 37 | AMP_OPT_LEVEL: O0 38 | EVAL_FREQ: 1 39 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_b_1k_224.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_ON_MEMORY: True 3 | MODEL: 4 | TYPE: intern_image 5 | DROP_PATH_RATE: 0.5 6 | INTERN_IMAGE: 7 | CORE_OP: 'DCNv3' 8 | DEPTHS: [4, 
4, 21, 4] 9 | GROUPS: [7, 14, 28, 56] 10 | CHANNELS: 112 11 | LAYER_SCALE: 1e-5 12 | OFFSET_SCALE: 1.0 13 | MLP_RATIO: 4.0 14 | POST_NORM: True 15 | TRAIN: 16 | EMA: 17 | ENABLE: True 18 | DECAY: 0.9999 19 | BASE_LR: 5e-4 20 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_g_22kto1k_512.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 512 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.4 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [2, 2, 48, 4] 15 | GROUPS: [16, 32, 64, 128] 16 | CHANNELS: 512 17 | DW_KERNEL_SIZE: 5 18 | LAYER_SCALE: None 19 | OFFSET_SCALE: 1.0 20 | MLP_RATIO: 4.0 21 | POST_NORM: True 22 | LEVEL2_POST_NORM: True 23 | LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29, 35, 41, 47] 24 | CENTER_FEATURE_SCALE: True 25 | USE_CLIP_PROJECTOR: True 26 | PRETRAINED: 'pretrained/internimage_g_pretrainto22k_384.pth' 27 | TRAIN: 28 | EMA: 29 | ENABLE: true 30 | DECAY: 0.9999 31 | EPOCHS: 20 32 | WARMUP_EPOCHS: 2 33 | WEIGHT_DECAY: 0.05 34 | BASE_LR: 2e-05 # 512 35 | WARMUP_LR: .0 36 | MIN_LR: .0 37 | USE_CHECKPOINT: true 38 | OPTIMIZER: 39 | DCN_LR_MUL: 0.1 40 | AMP_OPT_LEVEL: O0 41 | EVAL_FREQ: 1 42 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_h_22kto1k_640.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 640 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.2 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [6, 6, 32, 6] 15 | GROUPS: [10, 20, 40, 80] 16 | CHANNELS: 320 17 | DW_KERNEL_SIZE: 5 18 | LAYER_SCALE: None 19 | OFFSET_SCALE: 1.0 20 | MLP_RATIO: 4.0 21 | POST_NORM: False 22 | RES_POST_NORM: True 23 | LEVEL2_POST_NORM: True 24 | LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29] 25 | CENTER_FEATURE_SCALE: True 26 | USE_CLIP_PROJECTOR: True 27 | PRETRAINED: 'pretrained/internimage_h_jointto22k_384.pth' 28 | TRAIN: 29 | EMA: 30 | ENABLE: true 31 | DECAY: 0.9999 32 | EPOCHS: 20 33 | WARMUP_EPOCHS: 2 34 | WEIGHT_DECAY: 0.05 35 | BASE_LR: 2e-05 # 512 36 | WARMUP_LR: .0 37 | MIN_LR: .0 38 | USE_CHECKPOINT: true 39 | OPTIMIZER: 40 | USE_ZERO: True 41 | DCN_LR_MUL: 0.1 42 | AMP_OPT_LEVEL: O0 43 | EVAL_FREQ: 1 44 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_l_22kto1k_384.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 384 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.1 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [5, 5, 22, 5] 15 | GROUPS: [10, 20, 40, 80] 16 | CHANNELS: 160 17 | LAYER_SCALE: 1e-5 18 | OFFSET_SCALE: 2.0 19 | MLP_RATIO: 4.0 20 | POST_NORM: True 21 | PRETRAINED: 'pretrained/internimage_l_22k_192to384.pth' 22 | TRAIN: 23 | EMA: 24 | ENABLE: true 25 | DECAY: 0.9999 26 | EPOCHS: 20 27 | WARMUP_EPOCHS: 2 28 | WEIGHT_DECAY: 0.05 29 | BASE_LR: 2e-05 # 512 30 | WARMUP_LR: .0 31 | MIN_LR: .0 32 | USE_CHECKPOINT: true 33 | OPTIMIZER: 34 | DCN_LR_MUL: 0.1 35 | AMP_OPT_LEVEL: O0 
36 | EVAL_FREQ: 1 37 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_s_1k_224.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_ON_MEMORY: True 3 | MODEL: 4 | TYPE: intern_image 5 | DROP_PATH_RATE: 0.4 6 | INTERN_IMAGE: 7 | CORE_OP: 'DCNv3' 8 | DEPTHS: [4, 4, 21, 4] 9 | GROUPS: [5, 10, 20, 40] 10 | CHANNELS: 80 11 | LAYER_SCALE: 1e-5 12 | OFFSET_SCALE: 1.0 13 | MLP_RATIO: 4.0 14 | POST_NORM: True 15 | TRAIN: 16 | EMA: 17 | ENABLE: True 18 | DECAY: 0.9999 19 | BASE_LR: 5e-4 20 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_t_1k_224.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_ON_MEMORY: True 3 | MODEL: 4 | TYPE: intern_image 5 | DROP_PATH_RATE: 0.1 6 | INTERN_IMAGE: 7 | CORE_OP: 'DCNv3' 8 | DEPTHS: [4, 4, 18, 4] 9 | GROUPS: [4, 8, 16, 32] 10 | CHANNELS: 64 11 | OFFSET_SCALE: 1.0 12 | MLP_RATIO: 4.0 13 | TRAIN: 14 | EMA: 15 | ENABLE: True 16 | DECAY: 0.9999 17 | BASE_LR: 5e-4 18 | -------------------------------------------------------------------------------- /classification/configs/without_lr_decay/internimage_xl_22kto1k_384.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | IMG_SIZE: 384 3 | IMG_ON_MEMORY: True 4 | AUG: 5 | MIXUP: 0.0 6 | CUTMIX: 0.0 7 | REPROB: 0.0 8 | MODEL: 9 | TYPE: intern_image 10 | DROP_PATH_RATE: 0.2 11 | LABEL_SMOOTHING: 0.3 12 | INTERN_IMAGE: 13 | CORE_OP: 'DCNv3' 14 | DEPTHS: [5, 5, 24, 5] 15 | GROUPS: [12, 24, 48, 96] 16 | CHANNELS: 192 17 | LAYER_SCALE: 1e-5 18 | OFFSET_SCALE: 2.0 19 | MLP_RATIO: 4.0 20 | POST_NORM: True 21 | PRETRAINED: 'pretrained/internimage_xl_22k_192to384.pth' 22 | TRAIN: 23 | EMA: 24 | ENABLE: true 25 | DECAY: 0.9999 26 | EPOCHS: 20 27 | WARMUP_EPOCHS: 2 28 | WEIGHT_DECAY: 0.05 29 | BASE_LR: 2e-05 # 512 30 | WARMUP_LR: .0 31 | MIN_LR: .0 32 | USE_CHECKPOINT: true 33 | OPTIMIZER: 34 | DCN_LR_MUL: 0.1 35 | AMP_OPT_LEVEL: O0 36 | EVAL_FREQ: 1 37 | -------------------------------------------------------------------------------- /classification/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .build import build_loader, build_loader2 8 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_g_jointto22k_384/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_g_jointto22k_384", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": true, 13 | "channels": 512, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 2, 18 | 2, 19 | 48, 20 | 4 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 
0.0, 25 | "dw_kernel_size": 5, 26 | "groups": [ 27 | 16, 28 | 32, 29 | 64, 30 | 128 31 | ], 32 | "layer_scale": null, 33 | "level2_post_norm": true, 34 | "level2_post_norm_block_ids": [ 35 | 5, 36 | 11, 37 | 17, 38 | 23, 39 | 29, 40 | 35, 41 | 41, 42 | 47 43 | ], 44 | "mlp_ratio": 4.0, 45 | "model_type": "internimage", 46 | "norm_layer": "LN", 47 | "num_classes": 21841, 48 | "offset_scale": 1.0, 49 | "post_norm": true, 50 | "remove_center": false, 51 | "res_post_norm": false, 52 | "torch_dtype": "float32", 53 | "transformers_version": "4.37.2", 54 | "use_clip_projector": true, 55 | "with_cp": false 56 | } 57 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_g_jointto22k_384/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 384, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 384 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_h_jointto22k_384/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_h_jointto22k_384", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": true, 13 | "channels": 320, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 6, 18 | 6, 19 | 32, 20 | 6 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": 5, 26 | "groups": [ 27 | 10, 28 | 20, 29 | 40, 30 | 80 31 | ], 32 | "layer_scale": null, 33 | "level2_post_norm": true, 34 | "level2_post_norm_block_ids": [ 35 | 5, 36 | 11, 37 | 17, 38 | 23, 39 | 29 40 | ], 41 | "mlp_ratio": 4.0, 42 | "model_type": "internimage", 43 | "norm_layer": "LN", 44 | "num_classes": 21841, 45 | "offset_scale": 1.0, 46 | "post_norm": false, 47 | "remove_center": false, 48 | "res_post_norm": true, 49 | "torch_dtype": "float32", 50 | "transformers_version": "4.37.2", 51 | "use_clip_projector": true, 52 | "with_cp": false 53 | } 54 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_h_jointto22k_384/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 384, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 384 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_l_22k_384/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": 
"OpenGVLab/internimage_l_22k_384", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 160, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 5, 18 | 5, 19 | 22, 20 | 5 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 10, 28 | 20, 29 | 40, 30 | 80 31 | ], 32 | "layer_scale": 1e-05, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 21841, 39 | "offset_scale": 2.0, 40 | "post_norm": true, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_l_22k_384/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 384, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 384 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_xl_22k_384/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_xl_22k_384", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 192, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 5, 18 | 5, 19 | 24, 20 | 5 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 12, 28 | 24, 29 | 48, 30 | 96 31 | ], 32 | "layer_scale": 1e-05, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 21841, 39 | "offset_scale": 2.0, 40 | "post_norm": true, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/22k_model/internimage_xl_22k_384/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 384, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": 
"CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 384 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_b_1k_224/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_b_1k_224", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 112, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 4, 18 | 4, 19 | 21, 20 | 4 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 7, 28 | 14, 29 | 28, 30 | 56 31 | ], 32 | "layer_scale": 1e-05, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 1000, 39 | "offset_scale": 1.0, 40 | "post_norm": true, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_b_1k_224/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 224, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 224 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_g_22kto1k_512/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_g_22kto1k_512", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": true, 13 | "channels": 512, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 2, 18 | 2, 19 | 48, 20 | 4 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": 5, 26 | "groups": [ 27 | 16, 28 | 32, 29 | 64, 30 | 128 31 | ], 32 | "layer_scale": null, 33 | "level2_post_norm": true, 34 | "level2_post_norm_block_ids": [ 35 | 5, 36 | 11, 37 | 17, 38 | 23, 39 | 29, 40 | 35, 41 | 41, 42 | 47 43 | ], 44 | "mlp_ratio": 4.0, 45 | "model_type": "internimage", 46 | "norm_layer": "LN", 47 | "num_classes": 1000, 48 | "offset_scale": 1.0, 49 | "post_norm": true, 50 | "remove_center": false, 51 | "res_post_norm": false, 52 | 
"torch_dtype": "float32", 53 | "transformers_version": "4.37.2", 54 | "use_clip_projector": true, 55 | "with_cp": false 56 | } 57 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_g_22kto1k_512/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 512, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 512 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_h_22kto1k_640/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_h_22kto1k_640", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": true, 13 | "channels": 320, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 6, 18 | 6, 19 | 32, 20 | 6 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": 5, 26 | "groups": [ 27 | 10, 28 | 20, 29 | 40, 30 | 80 31 | ], 32 | "layer_scale": null, 33 | "level2_post_norm": true, 34 | "level2_post_norm_block_ids": [ 35 | 5, 36 | 11, 37 | 17, 38 | 23, 39 | 29 40 | ], 41 | "mlp_ratio": 4.0, 42 | "model_type": "internimage", 43 | "norm_layer": "LN", 44 | "num_classes": 1000, 45 | "offset_scale": 1.0, 46 | "post_norm": false, 47 | "remove_center": false, 48 | "res_post_norm": true, 49 | "torch_dtype": "float32", 50 | "transformers_version": "4.37.2", 51 | "use_clip_projector": true, 52 | "with_cp": false 53 | } 54 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_h_22kto1k_640/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 640, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 640 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_l_22kto1k_384/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_l_22kto1k_384", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 160, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 5, 18 | 5, 19 | 22, 20 | 
5 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 10, 28 | 20, 29 | 40, 30 | 80 31 | ], 32 | "layer_scale": 1e-05, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 1000, 39 | "offset_scale": 2.0, 40 | "post_norm": true, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_l_22kto1k_384/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 384, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 384 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_s_1k_224/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_s_1k_224", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 80, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 4, 18 | 4, 19 | 21, 20 | 4 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 5, 28 | 10, 29 | 20, 30 | 40 31 | ], 32 | "layer_scale": 1e-05, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 1000, 39 | "offset_scale": 1.0, 40 | "post_norm": true, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_s_1k_224/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 224, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 224 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_t_1k_224/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_t_1k_224", 3 | "act_layer": "GELU", 4 | 
"architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 64, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 4, 18 | 4, 19 | 18, 20 | 4 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 4, 28 | 8, 29 | 16, 30 | 32 31 | ], 32 | "layer_scale": null, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 1000, 39 | "offset_scale": 1.0, 40 | "post_norm": false, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_t_1k_224/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 224, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 224 19 | } 20 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_xl_22kto1k_384/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "OpenGVLab/internimage_xl_22kto1k_384", 3 | "act_layer": "GELU", 4 | "architectures": [ 5 | "InternImageModel" 6 | ], 7 | "auto_map": { 8 | "AutoConfig": "configuration_internimage.InternImageConfig", 9 | "AutoModel": "modeling_internimage.InternImageModel", 10 | "AutoModelForImageClassification": "modeling_internimage.InternImageModelForImageClassification" 11 | }, 12 | "center_feature_scale": false, 13 | "channels": 192, 14 | "cls_scale": 1.5, 15 | "core_op": "DCNv3", 16 | "depths": [ 17 | 5, 18 | 5, 19 | 24, 20 | 5 21 | ], 22 | "drop_path_rate": 0.0, 23 | "drop_path_type": "linear", 24 | "drop_rate": 0.0, 25 | "dw_kernel_size": null, 26 | "groups": [ 27 | 12, 28 | 24, 29 | 48, 30 | 96 31 | ], 32 | "layer_scale": 1e-05, 33 | "level2_post_norm": false, 34 | "level2_post_norm_block_ids": null, 35 | "mlp_ratio": 4.0, 36 | "model_type": "internimage", 37 | "norm_layer": "LN", 38 | "num_classes": 1000, 39 | "offset_scale": 2.0, 40 | "post_norm": true, 41 | "remove_center": false, 42 | "res_post_norm": false, 43 | "torch_dtype": "float32", 44 | "transformers_version": "4.37.2", 45 | "use_clip_projector": false, 46 | "with_cp": false 47 | } 48 | -------------------------------------------------------------------------------- /classification/huggingface/in1k_model/internimage_xl_22kto1k_384/preprocessor_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "crop_size": 384, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.485, 9 | 0.456, 10 | 
0.406 11 | ], 12 | "image_std": [ 13 | 0.229, 14 | 0.224, 15 | 0.225 16 | ], 17 | "resample": 3, 18 | "size": 384 19 | } 20 | -------------------------------------------------------------------------------- /classification/logger.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | import functools 8 | import logging 9 | import os 10 | import sys 11 | 12 | from termcolor import colored 13 | 14 | 15 | @functools.lru_cache() 16 | def create_logger(output_dir, dist_rank=0, name=''): 17 | # create logger 18 | logger = logging.getLogger(name) 19 | logger.setLevel(logging.DEBUG) 20 | logger.propagate = False 21 | 22 | # create formatter 23 | fmt = '[%(asctime)s %(name)s] (%(filename)s %(lineno)d): %(levelname)s %(message)s' 24 | color_fmt = colored('[%(asctime)s %(name)s]', 'green') + \ 25 | colored('(%(filename)s %(lineno)d)', 'yellow') + \ 26 | ': %(levelname)s %(message)s' 27 | 28 | # create console handlers for master process 29 | if dist_rank == 0: 30 | console_handler = logging.StreamHandler(sys.stdout) 31 | console_handler.setLevel(logging.DEBUG) 32 | console_handler.setFormatter( 33 | logging.Formatter(fmt=color_fmt, datefmt='%Y-%m-%d %H:%M:%S')) 34 | logger.addHandler(console_handler) 35 | 36 | # create file handlers 37 | file_handler = logging.FileHandler(os.path.join( 38 | output_dir, f'log_rank{dist_rank}.txt'), 39 | mode='a') 40 | file_handler.setLevel(logging.DEBUG) 41 | file_handler.setFormatter( 42 | logging.Formatter(fmt=fmt, datefmt='%Y-%m-%d %H:%M:%S')) 43 | logger.addHandler(file_handler) 44 | 45 | return logger 46 | -------------------------------------------------------------------------------- /classification/meta_data/train.txt.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/classification/meta_data/train.txt.zip -------------------------------------------------------------------------------- /classification/meta_data/val.txt.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/classification/meta_data/val.txt.zip -------------------------------------------------------------------------------- /classification/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .build import build_model 8 | -------------------------------------------------------------------------------- /classification/ops_dcnv3/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3_func import DCNv3Function, dcnv3_core_pytorch 8 | 
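Editor's note: the `config.json`/`preprocessor_config.json` pairs above are plain Hugging Face checkpoints whose `auto_map` entries point at custom `configuration_internimage`/`modeling_internimage` code, so loading them requires `trust_remote_code=True`. Below is a minimal loading sketch, assuming the checkpoints are published on the Hub under the `_name_or_path` IDs shown and that the remote modeling code returns standard classifier outputs with a `logits` field; the image path is a placeholder.

```python
import torch
from PIL import Image
from transformers import AutoModelForImageClassification, CLIPFeatureExtractor

name = 'OpenGVLab/internimage_t_1k_224'  # any of the _name_or_path IDs above

# Reads preprocessor_config.json (feature_extractor_type: CLIPFeatureExtractor),
# which applies the 224/384/512/640 resize+center-crop and ImageNet mean/std.
processor = CLIPFeatureExtractor.from_pretrained(name)
# trust_remote_code pulls in the custom InternImage config/model classes
# named in the auto_map above.
model = AutoModelForImageClassification.from_pretrained(
    name, trust_remote_code=True)
model.eval()

image = Image.open('example.jpg')  # placeholder input
inputs = processor(images=image, return_tensors='pt')
with torch.no_grad():
    logits = model(**inputs).logits  # assumed output field
print(logits.argmax(-1).item())  # predicted ImageNet-1K class index
```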
-------------------------------------------------------------------------------- /classification/ops_dcnv3/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -------------------------------------------------------- 3 | # InternImage 4 | # Copyright (c) 2022 OpenGVLab 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | python setup.py build install 9 | -------------------------------------------------------------------------------- /classification/ops_dcnv3/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3 import DCNv3, DCNv3_pytorch 8 | -------------------------------------------------------------------------------- /classification/ops_dcnv3/src/cpu/dcnv3_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #pragma once 13 | #include <torch/extension.h> 14 | 15 | at::Tensor dcnv3_cpu_forward(const at::Tensor &input, const at::Tensor &offset, 16 | const at::Tensor &mask, const int kernel_h, 17 | const int kernel_w, const int stride_h, 18 | const int stride_w, const int pad_h, 19 | const int pad_w, const int dilation_h, 20 | const int dilation_w, const int group, 21 | const int group_channels, const float offset_scale, 22 | const int im2col_step); 23 | 24 | std::vector<at::Tensor> 25 | dcnv3_cpu_backward(const at::Tensor &input, const at::Tensor &offset, 26 | const at::Tensor &mask, const int kernel_h, 27 | const int kernel_w, const int stride_h, const int stride_w, 28 | const int pad_h, const int pad_w, const int dilation_h, 29 | const int dilation_w, const int group, 30 | const int group_channels, const float offset_scale, 31 | const at::Tensor &grad_output, const int im2col_step); 32 | -------------------------------------------------------------------------------- /classification/ops_dcnv3/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*!
2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #include "dcnv3.h" 13 | 14 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 15 | m.def("dcnv3_forward", &dcnv3_forward, "dcnv3_forward"); 16 | m.def("dcnv3_backward", &dcnv3_backward, "dcnv3_backward"); 17 | } 18 | -------------------------------------------------------------------------------- /classification/train_in1k.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-8} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-12} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | 13 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 14 | srun -p ${PARTITION} \ 15 | --job-name=${JOB_NAME} \ 16 | --gres=gpu:${GPUS_PER_NODE} \ 17 | --ntasks=${GPUS} \ 18 | --ntasks-per-node=${GPUS_PER_NODE} \ 19 | --cpus-per-task=${CPUS_PER_TASK} \ 20 | --kill-on-bad-exit=1 \ 21 | --quotatype=reserved \ 22 | ${SRUN_ARGS} \ 23 | python -u main.py \ 24 | --cfg ${CONFIG} \ 25 | --accumulation-steps 1 \ 26 | --local-rank 0 \ 27 | --data-path data/imagenet \ 28 | --output work_dirs ${@:4} 29 | -------------------------------------------------------------------------------- /classification/train_in1k_deepspeed.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-8} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-12} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | 13 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 14 | srun -p ${PARTITION} \ 15 | --job-name=${JOB_NAME} \ 16 | --gres=gpu:${GPUS_PER_NODE} \ 17 | --ntasks=${GPUS} \ 18 | --ntasks-per-node=${GPUS_PER_NODE} \ 19 | --cpus-per-task=${CPUS_PER_TASK} \ 20 | --kill-on-bad-exit=1 \ 21 | --quotatype=spot \ 22 | ${SRUN_ARGS} \ 23 | python -u main_deepspeed.py \ 24 | --cfg ${CONFIG} \ 25 | --local-rank 0 \ 26 | --data-path data/imagenet \ 27 | --output work_dirs_deepspeed ${@:4} 28 | -------------------------------------------------------------------------------- /classification/train_inat18.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-8} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-12} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | 13 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 14 | srun -p ${PARTITION} \ 15 | --job-name=${JOB_NAME} \ 16 | --gres=gpu:${GPUS_PER_NODE} \ 17 | --ntasks=${GPUS} \ 18 | --ntasks-per-node=${GPUS_PER_NODE} \ 19 | --cpus-per-task=${CPUS_PER_TASK} \ 20 | --kill-on-bad-exit=1 \ 21 | --quotatype=reserved \ 22 | ${SRUN_ARGS} \ 23 | python -u main.py \ 24 | --cfg ${CONFIG} \ 25 | --accumulation-steps 1 \ 26 | --local-rank 0 \ 27 | --data-path data/inat2018 \ 28 | --output work_dirs ${@:4} 29 | 
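Editor's note: the three Slurm launchers above share one calling convention: positional `PARTITION JOB_NAME CONFIG` arguments, resource sizing via the `GPUS`/`GPUS_PER_NODE`/`CPUS_PER_TASK` environment variables, and everything from the fourth argument onward forwarded to `main.py` through `${@:4}`. A hypothetical invocation follows; the partition name and config path are placeholders, not taken from this dump.

```bash
# 16 GPUs across two 8-GPU nodes on a Slurm partition named "gpu_part".
GPUS=16 GPUS_PER_NODE=8 CPUS_PER_TASK=12 \
    bash train_in1k.sh gpu_part internimage_t path/to/internimage_t_1k_224.yaml
```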
-------------------------------------------------------------------------------- /detection/configs/_base_/datasets/lvis_v0.5_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | _base_ = 'coco_instance.py' 3 | dataset_type = 'LVISV05Dataset' 4 | data_root = 'data/lvis_v0.5/' 5 | data = dict( 6 | samples_per_gpu=2, 7 | workers_per_gpu=2, 8 | train=dict( 9 | _delete_=True, 10 | type='ClassBalancedDataset', 11 | oversample_thr=1e-3, 12 | dataset=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'annotations/lvis_v0.5_train.json', 15 | img_prefix=data_root + 'train2017/')), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 19 | img_prefix=data_root + 'val2017/'), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 23 | img_prefix=data_root + 'val2017/')) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /detection/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=20) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=24) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = 
dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[27, 33]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=36) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_6x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=2000, 9 | warmup_ratio=0.001, 10 | step=[62, 68]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=72) 12 | -------------------------------------------------------------------------------- /detection/configs/lvis/README.md: -------------------------------------------------------------------------------- 1 | # LVIS 2 | 3 | ## Introduction 4 | 5 | LVIS is a dataset for long-tail instance segmentation. It has annotations for over 1000 object categories in 164k images. 6 | 7 | ## Model Zoo 8 | 9 | ### DINO + CB-InternImage 10 | 11 | Here we report the box AP on the minival set and the val set, respectively. 12 | 13 | | backbone | pretrain | minival (ss) | val (ss/ms) | #param | Config | Download | 14 | | :--------------: | :--------: | :----------: | :---------: | :----: | :-------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------: | 15 | | CB-InternImage-H | Objects365 | 65.8 | 62.3 / 63.2 | 2.18B | [config](./dino_4scale_cbinternimage_h_objects365_lvis_minival_ss.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_lvis.pth) | 16 | -------------------------------------------------------------------------------- /detection/configs/openimages/README.md: -------------------------------------------------------------------------------- 1 | # OpenImages 2 | 3 | ## Introduction 4 | 5 | OpenImages V6 is a large-scale dataset consisting of 9 million training images, 41,620 validation samples, and 125,456 test samples. It is partially annotated, with 9,600 trainable classes.
6 | 7 | ## Model Zoo 8 | 9 | ### DINO + CB-InternImage 10 | 11 | | backbone | pretrain | mAP (ss) | #param | Config | Download | 12 | | :--------------: | :--------: | :------: | :----: | :-----------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------: | 13 | | CB-InternImage-H | Objects365 | 74.1 | 2.18B | [config](./dino_4scale_cbinternimage_h_objects365_openimages_ss.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/dino_4scale_cbinternimage_h_objects365_openimages.pth) | 14 | -------------------------------------------------------------------------------- /detection/deploy/configs/_base_/backends/tensorrt-fp16.py: -------------------------------------------------------------------------------- 1 | backend_config = dict( 2 | type='tensorrt', common_config=dict(fp16_mode=True, max_workspace_size=0)) 3 | -------------------------------------------------------------------------------- /detection/deploy/configs/_base_/backends/tensorrt.py: -------------------------------------------------------------------------------- 1 | backend_config = dict( 2 | type='tensorrt', common_config=dict(fp16_mode=False, max_workspace_size=0)) 3 | -------------------------------------------------------------------------------- /detection/deploy/configs/_base_/onnx_config.py: -------------------------------------------------------------------------------- 1 | onnx_config = dict( 2 | type='onnx', 3 | export_params=True, 4 | keep_initializers_as_inputs=False, 5 | opset_version=11, 6 | save_file='end2end.onnx', 7 | input_names=['input'], 8 | output_names=['output'], 9 | input_shape=None, 10 | optimize=True) 11 | -------------------------------------------------------------------------------- /detection/deploy/configs/mmdet/_base_/base_dynamic.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./base_static.py'] 2 | onnx_config = dict( 3 | dynamic_axes={ 4 | 'input': { 5 | 0: 'batch', 6 | 2: 'height', 7 | 3: 'width' 8 | }, 9 | 'dets': { 10 | 0: 'batch', 11 | 1: 'num_dets', 12 | }, 13 | 'labels': { 14 | 0: 'batch', 15 | 1: 'num_dets', 16 | }, 17 | }, ) 18 | -------------------------------------------------------------------------------- /detection/deploy/configs/mmdet/_base_/base_instance-seg_dynamic.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./base_instance-seg_static.py'] 2 | onnx_config = dict( 3 | dynamic_axes={ 4 | 'input': { 5 | 0: 'batch', 6 | 2: 'height', 7 | 3: 'width' 8 | }, 9 | 'dets': { 10 | 0: 'batch', 11 | 1: 'num_dets', 12 | }, 13 | 'labels': { 14 | 0: 'batch', 15 | 1: 'num_dets', 16 | }, 17 | 'masks': { 18 | 0: 'batch', 19 | 1: 'num_dets', 20 | 2: 'height', 21 | 3: 'width' 22 | }, 23 | }) 24 | -------------------------------------------------------------------------------- /detection/deploy/configs/mmdet/_base_/base_instance-seg_static.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./base_static.py'] 2 | 3 | onnx_config = dict(output_names=['dets', 'labels', 'masks']) 4 | codebase_config = dict(post_processing=dict(export_postprocess_mask=False)) 5 | -------------------------------------------------------------------------------- /detection/deploy/configs/mmdet/_base_/base_static.py: -------------------------------------------------------------------------------- 1 | _base_ = 
['../../_base_/onnx_config.py'] 2 | 3 | onnx_config = dict(output_names=['dets', 'labels'], input_shape=None) 4 | codebase_config = dict( 5 | type='mmdet', 6 | task='ObjectDetection', 7 | model_type='end2end', 8 | post_processing=dict( 9 | score_threshold=0.05, 10 | confidence_threshold=0.005, # for YOLOv3 11 | iou_threshold=0.5, 12 | max_output_boxes_per_class=200, 13 | pre_top_k=5000, 14 | keep_top_k=100, 15 | background_label_id=-1, 16 | )) 17 | -------------------------------------------------------------------------------- /detection/deploy/configs/mmdet/instance-seg/instance-seg_tensorrt_dynamic-320x320-1344x1344.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/base_instance-seg_dynamic.py', 3 | '../../_base_/backends/tensorrt.py' 4 | ] 5 | 6 | backend_config = dict( 7 | common_config=dict(max_workspace_size=1 << 30), 8 | model_inputs=[ 9 | dict( 10 | input_shapes=dict( 11 | input=dict( 12 | min_shape=[1, 3, 320, 320], 13 | opt_shape=[1, 3, 800, 1344], 14 | max_shape=[1, 3, 1344, 1344]))) 15 | ]) 16 | -------------------------------------------------------------------------------- /detection/deploy/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/detection/deploy/demo.jpg -------------------------------------------------------------------------------- /detection/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29511} 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 10 | -------------------------------------------------------------------------------- /detection/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /detection/mmdet_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .datasets import * # noqa: F401,F403 8 | from .models import * # noqa: F401,F403 9 | -------------------------------------------------------------------------------- /detection/mmdet_custom/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .crowd_human import CrowdHumanDataset 8 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/__init__.py:
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .backbones import * # noqa: F401,F403 8 | from .dense_heads import * # noqa: F401,F403 9 | from .detectors import * # noqa: F401,F403 10 | from .necks import * # noqa: F401,F403 11 | from .utils import * # noqa: F401,F403 12 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .cbnet import CBInternImage 8 | from .intern_image import InternImage 9 | 10 | __all__ = ['InternImage', 'CBInternImage'] 11 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .cbdino_head import CBDINOHead 8 | from .deformable_detr_head import DeformableDETRHead 9 | from .detr_head import DETRHead 10 | from .dino_head import DINOHead 11 | 12 | __all__ = ['DeformableDETRHead', 'DETRHead', 'DINOHead', 'CBDINOHead'] 13 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .cbnet_dino import CBDINO 8 | from .dino import DINO 9 | 10 | __all__ = ['DINO', 'CBDINO'] 11 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/detectors/dino.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmdet.models.builder import DETECTORS 3 | from mmdet.models.detectors.detr import DETR 4 | 5 | 6 | @DETECTORS.register_module() 7 | class DINO(DETR): 8 | 9 | def __init__(self, *args, **kwargs): 10 | # Deliberately skip DETR.__init__ and call its parent's constructor instead. 11 | super(DETR, self).__init__(*args, **kwargs) 12 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .cbnet_channel_mapper import CBChannelMapper 2 | 3 | __all__ = ['CBChannelMapper'] 4 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/necks/cbnet_channel_mapper.py: -------------------------------------------------------------------------------- 1 | from mmdet.models.builder import NECKS 2 | from mmdet.models.necks import ChannelMapper 3 | 4 | 5 | @NECKS.register_module() 6 | class CBChannelMapper(ChannelMapper): 7 | 8 | def __init__(self, cb_idx=1, **kwargs): 9 | super(CBChannelMapper, self).__init__(**kwargs) 10 | self.cb_idx = cb_idx 11 | 12 | def forward(self, inputs): 13 | if not isinstance(inputs[0], (list, tuple)): 14 | inputs = [inputs] 15 | 16 | if self.training: 17 | outs = [] 18 | # CBNet training: map the features of every backbone branch. 19 | for x in inputs: 20 | out = super().forward(x) 21 | outs.append(out) 22 | return outs 23 | else: 24 | out = super().forward(inputs[self.cb_idx]) # inference: use only the cb_idx branch 25 | return out 26 | -------------------------------------------------------------------------------- /detection/mmdet_custom/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .query_denoising import build_dn_generator 2 | from .transformer import DinoTransformer, DinoTransformerDecoder 3 | 4 | __all__ = ['build_dn_generator', 'DinoTransformer', 'DinoTransformerDecoder'] 5 | -------------------------------------------------------------------------------- /detection/ops_dcnv3/functions/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3_func import DCNv3Function, dcnv3_core_pytorch 8 | -------------------------------------------------------------------------------- /detection/ops_dcnv3/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -------------------------------------------------------- 3 | # InternImage 4 | # Copyright (c) 2022 OpenGVLab 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | python setup.py build install 9 | -------------------------------------------------------------------------------- /detection/ops_dcnv3/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .dcnv3 import DCNv3, DCNv3_pytorch 8 | -------------------------------------------------------------------------------- /detection/ops_dcnv3/src/cpu/dcnv3_cpu.h:
-------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #pragma once 13 | #include <torch/extension.h> 14 | 15 | at::Tensor dcnv3_cpu_forward(const at::Tensor &input, const at::Tensor &offset, 16 | const at::Tensor &mask, const int kernel_h, 17 | const int kernel_w, const int stride_h, 18 | const int stride_w, const int pad_h, 19 | const int pad_w, const int dilation_h, 20 | const int dilation_w, const int group, 21 | const int group_channels, const float offset_scale, 22 | const int im2col_step); 23 | 24 | std::vector<at::Tensor> 25 | dcnv3_cpu_backward(const at::Tensor &input, const at::Tensor &offset, 26 | const at::Tensor &mask, const int kernel_h, 27 | const int kernel_w, const int stride_h, const int stride_w, 28 | const int pad_h, const int pad_w, const int dilation_h, 29 | const int dilation_w, const int group, 30 | const int group_channels, const float offset_scale, 31 | const at::Tensor &grad_output, const int im2col_step); 32 | -------------------------------------------------------------------------------- /detection/ops_dcnv3/src/cuda/dcnv3_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #pragma once 13 | #include <torch/extension.h> 14 | 15 | at::Tensor dcnv3_cuda_forward(const at::Tensor &input, const at::Tensor &offset, 16 | const at::Tensor &mask, const int kernel_h, 17 | const int kernel_w, const int stride_h, 18 | const int stride_w, const int pad_h, 19 | const int pad_w, const int dilation_h, 20 | const int dilation_w, const int group, 21 | const int group_channels, 22 | const float offset_scale, const int im2col_step); 23 | 24 | std::vector<at::Tensor> 25 | dcnv3_cuda_backward(const at::Tensor &input, const at::Tensor &offset, 26 | const at::Tensor &mask, const int kernel_h, 27 | const int kernel_w, const int stride_h, const int stride_w, 28 | const int pad_h, const int pad_w, const int dilation_h, 29 | const int dilation_w, const int group, 30 | const int group_channels, const float offset_scale, 31 | const at::Tensor &grad_output, const int im2col_step); 32 | -------------------------------------------------------------------------------- /detection/ops_dcnv3/src/vision.cpp: -------------------------------------------------------------------------------- 1 | /*!
2 | ************************************************************************************************** 3 | * InternImage 4 | * Copyright (c) 2022 OpenGVLab 5 | * Licensed under The MIT License [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from 8 | *https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 9 | ************************************************************************************************** 10 | */ 11 | 12 | #include "dcnv3.h" 13 | 14 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 15 | m.def("dcnv3_forward", &dcnv3_forward, "dcnv3_forward"); 16 | m.def("dcnv3_backward", &dcnv3_backward, "dcnv3_backward"); 17 | } 18 | -------------------------------------------------------------------------------- /detection/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | --quotatype=spot \ 24 | ${SRUN_ARGS} \ 25 | python -u test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 26 | -------------------------------------------------------------------------------- /detection/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-10} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | --quotatype=spot \ 24 | ${SRUN_ARGS} \ 25 | python -u train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 26 | -------------------------------------------------------------------------------- /detection/tools/evaluate/__init__.py: -------------------------------------------------------------------------------- 1 | from .compute_APMR import compute_APMR 2 | from .compute_JI import compute_JI_with_ignore 3 | -------------------------------------------------------------------------------- /docs/figs/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/docs/figs/arch.png -------------------------------------------------------------------------------- /docs/figs/intern_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/docs/figs/intern_pipeline.png -------------------------------------------------------------------------------- /docs/figs/intern_pipeline_en.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/docs/figs/intern_pipeline_en.png -------------------------------------------------------------------------------- /docs/figs/log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/docs/figs/log.png -------------------------------------------------------------------------------- /docs/figs/network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/docs/figs/network.png -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/cityscapes_1024x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (1024, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/cityscapes_extra_1024x1024.py: -------------------------------------------------------------------------------- 1 | _base_ = './cityscapes_extra.py' 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (1024, 1024) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | dict(type='PhotoMetricDistortion'), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 23 | flip=False, 24 | transforms=[ 25 | dict(type='Resize', keep_ratio=True), 26 | dict(type='RandomFlip'), 27 | 
dict(type='Normalize', **img_norm_cfg), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | train=dict(pipeline=train_pipeline), 34 | val=dict(pipeline=test_pipeline), 35 | test=dict(pipeline=test_pipeline)) 36 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/pascal_voc12_aug.py: -------------------------------------------------------------------------------- 1 | _base_ = './pascal_voc12.py' 2 | # dataset settings 3 | data = dict( 4 | train=dict( 5 | ann_dir=['SegmentationClass', 'SegmentationClassAug'], 6 | split=[ 7 | 'ImageSets/Segmentation/train.txt', 8 | 'ImageSets/Segmentation/aug.txt' 9 | ])) 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/potsdam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/segmentation/configs/_base_/datasets/potsdam.py -------------------------------------------------------------------------------- /segmentation/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # yapf:disable 2 | log_config = dict( 3 | interval=50, 4 | hooks=[ 5 | dict(type='TextLoggerHook', by_epoch=False), 6 | # dict(type='TensorboardLoggerHook') 7 | ]) 8 | # yapf:enable 9 | dist_params = dict(backend='nccl') 10 | log_level = 'INFO' 11 | load_from = None 12 | resume_from = None 13 | workflow = [('train', 1)] 14 | cudnn_benchmark = True 15 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/segformer_mit-b0.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained=None, 6 | backbone=dict( 7 | type='MixVisionTransformer', 8 | in_channels=3, 9 | embed_dims=32, 10 | num_stages=4, 11 | num_layers=[2, 2, 2, 2], 12 | num_heads=[1, 2, 5, 8], 13 | patch_sizes=[7, 3, 3, 3], 14 | sr_ratios=[8, 4, 2, 1], 15 | out_indices=(0, 1, 2, 3), 16 | mlp_ratio=4, 17 | qkv_bias=True, 18 | drop_rate=0.0, 19 | attn_drop_rate=0.0, 20 | drop_path_rate=0.1), 21 | decode_head=dict( 22 | type='SegformerHead', 23 | in_channels=[32, 64, 160, 256], 24 | in_index=[0, 1, 2, 3], 25 | channels=256, 26 | dropout_ratio=0.1, 27 | num_classes=19, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict( 31 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 32 | # model training and testing settings 33 | train_cfg=dict(), 34 | test_cfg=dict(mode='whole')) 35 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/upernet_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 1, 1), 12 | strides=(1, 2, 2, 2), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='UPerHead', 19 | in_channels=[256, 512, 1024, 
2048], 20 | in_index=[0, 1, 2, 3], 21 | pool_scales=(1, 2, 3, 6), 22 | channels=512, 23 | dropout_ratio=0.1, 24 | num_classes=19, 25 | norm_cfg=norm_cfg, 26 | align_corners=False, 27 | loss_decode=dict( 28 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 29 | auxiliary_head=dict( 30 | type='FCNHead', 31 | in_channels=1024, 32 | in_index=2, 33 | channels=256, 34 | num_convs=1, 35 | concat_input=False, 36 | dropout_ratio=0.1, 37 | num_classes=19, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict( 41 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=160000) 8 | checkpoint_config = dict(by_epoch=False, interval=16000) 9 | evaluation = dict(interval=16000, metric='mIoU', pre_eval=True) 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=20000) 8 | checkpoint_config = dict(by_epoch=False, interval=2000) 9 | evaluation = dict(interval=2000, metric='mIoU', pre_eval=True) 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_320k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=320000) 8 | checkpoint_config = dict(by_epoch=False, interval=32000) 9 | evaluation = dict(interval=32000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=40000) 8 | checkpoint_config = dict(by_epoch=False, interval=4000) 9 | evaluation = dict(interval=4000, metric='mIoU', pre_eval=True) 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | 
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=80000) 8 | checkpoint_config = dict(by_epoch=False, interval=8000) 9 | evaluation = dict(interval=8000, metric='mIoU', pre_eval=True) 10 | -------------------------------------------------------------------------------- /segmentation/configs/coco_stuff10k/README.md: -------------------------------------------------------------------------------- 1 | # COCO-Stuff-10K 2 | 3 | 4 | 5 | ## Introduction 6 | 7 | COCO-Stuff-10K is a dataset designed to enhance scene understanding tasks in computer vision by providing pixel-level annotations for both "things" (discrete objects with well-defined shapes, like cars and people) and "stuff" (amorphous background regions, such as grass and sky). This dataset augments 10,000 images from the original COCO dataset, offering detailed labels across 182 classes—91 "thing" classes and 91 "stuff" classes. 8 | 9 | ## Model Zoo 10 | 11 | ### Mask2Former + InternImage 12 | 13 | | backbone | resolution | mIoU (ss/ms) | #param | FLOPs | Config | Download | 14 | | :-----------: | :--------: | :----------: | :----: | :---: | :-------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | 15 | | InternImage-H | 512x512 | 59.2 / 59.6 | 1.28B | 1528G | [config](./mask2former_internimage_h_512_40k_cocostuff164k_to_10k.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask2former_internimage_h_512_40k_cocostuff164k_to_10k.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/mask2former_internimage_h_512_40k_cocostuff164k_to_10k.log.json) | 16 | -------------------------------------------------------------------------------- /segmentation/configs/nyu_depth_v2/README.md: -------------------------------------------------------------------------------- 1 | # NYU-Depth-V2 2 | 3 | 4 | 5 | ## Introduction 6 | 7 | The NYU Depth V2 dataset is a comprehensive collection of indoor scene data captured using a Microsoft Kinect device. It is widely utilized in computer vision research, particularly for tasks such as depth estimation and semantic segmentation. 
8 | 9 | ## Model Zoo 10 | 11 | ### Mask2Former + InternImage 12 | 13 | | backbone | resolution | mIoU (ss/ms) | #param | FLOPs | Config | Download | 14 | | :-----------: | :--------: | :----------: | :----: | :---: | :--------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | 15 | | InternImage-H | 480x480 | 67.1 / 68.1 | 1.07B | 867G | [config](./mask2former_internimage_h_480_40k_nyu.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask2former_internimage_h_480_40k_nyu.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/mask2former_internimage_h_480_40k_nyu.log.json) | 16 | -------------------------------------------------------------------------------- /segmentation/configs/pascal_context/README.md: -------------------------------------------------------------------------------- 1 | # Pascal Context 59 2 | 3 | 4 | 5 | ## Introduction 6 | 7 | The PASCAL Context dataset is an extension of the PASCAL VOC 2010 dataset, providing comprehensive pixel-wise annotations for over 400 classes, including the original 20 object categories and additional background classes. Due to the sparsity of many object categories, a subset of the 59 most frequent classes is commonly used for tasks like semantic segmentation. 8 | 9 | ## Model Zoo 10 | 11 | ### Mask2Former + InternImage 12 | 13 | | backbone | resolution | mIoU (ss/ms) | #param | FLOPs | Config | Download | 14 | | :-----------: | :--------: | :----------: | :----: | :---: | :----------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | 15 | | InternImage-H | 480x480 | 69.7 / 70.3 | 1.07B | 867G | [config](./mask2former_internimage_h_480_40k_pascal_context_59.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask2former_internimage_h_480_40k_pascal_context_59.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/mask2former_internimage_h_480_40k_pascal_context_59.log.json) | 16 | -------------------------------------------------------------------------------- /segmentation/deploy/configs/_base_/backends/tensorrt.py: -------------------------------------------------------------------------------- 1 | backend_config = dict( 2 | type='tensorrt', common_config=dict(fp16_mode=False, max_workspace_size=0)) 3 | -------------------------------------------------------------------------------- /segmentation/deploy/configs/_base_/onnx_config.py: -------------------------------------------------------------------------------- 1 | onnx_config = dict( 2 | type='onnx', 3 | export_params=True, 4 | keep_initializers_as_inputs=False, 5 | opset_version=11, 6 | save_file='end2end.onnx', 7 | input_names=['input'], 8 | output_names=['output'], 9 | input_shape=None, 10 | optimize=True) 11 | -------------------------------------------------------------------------------- /segmentation/deploy/configs/mmseg/segmentation_static.py: -------------------------------------------------------------------------------- 1 | _base_ = ['../_base_/onnx_config.py'] 2 | codebase_config = dict(type='mmseg', task='Segmentation', with_argmax=True) 3 | 
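Editor's note: these deploy configs are ordinary mmcv config files composed through `_base_`; a child file inherits `onnx_config`, `backend_config`, and `codebase_config` from its bases and deep-merges any overrides key by key. A small sketch of inspecting the merged result with mmcv 1.x, run from the repo root; it loads the static 512x512 TensorRT config that appears just below.

```python
from mmcv import Config

# _base_ chain: segmentation_static.py -> onnx_config.py, plus backends/tensorrt.py.
cfg = Config.fromfile(
    'segmentation/deploy/configs/mmseg/segmentation_tensorrt_static-512x512.py')

print(cfg.codebase_config.task)     # 'Segmentation' (from segmentation_static.py)
print(cfg.onnx_config.input_shape)  # [512, 512], overriding the base's None
# Dict overrides merge key-wise: fp16_mode stays False from the base backend
# config while max_workspace_size is raised to 1 GiB by the child.
print(cfg.backend_config.common_config)
```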
-------------------------------------------------------------------------------- /segmentation/deploy/configs/mmseg/segmentation_tensorrt_static-512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./segmentation_static.py', '../_base_/backends/tensorrt.py'] 2 | 3 | onnx_config = dict(input_shape=[512, 512]) 4 | backend_config = dict( 5 | common_config=dict(max_workspace_size=1 << 30), 6 | model_inputs=[ 7 | dict( 8 | input_shapes=dict( 9 | input=dict( 10 | min_shape=[1, 3, 512, 512], 11 | opt_shape=[1, 3, 512, 512], 12 | max_shape=[1, 3, 512, 512]))) 13 | ]) 14 | -------------------------------------------------------------------------------- /segmentation/deploy/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/InternImage/31c962dc6c1ceb23e580772f7daaa6944694fbe6/segmentation/deploy/demo.png -------------------------------------------------------------------------------- /segmentation/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29510} 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 10 | -------------------------------------------------------------------------------- /segmentation/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29300} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /segmentation/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | # -*- coding: utf-8 -*- 8 | from .custom_layer_decay_optimizer_constructor import \ 9 | CustomLayerDecayOptimizerConstructor 10 | 11 | __all__ = ['CustomLayerDecayOptimizerConstructor',] 12 | -------------------------------------------------------------------------------- /segmentation/mmseg_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # InternImage 3 | # Copyright (c) 2022 OpenGVLab 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | from .core import * # noqa: F401,F403 8 | from .datasets import * # noqa: F401,F403 9 | from .models import * # noqa: F401,F403 10 | -------------------------------------------------------------------------------- /segmentation/mmseg_custom/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Shanghai AI Lab. All rights reserved. 
--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from mmseg.core.evaluation import *  # noqa: F401, F403
from mmseg.core.seg import *  # noqa: F401, F403

from .anchor import *  # noqa: F401,F403
from .box import *  # noqa: F401,F403
from .evaluation import *  # noqa: F401,F403
from .mask import *  # noqa: F401,F403
from .utils import *  # noqa: F401, F403

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/anchor/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .point_generator import MlvlPointGenerator  # noqa: F401,F403

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/anchor/builder.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
import warnings

from mmcv.utils import Registry, build_from_cfg

PRIOR_GENERATORS = Registry('Generator for anchors and points')

ANCHOR_GENERATORS = PRIOR_GENERATORS


def build_prior_generator(cfg, default_args=None):
    return build_from_cfg(cfg, PRIOR_GENERATORS, default_args)


def build_anchor_generator(cfg, default_args=None):
    warnings.warn('``build_anchor_generator`` is deprecated, please use '
                  '``build_prior_generator`` instead')
    return build_prior_generator(cfg, default_args=default_args)

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/box/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .builder import *  # noqa: F401,F403
from .samplers import MaskPseudoSampler  # noqa: F401,F403

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/box/builder.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.utils import Registry, build_from_cfg

BBOX_SAMPLERS = Registry('bbox_sampler')
BBOX_CODERS = Registry('bbox_coder')


def build_sampler(cfg, **default_args):
    """Builder of box sampler."""
    return build_from_cfg(cfg, BBOX_SAMPLERS, default_args)


def build_bbox_coder(cfg, **default_args):
    """Builder of box coder."""
    return build_from_cfg(cfg, BBOX_CODERS, default_args)

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/box/samplers/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .mask_pseudo_sampler import MaskPseudoSampler  # noqa: F401,F403
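
The builders above all follow the same registry pattern: a component registers under its type name and is instantiated from a config dict. A minimal usage sketch, assuming `MlvlPointGenerator` registers itself in `PRIOR_GENERATORS` under its class name (as its mmdet counterpart does):

# Sketch only: building a prior generator from a config dict.
# Importing mmseg_custom triggers the registration side effects.
from mmseg_custom.core.anchor.builder import build_prior_generator

prior_cfg = dict(type='MlvlPointGenerator', strides=[4, 8, 16, 32])
prior_generator = build_prior_generator(prior_cfg)
# build_anchor_generator(prior_cfg) returns the same object, with a
# deprecation warning.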
--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .panoptic_utils import INSTANCE_OFFSET  # noqa: F401,F403

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/evaluation/panoptic_utils.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# A custom value used to distinguish instance IDs from category IDs; it must
# be greater than the number of categories.
# For a pixel in the panoptic result map:
#   pan_id = ins_id * INSTANCE_OFFSET + cat_id
INSTANCE_OFFSET = 1000

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/mask/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .utils import mask2bbox  # noqa: F401,F403

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .dist_utils import (DistOptimizerHook, all_reduce_dict, allreduce_grads,
                         reduce_mean)
from .misc import add_prefix, multi_apply

__all__ = [
    'add_prefix', 'multi_apply', 'DistOptimizerHook', 'allreduce_grads',
    'all_reduce_dict', 'reduce_mean'
]

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/core/utils/misc.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from functools import partial  # required by multi_apply below


def multi_apply(func, *args, **kwargs):
    """Apply function to a list of arguments.

    Note:
        This function applies ``func`` to multiple inputs and maps the
        multiple outputs of ``func`` into different lists. Each list
        contains the same type of outputs corresponding to different
        inputs.

    Args:
        func (Function): A function that will be applied to a list of
            arguments.

    Returns:
        tuple(list): A tuple containing multiple lists, where each list
            contains one kind of the results returned by the function.
    """
    pfunc = partial(func, **kwargs) if kwargs else func
    map_results = map(pfunc, *args)
    return tuple(map(list, zip(*map_results)))


def add_prefix(inputs, prefix):
    """Add prefix for dict.

    Args:
        inputs (dict): The input dict with str keys.
        prefix (str): The prefix to add.

    Returns:
        dict: The dict with keys updated with ``prefix``.
    """
    outputs = dict()
    for name, value in inputs.items():
        outputs[f'{prefix}.{name}'] = value

    return outputs
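
A quick sketch of what the two helpers above do: `multi_apply` fans one function out over a sequence of inputs and regroups the per-output results, and `add_prefix` namespaces a loss dict:

# Sketch only: illustrating multi_apply and add_prefix from misc.py above.
def square_and_cube(x):
    return x * x, x ** 3

squares, cubes = multi_apply(square_and_cube, [1, 2, 3])
# squares == [1, 4, 9]; cubes == [1, 8, 27]

print(add_prefix({'loss_mask': 0.5}, 'decode'))
# {'decode.loss_mask': 0.5}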
--------------------------------------------------------------------------------
/segmentation/mmseg_custom/datasets/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .dataset_wrappers import ConcatDataset
from .mapillary import MapillaryDataset  # noqa: F401,F403
from .nyu_depth_v2 import NYUDepthV2Dataset  # noqa: F401,F403
from .pipelines import *  # noqa: F401,F403

__all__ = ['MapillaryDataset', 'NYUDepthV2Dataset', 'ConcatDataset']

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .formatting import DefaultFormatBundle, ToMask
from .transform import MapillaryHack, PadShortSide, SETR_Resize

__all__ = [
    'DefaultFormatBundle', 'ToMask', 'SETR_Resize',
    'PadShortSide', 'MapillaryHack'
]

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/__init__.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

from .backbones import *  # noqa: F401,F403
from .decode_heads import *  # noqa: F401,F403
from .losses import *  # noqa: F401,F403
from .plugins import *  # noqa: F401,F403
from .segmentors import *  # noqa: F401,F403
from .utils import *  # noqa: F401,F403

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/backbones/__init__.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

from .intern_image import InternImage

__all__ = ['InternImage']

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/builder.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.utils import Registry

TRANSFORMER = Registry('Transformer')
MASK_ASSIGNERS = Registry('mask_assigner')
MATCH_COST = Registry('match_cost')


def build_match_cost(cfg):
    """Build Match Cost."""
    return MATCH_COST.build(cfg)


def build_assigner(cfg):
    """Build Assigner."""
    return MASK_ASSIGNERS.build(cfg)


def build_transformer(cfg):
    """Build Transformer."""
    return TRANSFORMER.build(cfg)
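
The three registries above use mmcv's standard `Registry.build` path: a class registers under its type name and is then instantiated from a config dict. A self-contained sketch; the `ToyCost` class is hypothetical, purely for illustration:

# Sketch only: registering and building through MATCH_COST above.
from mmseg_custom.models.builder import MATCH_COST, build_match_cost


@MATCH_COST.register_module()
class ToyCost:  # hypothetical cost, for illustration only
    def __init__(self, weight=1.0):
        self.weight = weight


cost = build_match_cost(dict(type='ToyCost', weight=2.0))
assert cost.weight == 2.0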
--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/decode_heads/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .mask2former_head import Mask2FormerHead
from .maskformer_head import MaskFormerHead

__all__ = [
    'MaskFormerHead',
    'Mask2FormerHead',
]

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/losses/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
                                 cross_entropy, mask_cross_entropy)
from .dice_loss import DiceLoss
from .focal_loss import FocalLoss
from .match_costs import (ClassificationCost, CrossEntropyLossCost, DiceCost,
                          MaskFocalLossCost)

__all__ = [
    'cross_entropy', 'binary_cross_entropy', 'mask_cross_entropy',
    'CrossEntropyLoss', 'DiceLoss', 'FocalLoss', 'ClassificationCost',
    'MaskFocalLossCost', 'DiceCost', 'CrossEntropyLossCost'
]

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/plugins/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .msdeformattn_pixel_decoder import MSDeformAttnPixelDecoder
from .pixel_decoder import PixelDecoder, TransformerEncoderPixelDecoder

__all__ = [
    'PixelDecoder', 'TransformerEncoderPixelDecoder',
    'MSDeformAttnPixelDecoder'
]

--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/segmentors/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
from .encoder_decoder_mask2former import EncoderDecoderMask2Former
from .encoder_decoder_mask2former_aug import EncoderDecoderMask2FormerAug

__all__ = ['EncoderDecoderMask2Former', 'EncoderDecoderMask2FormerAug']
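
The match costs exported from `losses` above are typically combined inside a Hungarian-matching assigner config, using the `MaskHungarianAssigner` exported from `models/utils` below. A hedged sketch; the argument names mirror mmdet's Mask2Former configs, and the weights are placeholders rather than values from this repo:

# Sketch only: a Mask2Former-style matching config built from the exported
# costs. Weights are placeholders.
assigner = dict(
    type='MaskHungarianAssigner',
    cls_cost=dict(type='ClassificationCost', weight=2.0),
    mask_cost=dict(type='CrossEntropyLossCost', weight=5.0, use_sigmoid=True),
    dice_cost=dict(type='DiceCost', weight=5.0, pred_act=True, eps=1.0))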
--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/utils/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Shanghai AI Lab. All rights reserved.
from .assigner import MaskHungarianAssigner
from .point_sample import get_uncertain_point_coords_with_randomness
from .positional_encoding import (LearnedPositionalEncoding,
                                  SinePositionalEncoding)
from .transformer import (DetrTransformerDecoder, DetrTransformerDecoderLayer,
                          DynamicConv, Transformer)

__all__ = [
    'DetrTransformerDecoderLayer', 'DetrTransformerDecoder', 'DynamicConv',
    'Transformer', 'LearnedPositionalEncoding', 'SinePositionalEncoding',
    'MaskHungarianAssigner', 'get_uncertain_point_coords_with_randomness'
]

--------------------------------------------------------------------------------
/segmentation/ops_dcnv3/functions/__init__.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

from .dcnv3_func import DCNv3Function, dcnv3_core_pytorch

--------------------------------------------------------------------------------
/segmentation/ops_dcnv3/make.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

python setup.py build install

--------------------------------------------------------------------------------
/segmentation/ops_dcnv3/modules/__init__.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

from .dcnv3 import DCNv3, DCNv3_pytorch
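
A hedged usage sketch for the DCNv3 modules exported above. The constructor arguments are assumptions inferred from the operator signature in `dcnv3_cpu.h` below, and the channels-last input layout follows InternImage's convention; treat this as illustrative, not the module's definitive interface:

# Sketch only: running the pure-PyTorch reference implementation of DCNv3.
# Argument names are assumptions based on the C++/CUDA operator signature.
import torch

from ops_dcnv3.modules import DCNv3_pytorch

dcn = DCNv3_pytorch(channels=64, kernel_size=3, stride=1, pad=1,
                    dilation=1, group=4, offset_scale=1.0)
x = torch.randn(2, 56, 56, 64)  # channels-last input: (N, H, W, C)
y = dcn(x)                      # same spatial size with stride=1, pad=1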
--------------------------------------------------------------------------------
/segmentation/ops_dcnv3/src/cpu/dcnv3_cpu.h:
--------------------------------------------------------------------------------
/*!
**************************************************************************************************
* InternImage
* Copyright (c) 2022 OpenGVLab
* Licensed under The MIT License [see LICENSE for details]
**************************************************************************************************
* Modified from
* https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
**************************************************************************************************
*/

#pragma once
#include <torch/extension.h>

at::Tensor dcnv3_cpu_forward(const at::Tensor &input, const at::Tensor &offset,
                             const at::Tensor &mask, const int kernel_h,
                             const int kernel_w, const int stride_h,
                             const int stride_w, const int pad_h,
                             const int pad_w, const int dilation_h,
                             const int dilation_w, const int group,
                             const int group_channels, const float offset_scale,
                             const int im2col_step);

std::vector<at::Tensor>
dcnv3_cpu_backward(const at::Tensor &input, const at::Tensor &offset,
                   const at::Tensor &mask, const int kernel_h,
                   const int kernel_w, const int stride_h, const int stride_w,
                   const int pad_h, const int pad_w, const int dilation_h,
                   const int dilation_w, const int group,
                   const int group_channels, const float offset_scale,
                   const at::Tensor &grad_output, const int im2col_step);

--------------------------------------------------------------------------------
/segmentation/ops_dcnv3/src/vision.cpp:
--------------------------------------------------------------------------------
/*!
**************************************************************************************************
* InternImage
* Copyright (c) 2022 OpenGVLab
* Licensed under The MIT License [see LICENSE for details]
**************************************************************************************************
* Modified from
* https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
**************************************************************************************************
*/

#include "dcnv3.h"

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("dcnv3_forward", &dcnv3_forward, "dcnv3_forward");
    m.def("dcnv3_backward", &dcnv3_backward, "dcnv3_backward");
}

--------------------------------------------------------------------------------
/segmentation/slurm_test.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

PARTITION=$1
JOB_NAME=$2
CONFIG=$3
CHECKPOINT=$4
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
PY_ARGS=${@:5}
SRUN_ARGS=${SRUN_ARGS:-""}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --kill-on-bad-exit=1 \
    --quotatype=auto \
    ${SRUN_ARGS} \
    python -u test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}

--------------------------------------------------------------------------------
/segmentation/slurm_train.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -x

PARTITION=$1
JOB_NAME=$2
CONFIG=$3
GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
SRUN_ARGS=${SRUN_ARGS:-""}
PY_ARGS=${@:4}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    --gres=gpu:${GPUS_PER_NODE} \
    --ntasks=${GPUS} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --cpus-per-task=${CPUS_PER_TASK} \
    --quotatype=spot \
    --kill-on-bad-exit=1 \
    ${SRUN_ARGS} \
    python -u train.py ${CONFIG} --launcher="slurm" ${PY_ARGS}

--------------------------------------------------------------------------------
/tensorrt/modulated_deform_conv_v3/trt_deform_conv_v3_kernel.hpp:
--------------------------------------------------------------------------------
#ifndef TRT_DEFORM_CONV_V3_KERNEL_HPP
#define TRT_DEFORM_CONV_V3_KERNEL_HPP
#include <cuda_runtime.h>

#include "common_cuda_helper.hpp"

template <typename scalar_t>
void DeformConvv3ForwardCUDAKernelLauncher(const scalar_t* input, const scalar_t* offset,
                                           const scalar_t* mask, scalar_t* output, void* workspace,
                                           int batch, int channels, int height, int width,
                                           int channels_out, int kernel_w, int kernel_h,
                                           int stride_w, int stride_h, int pad_w, int pad_h,
                                           int dilation_w, int dilation_h, int group,
                                           int group_channel, float offset_scale, int im2col_step,
                                           cudaStream_t stream);

#endif  // TRT_DEFORM_CONV_V3_KERNEL_HPP
--------------------------------------------------------------------------------