├── .gitignore ├── LICENSE ├── configs ├── Base-YOLOF.yaml ├── Base-YOLOv7.yaml ├── canaries │ ├── detrt_256_6_6_regnetx_0.4g.yaml │ ├── detrt_256_6_6_torchvision.yaml │ ├── regnetx_0.2g.yaml │ ├── yolomask_2gpu.yaml │ └── yolomask_m_8gpu.yaml ├── coco-instance │ ├── solov2_lite.yaml │ ├── yolomask.yaml │ └── yolomask_8gpu.yaml ├── coco-keypoints │ └── yolox_kpts.yaml ├── coco │ ├── cspdarknet53.yaml │ ├── darknet53.yaml │ ├── detr │ │ ├── anchordetr_origin.yaml │ │ ├── anchordetr_origin_bs64.yaml │ │ ├── d2go │ │ │ ├── detr_bs16.yaml │ │ │ ├── detr_fbv3_bs16.yaml │ │ │ ├── smca_bs16.yaml │ │ │ ├── smca_bs64.yaml │ │ │ ├── smca_fbv3.yaml │ │ │ └── smca_regnetx_0.4g.yaml │ │ ├── detr_256_6_6_regnetx_0.4g.yaml │ │ ├── detr_256_6_6_torchvision.yaml │ │ ├── detr_256_6_6_torchvision_mask.yaml │ │ └── smcadetr_origin.yaml │ ├── pvt_v2_b0.yaml │ ├── r2_50.yaml │ ├── r2_50_l.yaml │ ├── r2next_50.yaml │ ├── r50.yaml │ ├── regnetx_0.4g.yaml │ ├── sparseinst │ │ ├── Base-SparseInst.yaml │ │ ├── sparse_inst_r50_base.yaml │ │ ├── sparse_inst_r50_dcn_giam_aug.yaml │ │ ├── sparse_inst_r50_giam.yaml │ │ ├── sparse_inst_r50_giam_aug.yaml │ │ ├── sparse_inst_r50vd_base.yaml │ │ ├── sparse_inst_r50vd_dcn_giam.yaml │ │ ├── sparse_inst_r50vd_dcn_giam_aug.yaml │ │ ├── sparse_inst_r50vd_giam.yaml │ │ └── sparse_inst_r50vd_giam_aug.yaml │ ├── swin_s.yaml │ ├── swin_t.yaml │ ├── yolof │ │ ├── yolof_CSP_D_53_DC5_3x.yaml │ │ └── yolof_R_50_DC5_1x.yaml │ ├── yolov6 │ │ ├── yolov6_m.yaml │ │ ├── yolov6_s.yaml │ │ └── yolov6_tiny.yaml │ ├── yolox │ │ └── yolox_convnext.yaml │ ├── yolox_regnetx_s.yaml │ └── yolox_s.yaml ├── common │ ├── coco_schedule.py │ ├── data │ │ ├── coco.py │ │ ├── coco_keypoint.py │ │ └── coco_panoptic_separated.py │ ├── models │ │ ├── mask_rcnn_fpn.py │ │ ├── mask_rcnn_fpn_kps.py │ │ └── panoptic_fpn.py │ ├── optim.py │ ├── readme.md │ └── train.py ├── facemask │ ├── cspdarknet53_1gpu.yaml │ ├── r2_50_1gpu.yaml │ ├── r50_1gpu.yaml │ ├── r50_pan_1gpu.yaml │ ├── 
swin_tiny_1gpu.yaml │ └── yolov5_s.yaml ├── new_baselines │ ├── mask_rcnn_R_50_FPN_100ep_LSJ.py │ ├── mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py │ ├── maskrcnn_kps_regnetx_0.4g.py │ ├── maskrcnn_regnetx_0.4g.py │ ├── panoptic_fpn_regnetx_0.4g.py │ └── panoptic_fpn_regnetx_0.4g_s.py ├── taco │ ├── darknet53.yaml │ └── r50.yaml ├── tidal_plate │ └── yolox_s.yaml ├── tl │ ├── cspdarknet.yaml │ ├── darknet53.yaml │ ├── detr │ │ └── detrt_256_6_6_regnetx_0.4g.yaml │ ├── r2_50.yaml │ ├── r50.yaml │ ├── res2net_bifpn.yaml │ ├── res2net_fpn.yaml │ ├── x_s_pafpn_1gpu.yaml │ ├── yolov5_s.yaml │ ├── yolox_s.yaml │ └── yolox_s_1gpu.yaml ├── visdrone │ ├── r2_50_1gpu.yaml │ ├── yolov5_s.yaml │ └── yolox_s_1gpu.yaml ├── voc │ ├── darknet53_1gpu.yaml │ ├── r2_50_1gpu.yaml │ ├── x_s_pafpn_1gpu.yaml │ ├── yolov5_s.yaml │ └── yolox_s_1gpu.yaml └── wearmask │ ├── cspdarknet53.yaml │ ├── cspdarknet53_1gpu.yaml │ ├── darknet53.yaml │ ├── efficient_b2.yaml │ ├── r50.yaml │ ├── r50_1gpu.yaml │ ├── r50_bifpn.yaml │ ├── r50_pan.yaml │ └── yolov5_s.yaml ├── demo.py ├── demo_lazyconfig.py ├── deploy ├── configs │ └── r18.yaml ├── demo_quantized_int8.py ├── ort_infer.py ├── prune_nni.py ├── quant_atom │ ├── .gitignore │ └── qt_ppq_sinst.py ├── quant_fx │ ├── .gitignore │ ├── calib.py │ ├── configs │ │ └── r18.yaml │ ├── fx_ptq_test.py │ ├── qt_mq_test.py │ ├── qt_q_test.py │ ├── quant_ptq_test.py │ ├── r18.onnx_clip_ranges.json │ └── test.py ├── quant_onnx │ ├── .gitignore │ ├── prepare_onnx.py │ ├── qt_atom_pose.py │ ├── qt_atom_r18.py │ ├── qt_atom_sparseinst.py │ ├── qt_ort_cpu.py │ ├── qt_ort_yolox.py │ ├── qt_trt.py │ ├── readme.md │ └── test_images │ │ ├── daisy.jpg │ │ ├── rose.jpg │ │ └── tulip.jpg ├── quant_tvm.py └── trt_cc │ ├── .gitignore │ ├── CMakeLists.txt │ ├── demo_yolox.cc │ ├── demo_yolox_origin.cc │ ├── logging.h │ └── readme.md ├── docs ├── install.md └── usage.md ├── export.py ├── images ├── COCO_val2014_000000001722.jpg ├── COCO_val2014_000000001856.jpg ├── 
COCO_val2014_000000001869.jpg ├── COCO_val2014_000000001960.jpg ├── COCO_val2014_000000002149.jpg ├── COCO_val2014_000000002153.jpg ├── COCO_val2014_000000002171.jpg ├── COCO_val2014_000000002315.jpg ├── COCO_val2014_000000002532.jpg ├── dog.jpg └── mask │ ├── u=1506317376,3450613040&fm=26&fmt=auto&gp=0.jpg │ ├── u=3352497688,3286290828&fm=26&fmt=auto&gp=0.jpg │ ├── u=3557104275,359021270&fm=26&fmt=auto&gp=0.jpg │ ├── u=4153583989,584404369&fm=26&fmt=auto&gp=0.jpg │ └── u=724341885,3385420344&fm=26&fmt=auto&gp=0.jpg ├── readme.md ├── requirements.txt ├── setup.py ├── tests.py ├── tools ├── benchmark.py ├── cleandata.py ├── compute_anchors.py ├── convert_anchordetr_to_d2.py ├── convert_detr_to_d2.py ├── convert_smcadetr_to_d2.py ├── demo_onnx_detr.py ├── demo_trt_detr.py ├── eval_coco.py ├── lazyconfig_train_net.py ├── quantize_d2.py └── train_detr.py ├── train_custom_datasets.py ├── train_det.py ├── train_inseg.py ├── train_transformer.py ├── upload_pypi.sh ├── weights ├── .gitignore └── get_models.sh └── yolov7 ├── __init__.py ├── config.py ├── configs ├── config_convnext.py └── config_sparseinst.py ├── data ├── config.py ├── dataset_mapper.py ├── detection_utils.py └── transforms │ ├── __init__.py │ ├── augmentation_impl.py │ ├── data_augment.py │ └── transform.py ├── evaluation └── coco_evaluation.py ├── modeling ├── __init__.py ├── backbone │ ├── __init__.py │ ├── anchordetr_backbone.py │ ├── cfg.py │ ├── convnext.py │ ├── cspdarknet.py │ ├── cspresnet.py │ ├── dabdetr_backbone.py │ ├── darknet.py │ ├── darknetx.py │ ├── detr_backbone.py │ ├── dla.py │ ├── dlafpn.py │ ├── efficientnet.py │ ├── efficientrep.py │ ├── fbnet_v2.py │ ├── fbnet_v3.py │ ├── layers │ │ ├── __init__.py │ │ ├── activations.py │ │ ├── row_column_decoupled_attention.py │ │ ├── smca_attention.py │ │ ├── utils.py │ │ └── wrappers.py │ ├── mobilevit.py │ ├── pvt_v2.py │ ├── regnet.py │ ├── res2nets │ │ ├── __init__.py │ │ ├── res2net.py │ │ ├── res2net_v1b.py │ │ ├── res2next.py │ │ └── 
wrapper.py │ ├── resnetvd.py │ ├── smcadetr_backbone.py │ ├── swin_transformer.py │ ├── volo.py │ └── yolov5_backbone.py ├── head │ ├── __init__.py │ ├── box_regression.py │ ├── decoder.py │ ├── effidehead.py │ ├── encoder.py │ ├── sem_seg_head.py │ ├── solov2_head.py │ ├── uniform_matcher.py │ ├── yolov6_head.py │ ├── yolox_head.py │ └── yolox_kpts_head.py ├── loss │ ├── loss.py │ ├── seg.py │ ├── setcriterion.py │ └── sparseinst_loss.py ├── meta_arch │ ├── __init__.py │ ├── anchor_detr.py │ ├── dab_detr.py │ ├── detr.py │ ├── detr_d2go.py │ ├── detr_seg.py │ ├── maskrcnn_seg.py │ ├── smca_detr.py │ ├── solov2.py │ ├── sparseinst.py │ ├── utils.py │ ├── vidtplus.py │ ├── yolo.py │ ├── yolof.py │ ├── yolomask.py │ ├── yolov5.py │ ├── yolov6.py │ ├── yolov7.py │ ├── yolov7p.py │ ├── yolox.py │ └── yolox_kpts.py ├── neck │ ├── bifpn.py │ ├── ppyolo_pan.py │ ├── reppan.py │ ├── yolo_fpn.py │ └── yolo_pafpn.py └── transcoders │ ├── __init__.py │ ├── decoder_sparseinst.py │ └── encoder_sparseinst.py ├── optimizer ├── __init__.py └── build.py ├── utils ├── __init__.py ├── allreduce_norm.py ├── boxes.py ├── checkpoint.py ├── d2overrides.py ├── detr_utils.py ├── get_default_cfg.py ├── misc.py ├── qat_utils.py ├── solov2_utils.py └── wandb │ ├── __init__.py │ └── wandb_logger.py └── version.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | a.txt 30 | a.py 31 | b.py 32 | results/ 33 | *.plan 34 | *.pth 35 | 36 | # tests/ 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python 
script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .nox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | *.py,cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | 85 | output_val/ 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 104 | __pypackages__/ 105 | tests/ 106 | # Celery stuff 107 | celerybeat-schedule 108 | celerybeat.pid 109 | 110 | # SageMath parsed files 111 | *.sage.py 112 | 113 | # Environments 114 | .env 115 | .venv 116 | env/ 117 | venv/ 118 | ENV/ 119 | env.bak/ 120 | venv.bak/ 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | .dmypy.json 135 | dmypy.json 136 | 137 | # Pyre type checker 138 | .pyre/ 139 | 140 | .idea/ 141 | *.pth 142 | *.onnx 143 | 144 | .vscode/ 145 | output/ 146 | datasets/ 147 | vendor/vendor/ 148 | vendor/ 149 | log2.md 150 | vendor/ 151 | .DS_Store 152 | wandb/ 153 | artifacts/ 154 | -------------------------------------------------------------------------------- /configs/Base-YOLOF.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "YOLOF" 3 | BACKBONE: 4 | NAME: "build_resnet_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res5"] 7 | DATASETS: 8 | TRAIN: ("coco_2017_train",) 9 | TEST: ("coco_2017_val",) 10 | DATALOADER: 11 | NUM_WORKERS: 8 12 | SOLVER: 13 | IMS_PER_BATCH: 64 14 | BASE_LR: 0.12 15 | WARMUP_FACTOR: 0.00066667 16 | WARMUP_ITERS: 1500 17 | STEPS: (15000, 20000) 18 | MAX_ITER: 22500 19 | CHECKPOINT_PERIOD: 2500 20 | INPUT: 21 | MIN_SIZE_TRAIN: (800,) -------------------------------------------------------------------------------- /configs/Base-YOLOv7.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "YOLOV7" # default is YOLO, can be YOLOV7, YOLOX, YOLOMASK as well 3 | PIXEL_MEAN: [0.406, 0.485, 0.456] # same value as PP-YOLOv2, BGR order 4 | PIXEL_STD: [0.225, 0.229, 0.224] 5 | PADDED_VALUE: 114.0 6 | 7 | DATASETS: 8 | TRAIN: ("coco_2017_train",) 9 | TEST: ("coco_2017_val",) 10 | SOLVER: 11 | 
IMS_PER_BATCH: 16 12 | BASE_LR: 0.02 13 | STEPS: (60000, 80000) 14 | MAX_ITER: 90000 15 | LR_SCHEDULER_NAME: "WarmupCosineLR" 16 | 17 | INPUT: 18 | FORMAT: "BGR" # let's force BGR here, since our mean and std are BGR order. if PIL read it, it should convert to BGR 19 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 20 | # MIN_SIZE_TEST: 608 21 | 22 | VERSION: 2 23 | -------------------------------------------------------------------------------- /configs/canaries/detrt_256_6_6_regnetx_0.4g.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "Detr" 3 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 4 | PIXEL_MEAN: [123.675, 116.280, 103.530] 5 | PIXEL_STD: [58.395, 57.120, 57.375] 6 | MASK_ON: False 7 | 8 | BACKBONE: 9 | NAME: "build_regnet_backbone" 10 | REGNETS: 11 | TYPE: "RegNetX_400MF" 12 | OUT_FEATURES: ["s2", "s3", "s4"] # fpn produce 4 levels, only using 3 for now 13 | # RESNETS: 14 | # DEPTH: 50 15 | # STRIDE_IN_1X1: False 16 | # OUT_FEATURES: ["res2", "res3", "res4", "res5"] 17 | DETR: 18 | GIOU_WEIGHT: 2.0 19 | L1_WEIGHT: 5.0 20 | NUM_OBJECT_QUERIES: 100 21 | ENC_LAYERS: 6 22 | DEC_LAYERS: 6 23 | HIDDEN_DIM: 256 24 | 25 | DATASETS: 26 | TRAIN: ("coco_2017_train",) 27 | TEST: ("coco_2017_val",) 28 | 29 | SOLVER: 30 | IMS_PER_BATCH: 28 31 | BASE_LR: 0.00005 32 | STEPS: (369600,) 33 | MAX_ITER: 554400 34 | WARMUP_FACTOR: 1.0 35 | WARMUP_ITERS: 10 36 | WEIGHT_DECAY: 0.0001 37 | OPTIMIZER: "ADAMW" 38 | BACKBONE_MULTIPLIER: 0.1 39 | CLIP_GRADIENTS: 40 | ENABLED: True 41 | CLIP_TYPE: "full_model" 42 | CLIP_VALUE: 0.01 43 | NORM_TYPE: 2.0 44 | INPUT: 45 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 46 | CROP: 47 | ENABLED: True 48 | TYPE: "absolute_range" 49 | SIZE: (384, 600) 50 | FORMAT: "RGB" 51 | TEST: 52 | EVAL_PERIOD: 4000 53 | DATALOADER: 54 | FILTER_EMPTY_ANNOTATIONS: False 55 | NUM_WORKERS: 2 56 | VERSION: 2 57 | OUTPUT_DIR: 
"output/coco_detr_regx" -------------------------------------------------------------------------------- /configs/canaries/detrt_256_6_6_torchvision.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "Detr" 3 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 4 | PIXEL_MEAN: [123.675, 116.280, 103.530] 5 | PIXEL_STD: [58.395, 57.120, 57.375] 6 | MASK_ON: False 7 | RESNETS: 8 | DEPTH: 50 9 | STRIDE_IN_1X1: False 10 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 11 | DETR: 12 | GIOU_WEIGHT: 2.0 13 | L1_WEIGHT: 5.0 14 | NUM_OBJECT_QUERIES: 100 15 | ENC_LAYERS: 6 16 | DEC_LAYERS: 6 17 | HIDDEN_DIM: 256 18 | 19 | DATASETS: 20 | TRAIN: ("coco_2017_train",) 21 | TEST: ("coco_2017_val",) 22 | 23 | SOLVER: 24 | IMS_PER_BATCH: 8 25 | BASE_LR: 0.0001 26 | STEPS: (369600,) 27 | MAX_ITER: 554400 28 | WARMUP_FACTOR: 1.0 29 | WARMUP_ITERS: 10 30 | WEIGHT_DECAY: 0.0001 31 | OPTIMIZER: "ADAMW" 32 | BACKBONE_MULTIPLIER: 0.1 33 | CLIP_GRADIENTS: 34 | ENABLED: True 35 | # CLIP_TYPE: "full_model" 36 | CLIP_TYPE: "norm" 37 | CLIP_VALUE: 0.01 38 | NORM_TYPE: 2.0 39 | INPUT: 40 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 41 | CROP: 42 | ENABLED: True 43 | TYPE: "absolute_range" 44 | SIZE: (384, 600) 45 | FORMAT: "RGB" 46 | TEST: 47 | EVAL_PERIOD: 4000 48 | DATALOADER: 49 | FILTER_EMPTY_ANNOTATIONS: False 50 | NUM_WORKERS: 4 51 | VERSION: 2 52 | -------------------------------------------------------------------------------- /configs/canaries/regnetx_0.2g.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_regnet_backbone" 8 | REGNETS: 9 | TYPE: "RegNetX_200MF" 10 | OUT_FEATURES: ["s2", "s3", "s4"] # fpn produce 4 levels, only using 3 for now 11 | 12 | YOLO: 13 | ANCHORS: 14 | [ 15 | [[116, 90], 
[156, 198], [373, 326]], 16 | [[30, 61], [62, 45], [42, 119]], 17 | [[10, 13], [16, 30], [33, 23]], 18 | ] 19 | CLASSES: 80 20 | IN_FEATURES: ["s2", "s3", "s4"] # p3, p4, p5 from FPN 21 | CONF_THRESHOLD: 0.001 22 | NMS_THRESHOLD: 0.65 23 | IGNORE_THRESHOLD: 0.7 24 | VARIANT: "yolov7" 25 | WIDTH_MUL: 1.0 # for FPN neck 26 | # WIDTH_MUL: 0.5 # for FPN neck 27 | LOSS_TYPE: "v7" 28 | LOSS: 29 | LAMBDA_IOU: 1.1 30 | NECK: 31 | TYPE: "pafpn" 32 | 33 | DATASETS: 34 | TRAIN: ("coco_2017_train_mini",) 35 | TEST: ("coco_2014_val_mini",) 36 | # TEST: ("coco_2017_val",) 37 | 38 | 39 | INPUT: 40 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 41 | MAX_SIZE_TRAIN: 800 # force max size train to 800? 42 | MIN_SIZE_TEST: 640 43 | MAX_SIZE_TEST: 800 44 | # open all augmentations 45 | JITTER_CROP: 46 | ENABLED: False 47 | RESIZE: 48 | ENABLED: False 49 | # SHAPE: (540, 960) 50 | DISTORTION: 51 | ENABLED: False 52 | # MOSAIC: 53 | # ENABLED: True 54 | # NUM_IMAGES: 4 55 | # DEBUG_VIS: True 56 | # # MOSAIC_WIDTH: 960 57 | # # MOSAIC_HEIGHT: 540 58 | MOSAIC_AND_MIXUP: 59 | ENABLED: True 60 | DEBUG_VIS: False 61 | ENABLE_MIXUP: False 62 | 63 | 64 | SOLVER: 65 | AMP: 66 | ENABLED: true 67 | IMS_PER_BATCH: 64 # 1/5 bs than YOLOX 68 | # IMS_PER_BATCH: 5 # 1/5 bs than YOLOX 69 | # it can be 0.016 maybe 70 | BASE_LR: 0.008 71 | STEPS: (60000, 80000) 72 | WARMUP_FACTOR: 0.00033333 73 | WARMUP_ITERS: 1500 74 | MAX_ITER: 190000 75 | LR_SCHEDULER_NAME: "WarmupCosineLR" 76 | 77 | 78 | TEST: 79 | EVAL_PERIOD: 10000 80 | # EVAL_PERIOD: 0 81 | OUTPUT_DIR: "output/coco_mini_regnetx_0.2g" 82 | 83 | DATALOADER: 84 | # proposals are part of the dataset_dicts, and take a lot of RAM 85 | NUM_WORKERS: 1 86 | -------------------------------------------------------------------------------- /configs/canaries/yolomask_2gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOMask" 4 | WEIGHTS: 
"weights/coco_yoloxs_map36.6.pth" 5 | MASK_ON: True 6 | BACKBONE: 7 | NAME: "build_cspdarknetx_backbone" 8 | DARKNET: 9 | WEIGHTS: "" 10 | DEPTH_WISE: False 11 | OUT_FEATURES: ["dark2", "dark3", "dark4", "dark5"] 12 | 13 | YOLO: 14 | ANCHORS: 15 | # yolomask anchors slightly different than YOLOv7 16 | [ 17 | [142, 110], 18 | [192, 243], 19 | [459, 401], 20 | 21 | [36, 75], 22 | [76, 55], 23 | [72, 146], 24 | 25 | [12, 16], 26 | [19, 36], 27 | [40, 28], 28 | ] 29 | ANCHOR_MASK: [[0, 1, 2], [3, 4, 5], [6, 7, 8]] 30 | CLASSES: 80 31 | IN_FEATURES: ["dark2", "dark3", "dark4", "dark5"] # p3, p4, p5 from FPN 32 | WIDTH_MUL: 0.50 33 | # WIDTH_MUL: 1. 34 | DEPTH_MUL: 0.33 35 | # DEPTH_MUL: 1. 36 | 37 | CONF_THRESHOLD: 0.21 38 | NMS_THRESHOLD: 0.65 39 | IGNORE_THRESHOLD: 0.7 40 | VARIANT: "yolov7" 41 | LOSS_TYPE: "v7" 42 | LOSS: 43 | LAMBDA_IOU: 1.1 44 | NECK: 45 | TYPE: "fpn" 46 | WITH_SPP: true 47 | ORIEN_HEAD: 48 | UP_CHANNELS: 128 49 | 50 | DATASETS: 51 | TRAIN: ("coco_2017_train",) 52 | # TEST: ("coco_2014_val_mini",) 53 | TEST: ("coco_2017_val",) 54 | 55 | INPUT: 56 | MASK_FORMAT: "bitmask" 57 | MIN_SIZE_TRAIN: (416, 512, 608, 768, 800) 58 | MAX_SIZE_TRAIN: 900 # force max size train to 800? 
59 | MIN_SIZE_TEST: 640 60 | MAX_SIZE_TEST: 800 61 | # open all augmentations 62 | RANDOM_FLIP: "horizontal" 63 | CROP: 64 | ENABLED: False 65 | JITTER_CROP: 66 | ENABLED: False 67 | RESIZE: 68 | ENABLED: False 69 | # SHAPE: (540, 960) 70 | DISTORTION: 71 | ENABLED: True 72 | COLOR_JITTER: 73 | BRIGHTNESS: True 74 | SATURATION: True 75 | # MOSAIC: 76 | # ENABLED: True 77 | # NUM_IMAGES: 4 78 | # DEBUG_VIS: True 79 | # # MOSAIC_WIDTH: 960 80 | # # MOSAIC_HEIGHT: 540 81 | MOSAIC_AND_MIXUP: 82 | ENABLED: False 83 | DEBUG_VIS: False 84 | ENABLE_MIXUP: False 85 | 86 | SOLVER: 87 | # AMP: 88 | # ENABLED: true 89 | IMS_PER_BATCH: 16 # 1/5 bs than YOLOX 90 | # it can be 0.016 maybe 91 | BASE_LR: 0.003 92 | STEPS: (60000, 80000) 93 | WARMUP_FACTOR: 0.00033333 94 | WARMUP_ITERS: 1500 95 | MAX_ITER: 210000 96 | LR_SCHEDULER_NAME: "WarmupCosineLR" 97 | 98 | TEST: 99 | # EVAL_PERIOD: 10000 100 | EVAL_PERIOD: 5000 101 | OUTPUT_DIR: "output/coco_yolomask_canary" 102 | 103 | DATALOADER: 104 | # proposals are part of the dataset_dicts, and take a lot of RAM 105 | NUM_WORKERS: 1 106 | -------------------------------------------------------------------------------- /configs/canaries/yolomask_m_8gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOMask" 4 | WEIGHTS: "weights/coco_yoloxs_map36.6.pth" 5 | MASK_ON: True 6 | BACKBONE: 7 | NAME: "build_cspdarknetx_backbone" 8 | DARKNET: 9 | WEIGHTS: "" 10 | DEPTH_WISE: False 11 | OUT_FEATURES: ["dark2", "dark3", "dark4", "dark5"] 12 | 13 | YOLO: 14 | ANCHORS: 15 | # yolomask anchors slightly different than YOLOv7 16 | [ 17 | [142, 110], 18 | [192, 243], 19 | [459, 401], 20 | 21 | [36, 75], 22 | [76, 55], 23 | [72, 146], 24 | 25 | [12, 16], 26 | [19, 36], 27 | [40, 28], 28 | ] 29 | ANCHOR_MASK: [[0, 1, 2], [3, 4, 5], [6, 7, 8]] 30 | CLASSES: 80 31 | IN_FEATURES: ["dark2", "dark3", "dark4", "dark5"] # p3, p4, p5 from FPN 32 | # 
WIDTH_MUL: 0.50 33 | WIDTH_MUL: 1. 34 | # DEPTH_MUL: 0.33 35 | DEPTH_MUL: 1. 36 | 37 | CONF_THRESHOLD: 0.21 38 | NMS_THRESHOLD: 0.65 39 | IGNORE_THRESHOLD: 0.7 40 | VARIANT: "yolov7" 41 | LOSS_TYPE: "v7" 42 | LOSS: 43 | LAMBDA_IOU: 1.1 44 | NECK: 45 | TYPE: "fpn" 46 | WITH_SPP: true 47 | ORIEN_HEAD: 48 | UP_CHANNELS: 80 49 | 50 | DATASETS: 51 | TRAIN: ("coco_2017_train",) 52 | # TEST: ("coco_2014_val_mini",) 53 | TEST: ("coco_2017_val",) 54 | 55 | INPUT: 56 | MASK_FORMAT: "bitmask" 57 | MIN_SIZE_TRAIN: (416, 512, 608, 768, 800) 58 | MAX_SIZE_TRAIN: 900 # force max size train to 800? 59 | MIN_SIZE_TEST: 640 60 | MAX_SIZE_TEST: 800 61 | # open all augmentations 62 | RANDOM_FLIP: "horizontal" 63 | CROP: 64 | ENABLED: False 65 | JITTER_CROP: 66 | ENABLED: False 67 | RESIZE: 68 | ENABLED: False 69 | # SHAPE: (540, 960) 70 | DISTORTION: 71 | ENABLED: True 72 | COLOR_JITTER: 73 | BRIGHTNESS: True 74 | SATURATION: True 75 | # MOSAIC: 76 | # ENABLED: True 77 | # NUM_IMAGES: 4 78 | # DEBUG_VIS: True 79 | # # MOSAIC_WIDTH: 960 80 | # # MOSAIC_HEIGHT: 540 81 | MOSAIC_AND_MIXUP: 82 | ENABLED: False 83 | DEBUG_VIS: False 84 | ENABLE_MIXUP: False 85 | 86 | SOLVER: 87 | # AMP: 88 | # ENABLED: true 89 | IMS_PER_BATCH: 48 # 1/5 bs than YOLOX 90 | # it can be 0.016 maybe 91 | BASE_LR: 0.009 92 | STEPS: (60000, 80000) 93 | WARMUP_FACTOR: 0.00033333 94 | WARMUP_ITERS: 1500 95 | MAX_ITER: 210000 96 | LR_SCHEDULER_NAME: "WarmupCosineLR" 97 | 98 | TEST: 99 | # EVAL_PERIOD: 10000 100 | EVAL_PERIOD: 5000 101 | OUTPUT_DIR: "output/coco_yolomask_canary2" 102 | 103 | DATALOADER: 104 | # proposals are part of the dataset_dicts, and take a lot of RAM 105 | NUM_WORKERS: 1 106 | -------------------------------------------------------------------------------- /configs/coco-instance/solov2_lite.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "SOLOv2" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_resnet_fpn_backbone" 6 | 
RESNETS: 7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 10 | SOLOV2: 11 | FPN_SCALE_RANGES: ((1, 56), (28, 112), (56, 224), (112, 448), (224, 896)) 12 | NUM_GRIDS: [40, 36, 24, 16, 12] 13 | NUM_INSTANCE_CONVS: 2 14 | NUM_KERNELS: 256 15 | INSTANCE_IN_CHANNELS: 256 16 | INSTANCE_CHANNELS: 128 17 | MASK_IN_CHANNELS: 256 18 | MASK_CHANNELS: 128 19 | NORM: "SyncBN" 20 | DATASETS: 21 | TRAIN: ("coco_2017_train",) 22 | TEST: ("coco_2017_val",) 23 | SOLVER: 24 | IMS_PER_BATCH: 4 25 | BASE_LR: 0.01 26 | WARMUP_FACTOR: 0.01 27 | WARMUP_ITERS: 1000 28 | STEPS: (60000, 80000) 29 | MAX_ITER: 90000 30 | INPUT: 31 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 32 | MASK_FORMAT: "bitmask" 33 | VERSION: 2 34 | 35 | 36 | -------------------------------------------------------------------------------- /configs/coco-instance/yolomask.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOMask" 4 | WEIGHTS: "" 5 | MASK_ON: True 6 | BACKBONE: 7 | NAME: "build_cspdarknetx_backbone" 8 | DARKNET: 9 | WEIGHTS: "" 10 | DEPTH_WISE: False 11 | OUT_FEATURES: ["dark2", "dark3", "dark4", "dark5"] 12 | 13 | YOLO: 14 | ANCHORS: 15 | # yolomask anchors slightly different than YOLOv7 16 | [ 17 | [142, 110], 18 | [192, 243], 19 | [459, 401], 20 | 21 | [36, 75], 22 | [76, 55], 23 | [72, 146], 24 | 25 | [12, 16], 26 | [19, 36], 27 | [40, 28], 28 | ] 29 | ANCHOR_MASK: [[0, 1, 2], [3, 4, 5], [6, 7, 8]] 30 | CLASSES: 80 31 | IN_FEATURES: ["dark2", "dark3", "dark4", "dark5"] # p3, p4, p5 from FPN 32 | WIDTH_MUL: 0.50 33 | DEPTH_MUL: 0.33 34 | 35 | CONF_THRESHOLD: 0.001 36 | NMS_THRESHOLD: 0.65 37 | IGNORE_THRESHOLD: 0.7 38 | VARIANT: "yolov7" 39 | LOSS_TYPE: "v7" 40 | LOSS: 41 | LAMBDA_IOU: 1.1 42 | NECK: 43 | TYPE: "fpn" 44 | WITH_SPP: true 45 | 46 | DATASETS: 47 | TRAIN: ("coco_2017_train",) 48 | # TEST: ("coco_2014_val_mini",) 49 | 
TEST: ("coco_2017_val",) 50 | 51 | INPUT: 52 | MASK_FORMAT: "bitmask" 53 | MIN_SIZE_TRAIN: (416, 512, 608) 54 | MAX_SIZE_TRAIN: 608 # force max size train to 800? 55 | MIN_SIZE_TEST: 416 56 | MAX_SIZE_TEST: 608 57 | # open all augmentations 58 | RANDOM_FLIP_HORIZONTAL: 59 | ENABLED: False 60 | RANDOM_FLIP_VERTICAL: 61 | ENABLED: False 62 | JITTER_CROP: 63 | ENABLED: False 64 | RESIZE: 65 | ENABLED: False 66 | # SHAPE: (540, 960) 67 | DISTORTION: 68 | ENABLED: False 69 | # MOSAIC: 70 | # ENABLED: True 71 | # NUM_IMAGES: 4 72 | # DEBUG_VIS: True 73 | # # MOSAIC_WIDTH: 960 74 | # # MOSAIC_HEIGHT: 540 75 | MOSAIC_AND_MIXUP: 76 | ENABLED: False 77 | DEBUG_VIS: False 78 | ENABLE_MIXUP: False 79 | 80 | SOLVER: 81 | # AMP: 82 | # ENABLED: true 83 | IMS_PER_BATCH: 3 # 1/5 bs than YOLOX 84 | # it can be 0.016 maybe 85 | BASE_LR: 0.0009 86 | STEPS: (60000, 80000) 87 | WARMUP_FACTOR: 0.00033333 88 | WARMUP_ITERS: 1500 89 | MAX_ITER: 190000 90 | LR_SCHEDULER_NAME: "WarmupCosineLR" 91 | 92 | TEST: 93 | # EVAL_PERIOD: 10000 94 | EVAL_PERIOD: 0 95 | OUTPUT_DIR: "output/coco_yolomask" 96 | 97 | DATALOADER: 98 | # proposals are part of the dataset_dicts, and take a lot of RAM 99 | NUM_WORKERS: 1 100 | -------------------------------------------------------------------------------- /configs/coco-instance/yolomask_8gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOMask" 4 | # WEIGHTS: "weights/coco_yoloxs_map36.6.pth" 5 | MASK_ON: True 6 | BACKBONE: 7 | NAME: "build_cspdarknetx_backbone" 8 | DARKNET: 9 | WEIGHTS: "" 10 | DEPTH_WISE: False 11 | OUT_FEATURES: ["dark2", "dark3", "dark4", "dark5"] 12 | 13 | YOLO: 14 | ANCHORS: 15 | # yolomask anchors slightly different than YOLOv7 16 | [ 17 | [142, 110], 18 | [192, 243], 19 | [459, 401], 20 | 21 | [36, 75], 22 | [76, 55], 23 | [72, 146], 24 | 25 | [12, 16], 26 | [19, 36], 27 | [40, 28], 28 | ] 29 | ANCHOR_MASK: [[0, 1, 2], [3, 
4, 5], [6, 7, 8]] 30 | CLASSES: 80 31 | IN_FEATURES: ["dark2", "dark3", "dark4", "dark5"] # p3, p4, p5 from FPN 32 | WIDTH_MUL: 0.50 33 | DEPTH_MUL: 0.33 34 | 35 | CONF_THRESHOLD: 0.001 36 | NMS_THRESHOLD: 0.65 37 | IGNORE_THRESHOLD: 0.7 38 | VARIANT: "yolov7" 39 | LOSS_TYPE: "v7" 40 | LOSS: 41 | LAMBDA_IOU: 1.1 42 | NECK: 43 | TYPE: "fpn" 44 | WITH_SPP: true 45 | 46 | DATASETS: 47 | TRAIN: ("coco_2017_train",) 48 | # TEST: ("coco_2014_val_mini",) 49 | TEST: ("coco_2017_val",) 50 | 51 | INPUT: 52 | MASK_FORMAT: "bitmask" 53 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 54 | MAX_SIZE_TRAIN: 800 # force max size train to 800? 55 | MIN_SIZE_TEST: 640 56 | MAX_SIZE_TEST: 800 57 | # open all augmentations 58 | RANDOM_FLIP: "horizontal" 59 | CROP: 60 | ENABLED: False 61 | JITTER_CROP: 62 | ENABLED: False 63 | RESIZE: 64 | ENABLED: False 65 | # SHAPE: (540, 960) 66 | DISTORTION: 67 | ENABLED: False 68 | # MOSAIC: 69 | # ENABLED: True 70 | # NUM_IMAGES: 4 71 | # DEBUG_VIS: True 72 | # # MOSAIC_WIDTH: 960 73 | # # MOSAIC_HEIGHT: 540 74 | MOSAIC_AND_MIXUP: 75 | ENABLED: False 76 | DEBUG_VIS: False 77 | ENABLE_MIXUP: False 78 | 79 | SOLVER: 80 | # AMP: 81 | # ENABLED: true 82 | IMS_PER_BATCH: 96 # 1/5 bs than YOLOX 83 | # it can be 0.016 maybe 84 | BASE_LR: 0.006 85 | STEPS: (60000, 80000) 86 | WARMUP_FACTOR: 0.00033333 87 | WARMUP_ITERS: 1500 88 | MAX_ITER: 190000 89 | LR_SCHEDULER_NAME: "WarmupCosineLR" 90 | 91 | TEST: 92 | # EVAL_PERIOD: 10000 93 | EVAL_PERIOD: 0 94 | OUTPUT_DIR: "output/coco_yolomask" 95 | 96 | DATALOADER: 97 | # proposals are part of the dataset_dicts, and take a lot of RAM 98 | NUM_WORKERS: 1 99 | -------------------------------------------------------------------------------- /configs/coco-keypoints/yolox_kpts.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [0.485, 0.456, 0.406] # same value as PP-YOLOv2, RGB order 4 | PIXEL_STD: [0.229, 0.224, 0.225] 5 | 6 | 
WEIGHTS: "" 7 | KEYPOINT_ON: True 8 | META_ARCHITECTURE: "YOLOX" 9 | BACKBONE: 10 | NAME: "build_cspdarknetx_backbone" 11 | 12 | DARKNET: 13 | WEIGHTS: "" 14 | DEPTH_WISE: False 15 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 16 | 17 | YOLO: 18 | CLASSES: 80 19 | IN_FEATURES: ["dark3", "dark4", "dark5"] 20 | CONF_THRESHOLD: 0.001 21 | NMS_THRESHOLD: 0.65 22 | IGNORE_THRESHOLD: 0.7 23 | WIDTH_MUL: 0.50 24 | DEPTH_MUL: 0.33 25 | LOSS_TYPE: "v7" 26 | LOSS: 27 | LAMBDA_IOU: 1.5 28 | 29 | DATASETS: 30 | TRAIN: ("coco_2017_train",) 31 | # TEST: ("coco_2014_val_mini",) 32 | TEST: ("coco_2017_val",) 33 | 34 | INPUT: 35 | # FORMAT: "RGB" # using BGR default 36 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 37 | MAX_SIZE_TRAIN: 800 # force max size train to 800? 38 | MIN_SIZE_TEST: 640 39 | MAX_SIZE_TEST: 800 40 | # open all augmentations 41 | JITTER_CROP: 42 | ENABLED: False 43 | RESIZE: 44 | ENABLED: False 45 | # SHAPE: (540, 960) 46 | DISTORTION: 47 | ENABLED: True 48 | COLOR_JITTER: 49 | BRIGHTNESS: True 50 | SATURATION: True 51 | # MOSAIC: 52 | # ENABLED: True 53 | # NUM_IMAGES: 4 54 | # DEBUG_VIS: True 55 | # # MOSAIC_WIDTH: 960 56 | # # MOSAIC_HEIGHT: 540 57 | MOSAIC_AND_MIXUP: 58 | ENABLED: True 59 | # ENABLED: False 60 | DEBUG_VIS: False 61 | ENABLE_MIXUP: False 62 | DISABLE_AT_ITER: 120000 63 | 64 | 65 | SOLVER: 66 | # enable fp16 training 67 | AMP: 68 | ENABLED: true 69 | IMS_PER_BATCH: 112 70 | BASE_LR: 0.027 71 | STEPS: (60000, 80000) 72 | WARMUP_FACTOR: 0.00033333 73 | WARMUP_ITERS: 1200 74 | MAX_ITER: 230000 75 | LR_SCHEDULER_NAME: "WarmupCosineLR" 76 | 77 | TEST: 78 | EVAL_PERIOD: 10000 79 | # EVAL_PERIOD: 0 80 | OUTPUT_DIR: "output/coco_yolox_s_kpts" 81 | VIS_PERIOD: 5000 82 | 83 | DATALOADER: 84 | # proposals are part of the dataset_dicts, and take a lot of RAM 85 | NUM_WORKERS: 3 86 | -------------------------------------------------------------------------------- /configs/coco/cspdarknet53.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_cspdarknet_backbone" 8 | DARKNET: 9 | WEIGHTS: "weights/cspdarknet53_ra_256-d05c7c21.pth" # manually downloaded from: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/cspdarknet53_ra_256-d05c7c21.pth 10 | DEPTH: 53 11 | WITH_CSP: True 12 | STEM_OUT_CHANNELS: 32 13 | # WEIGHTS: "" # duplicate key removed: YAML keeps the last occurrence, so this silently overrode the pretrained path on line 9 14 | OUT_FEATURES: ["layer3", "layer4", "layer5"] 15 | YOLO: 16 | ANCHORS: 17 | [ 18 | [[116, 90], [156, 198], [373, 326]], 19 | [[30, 61], [62, 45], [42, 119]], 20 | [[10, 13], [16, 30], [33, 23]], 21 | ] 22 | CLASSES: 80 23 | IN_FEATURES: ["layer3", "layer4", "layer5"] 24 | CONF_THRESHOLD: 0.01 25 | NMS_THRESHOLD: 0.5 26 | IGNORE_THRESHOLD: 0.5 27 | LOSS_TYPE: "v7" 28 | 29 | DATASETS: 30 | TRAIN: ("coco_2017_train",) 31 | TEST: ("coco_2017_val",) 32 | 33 | SOLVER: 34 | IMS_PER_BATCH: 32 35 | BASE_LR: 0.0009 36 | WARMUP_ITERS: 1000 37 | STEPS: (60000, 80000) 38 | MAX_ITER: 120000 39 | CLIP_GRADIENTS: 40 | ENABLED: True 41 | CHECKPOINT_PERIOD: 5000 42 | 43 | TEST: 44 | EVAL_PERIOD: 10000 45 | OUTPUT_DIR: "output/coco_cspdarknet53" 46 | VIS_PERIOD: 5000 47 | 48 | DATALOADER: 49 | # proposals are part of the dataset_dicts, and take a lot of RAM 50 | NUM_WORKERS: 2 51 | -------------------------------------------------------------------------------- /configs/coco/darknet53.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | BACKBONE: 6 | NAME: "build_darknet_backbone" 7 | 8 | DARKNET: 9 | WEIGHTS: "weights/yolov3.pt" # using pytorch weights from ultralytics's yolov3 10 | DEPTH: 53 11 | STEM_OUT_CHANNELS: 32 12 | # WEIGHTS: "" # duplicate key removed: YAML keeps the last occurrence, so this silently overrode "weights/yolov3.pt" on line 9 13 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 14 | YOLO: 15 | ANCHORS: 16 | [ 17 | [[116, 90], [156, 198],
[373, 326]], 18 | [[30, 61], [62, 45], [42, 119]], 19 | [[10, 13], [16, 30], [33, 23]], 20 | ] 21 | CLASSES: 80 22 | IN_FEATURES: ["dark3", "dark4", "dark5"] 23 | CONF_THRESHOLD: 0.2 24 | NMS_THRESHOLD: 0.1 25 | IGNORE_THRESHOLD: 0.6 26 | NECK: 27 | TYPE: "fpn" 28 | WITH_SPP: True 29 | DATASETS: 30 | TRAIN: ("coco_2017_train",) 31 | TEST: ("coco_2017_val",) 32 | 33 | SOLVER: 34 | IMS_PER_BATCH: 32 35 | BASE_LR: 0.01 36 | WARMUP_ITERS: 1500 37 | STEPS: (60000, 80000) 38 | MAX_ITER: 90000 39 | CLIP_GRADIENTS: 40 | ENABLED: True 41 | CHECKPOINT_PERIOD: 1000 42 | 43 | TEST: 44 | EVAL_PERIOD: 10000 45 | OUTPUT_DIR: "output/coco_darknet53" 46 | VIS_PERIOD: 5000 47 | 48 | 49 | DATALOADER: 50 | # proposals are part of the dataset_dicts, and take a lot of RAM 51 | NUM_WORKERS: 2 52 | -------------------------------------------------------------------------------- /configs/coco/detr/anchordetr_origin.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "AnchorDetr" 3 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 4 | # WEIGHTS: "weights/R-50.pkl" 5 | PIXEL_MEAN: [123.675, 116.280, 103.530] 6 | PIXEL_STD: [58.395, 57.120, 57.375] 7 | MASK_ON: False 8 | RESNETS: 9 | DEPTH: 50 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | DETR: 13 | GIOU_WEIGHT: 2.0 14 | L1_WEIGHT: 5.0 15 | NUM_QUERY_POSITION: 300 16 | DIM_FEEDFORWARD: 1024 17 | DROPOUT: 0.0 18 | ENC_LAYERS: 6 19 | DEC_LAYERS: 6 20 | HIDDEN_DIM: 256 21 | NUM_CLASSES: 80 22 | YOLO: 23 | CONF_THRESHOLD: 0.001 24 | IGNORE_THRESHOLD: 0.07 25 | 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | 30 | SOLVER: 31 | AMP: 32 | ENABLED: true 33 | IMS_PER_BATCH: 16 34 | # BASE_LR: 0.0002 # 0.00025 is better 35 | BASE_LR: 0.0001 # 0.00025 is better 36 | STEPS: (295720, ) 37 | # MAX_ITER: 369650 38 | MAX_ITER: 409650 39 | WARMUP_FACTOR: 1.0 40 | WARMUP_ITERS: 10 41 | WEIGHT_DECAY: 0.0001 
42 | OPTIMIZER: "ADAMW" 43 | LR_MULTIPLIER_OVERWRITE: 44 | [{ "backbone": 0.1 }, { "reference_points": 0.1, "sampling_offsets": 0.1 }] 45 | CLIP_GRADIENTS: 46 | ENABLED: True 47 | CLIP_TYPE: "full_model" 48 | # CLIP_TYPE: "norm" 49 | CLIP_VALUE: 0.1 50 | NORM_TYPE: 2.0 51 | INPUT: 52 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 53 | CROP: 54 | ENABLED: True 55 | TYPE: "absolute_range" 56 | SIZE: (384, 600) 57 | # SIZE: (384, 632) 58 | FORMAT: "RGB" 59 | TEST: 60 | EVAL_PERIOD: 7393 # 1 epoch same as bs=2 61 | DATALOADER: 62 | FILTER_EMPTY_ANNOTATIONS: False 63 | NUM_WORKERS: 2 64 | VERSION: 2 65 | 66 | VIS_PERIOD: 100 67 | OUTPUT_DIR: "output/coco_anchordetr" 68 | -------------------------------------------------------------------------------- /configs/coco/detr/anchordetr_origin_bs64.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "AnchorDetr" 3 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 4 | # WEIGHTS: "weights/R-50.pkl" 5 | PIXEL_MEAN: [123.675, 116.280, 103.530] 6 | PIXEL_STD: [58.395, 57.120, 57.375] 7 | MASK_ON: False 8 | RESNETS: 9 | DEPTH: 50 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | DETR: 13 | GIOU_WEIGHT: 2.0 14 | L1_WEIGHT: 5.0 15 | NUM_QUERY_POSITION: 300 16 | DIM_FEEDFORWARD: 1024 17 | DROPOUT: 0.0 18 | ENC_LAYERS: 6 19 | DEC_LAYERS: 6 20 | HIDDEN_DIM: 256 21 | NUM_CLASSES: 80 22 | YOLO: 23 | CONF_THRESHOLD: 0.001 24 | IGNORE_THRESHOLD: 0.07 25 | 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | 30 | SOLVER: 31 | AMP: 32 | ENABLED: true 33 | IMS_PER_BATCH: 64 34 | # BASE_LR: 0.0002 # 0.00025 is better 35 | BASE_LR: 0.00025 # lr should be a little bit larger 36 | # STEPS: (295720, ) 37 | STEPS: (73930, ) 38 | # MAX_ITER: 369650 39 | MAX_ITER: 409650 40 | WARMUP_FACTOR: 1.0 41 | WARMUP_ITERS: 10 42 | WEIGHT_DECAY: 0.0001 43 | OPTIMIZER: "ADAMW" 44 | 
LR_MULTIPLIER_OVERWRITE: 45 | [{ "backbone": 0.1 }, { "reference_points": 0.1, "sampling_offsets": 0.1 }] 46 | CLIP_GRADIENTS: 47 | ENABLED: True 48 | CLIP_TYPE: "full_model" 49 | # CLIP_TYPE: "norm" 50 | CLIP_VALUE: 0.1 51 | NORM_TYPE: 2.0 52 | INPUT: 53 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 54 | CROP: 55 | ENABLED: True 56 | TYPE: "absolute_range" 57 | SIZE: (384, 600) 58 | # SIZE: (384, 632) 59 | FORMAT: "RGB" 60 | TEST: 61 | EVAL_PERIOD: 7393 # 1 epoch same as bs=2 62 | DATALOADER: 63 | FILTER_EMPTY_ANNOTATIONS: False 64 | NUM_WORKERS: 2 65 | VERSION: 2 66 | 67 | VIS_PERIOD: 100 68 | OUTPUT_DIR: "output/coco_anchordetr" 69 | -------------------------------------------------------------------------------- /configs/coco/detr/d2go/detr_bs16.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "DetrD2go" 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | STRIDE_IN_1X1: False 9 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 10 | DETR: 11 | NUM_CLASSES: 80 12 | CLS_WEIGHT: 2.0 13 | DIM_FEEDFORWARD: 1024 14 | GIOU_WEIGHT: 2.0 15 | L1_WEIGHT: 5.0 16 | NUM_OBJECT_QUERIES: 300 17 | CENTERED_POSITION_ENCODIND: True 18 | USE_FOCAL_LOSS: True 19 | NUM_FEATURE_LEVELS: 1 20 | ATTENTION_TYPE: 'DETR' 21 | 22 | DATASETS: 23 | TRAIN: ("coco_2017_train",) 24 | TEST: ("coco_2017_val",) 25 | 26 | SOLVER: 27 | AMP: 28 | ENABLED: true 29 | IMS_PER_BATCH: 16 30 | BASE_LR: 0.0001 31 | STEPS: (887040,) 32 | MAX_ITER: 1108800 33 | WARMUP_FACTOR: 1.0 34 | WARMUP_ITERS: 10 35 | WEIGHT_DECAY: 0.0001 36 | OPTIMIZER: "ADAMW" 37 | CLIP_GRADIENTS: 38 | ENABLED: True 39 | CLIP_TYPE: "full_model" 40 | CLIP_VALUE: 0.1 41 | NORM_TYPE: 2.0 42 | LR_MULTIPLIER_OVERWRITE: [{'backbone': 0.1}, {'reference_points': 0.1, 'sampling_offsets': 0.1}] 43 | # BACKBONE_MULTIPLIER: 0.1 44 | 45 | INPUT: 46 | MIN_SIZE_TRAIN: (480, 
512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 47 | CROP: 48 | ENABLED: True 49 | TYPE: "absolute_range" 50 | SIZE: (384, 600) 51 | FORMAT: "RGB" 52 | # D2GO_DATA: 53 | # MAPPER: 54 | # NAME: "DETRDatasetMapper" 55 | 56 | TEST: 57 | EVAL_PERIOD: 4000 58 | DATALOADER: 59 | FILTER_EMPTY_ANNOTATIONS: False 60 | NUM_WORKERS: 4 61 | VERSION: 2 62 | 63 | -------------------------------------------------------------------------------- /configs/coco/detr/d2go/detr_fbv3_bs16.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "DetrD2go" 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "FBNetV2C4Backbone" 8 | FBNET_V2: 9 | ARCH: "FBNetV3_A_dsmask_C5" 10 | NORM: "sync_bn" 11 | WIDTH_DIVISOR: 8 12 | SCALE_FACTOR: 1.0 13 | OUT_FEATURES: ["trunk4"] 14 | DETR: 15 | NUM_CLASSES: 80 16 | CLS_WEIGHT: 2.0 17 | DIM_FEEDFORWARD: 1024 18 | GIOU_WEIGHT: 2.0 19 | L1_WEIGHT: 5.0 20 | NUM_OBJECT_QUERIES: 300 21 | CENTERED_POSITION_ENCODIND: True 22 | USE_FOCAL_LOSS: True 23 | NUM_FEATURE_LEVELS: 1 24 | ATTENTION_TYPE: 'DETR' 25 | 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | SOLVER: 30 | IMS_PER_BATCH: 16 31 | BASE_LR: 0.0002 32 | STEPS: (887040,) 33 | MAX_ITER: 1108800 34 | WARMUP_FACTOR: 1.0 35 | WARMUP_ITERS: 10 36 | WEIGHT_DECAY: 0.0001 37 | OPTIMIZER: "ADAMW" 38 | CLIP_GRADIENTS: 39 | ENABLED: True 40 | CLIP_TYPE: "full_model" 41 | CLIP_VALUE: 0.1 42 | NORM_TYPE: 2.0 43 | LR_MULTIPLIER_OVERWRITE: [{'backbone': 0.1}, {'reference_points': 0.1, 'sampling_offsets': 0.1}] 44 | 45 | INPUT: 46 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 47 | CROP: 48 | ENABLED: True 49 | TYPE: "absolute_range" 50 | SIZE: (384, 600) 51 | FORMAT: "RGB" 52 | D2GO_DATA: 53 | MAPPER: 54 | NAME: "DETRDatasetMapper" 55 | TEST: 56 | EVAL_PERIOD: 4000 57 | DATALOADER: 58 | FILTER_EMPTY_ANNOTATIONS: False 
59 | NUM_WORKERS: 4 60 | VERSION: 2 61 | 62 | -------------------------------------------------------------------------------- /configs/coco/detr/d2go/smca_bs16.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "DetrD2go" 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | MASK_ON: False 6 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 7 | RESNETS: 8 | DEPTH: 50 9 | STRIDE_IN_1X1: False 10 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 11 | 12 | DETR: 13 | NUM_CLASSES: 80 14 | CLS_WEIGHT: 2.0 15 | DIM_FEEDFORWARD: 2048 # 1024 -> 2048 16 | GIOU_WEIGHT: 2.0 17 | L1_WEIGHT: 5.0 18 | NUM_OBJECT_QUERIES: 300 19 | CENTERED_POSITION_ENCODIND: True 20 | USE_FOCAL_LOSS: True 21 | NUM_FEATURE_LEVELS: 1 22 | ATTENTION_TYPE: "SMCA" 23 | 24 | DATASETS: 25 | TRAIN: ("coco_2017_train",) 26 | TEST: ("coco_2017_val",) 27 | 28 | SOLVER: 29 | AMP: 30 | ENABLED: true 31 | IMS_PER_BATCH: 16 32 | BASE_LR: 0.0001 33 | STEPS: (295720,) 34 | # MAX_ITER: 369650 35 | MAX_ITER: 429650 36 | WARMUP_FACTOR: 1.0 37 | WARMUP_ITERS: 10 38 | WEIGHT_DECAY: 0.0001 39 | OPTIMIZER: "ADAMW" 40 | CLIP_GRADIENTS: 41 | ENABLED: True 42 | CLIP_TYPE: "full_model" 43 | CLIP_VALUE: 0.1 44 | NORM_TYPE: 2.0 45 | LR_MULTIPLIER_OVERWRITE: 46 | [{ "backbone": 0.1 }, { "reference_points": 0.1, "sampling_offsets": 0.1 }] 47 | 48 | INPUT: 49 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 50 | CROP: 51 | ENABLED: True 52 | TYPE: "absolute_range" 53 | SIZE: (384, 600) 54 | FORMAT: "RGB" 55 | # D2GO_DATA: 56 | # MAPPER: 57 | # NAME: "DETRDatasetMapper" 58 | 59 | TEST: 60 | EVAL_PERIOD: 4000 61 | DATALOADER: 62 | FILTER_EMPTY_ANNOTATIONS: False 63 | NUM_WORKERS: 4 64 | VERSION: 2 65 | 66 | OUTPUT_DIR: "output/coco_smcadetr_d2go" 67 | -------------------------------------------------------------------------------- /configs/coco/detr/d2go/smca_bs64.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "DetrD2go" 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | MASK_ON: False 6 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 7 | RESNETS: 8 | DEPTH: 50 9 | STRIDE_IN_1X1: False 10 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 11 | 12 | DETR: 13 | NUM_CLASSES: 80 14 | CLS_WEIGHT: 2.0 15 | DIM_FEEDFORWARD: 2048 # 1024 -> 2048 16 | GIOU_WEIGHT: 2.0 17 | L1_WEIGHT: 5.0 18 | NUM_OBJECT_QUERIES: 300 19 | CENTERED_POSITION_ENCODIND: True 20 | USE_FOCAL_LOSS: True 21 | NUM_FEATURE_LEVELS: 1 22 | ATTENTION_TYPE: "SMCA" 23 | 24 | DATASETS: 25 | TRAIN: ("coco_2017_train",) 26 | TEST: ("coco_2017_val",) 27 | 28 | SOLVER: 29 | AMP: 30 | ENABLED: true 31 | IMS_PER_BATCH: 64 32 | BASE_LR: 0.00016 33 | # STEPS: (295720,) 34 | # MAX_ITER: 369650 35 | STEPS: (73930, ) 36 | # MAX_ITER: 369650 37 | MAX_ITER: 140000 # 14w we can get a 41 AP 38 | WARMUP_FACTOR: 1.0 39 | WARMUP_ITERS: 10 40 | WEIGHT_DECAY: 0.0001 41 | OPTIMIZER: "ADAMW" 42 | CLIP_GRADIENTS: 43 | ENABLED: True 44 | CLIP_TYPE: "full_model" 45 | CLIP_VALUE: 0.1 46 | NORM_TYPE: 2.0 47 | LR_MULTIPLIER_OVERWRITE: 48 | [{ "backbone": 0.1 }, { "reference_points": 0.1, "sampling_offsets": 0.1 }] 49 | 50 | INPUT: 51 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 52 | CROP: 53 | ENABLED: True 54 | TYPE: "absolute_range" 55 | SIZE: (384, 600) 56 | FORMAT: "RGB" 57 | # D2GO_DATA: 58 | # MAPPER: 59 | # NAME: "DETRDatasetMapper" 60 | 61 | TEST: 62 | EVAL_PERIOD: 4000 63 | DATALOADER: 64 | FILTER_EMPTY_ANNOTATIONS: False 65 | NUM_WORKERS: 4 66 | VERSION: 2 67 | 68 | OUTPUT_DIR: "output/coco_smcadetr_d2go" 69 | -------------------------------------------------------------------------------- /configs/coco/detr/d2go/smca_fbv3.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | 
META_ARCHITECTURE: "DetrD2go" 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "FBNetV2C4Backbone" 8 | FBNET_V2: 9 | ARCH: "FBNetV3_A_dsmask_C5" 10 | NORM: "sync_bn" 11 | WIDTH_DIVISOR: 8 12 | SCALE_FACTOR: 1.0 13 | OUT_FEATURES: ["trunk4"] 14 | 15 | DETR: 16 | NUM_CLASSES: 80 17 | CLS_WEIGHT: 2.0 18 | DIM_FEEDFORWARD: 2048 # 1024 -> 2048 19 | GIOU_WEIGHT: 2.0 20 | L1_WEIGHT: 5.0 21 | NUM_OBJECT_QUERIES: 300 22 | CENTERED_POSITION_ENCODIND: True 23 | USE_FOCAL_LOSS: True 24 | NUM_FEATURE_LEVELS: 1 25 | ATTENTION_TYPE: "SMCA" 26 | 27 | DATASETS: 28 | TRAIN: ("coco_2017_train",) 29 | TEST: ("coco_2017_val",) 30 | 31 | SOLVER: 32 | AMP: 33 | ENABLED: true 34 | IMS_PER_BATCH: 64 35 | BASE_LR: 0.0001 36 | # STEPS: (295720,) 37 | # MAX_ITER: 369650 38 | STEPS: (73930, ) 39 | # MAX_ITER: 369650 40 | MAX_ITER: 429650 41 | WARMUP_FACTOR: 1.0 42 | WARMUP_ITERS: 10 43 | WEIGHT_DECAY: 0.0001 44 | OPTIMIZER: "ADAMW" 45 | CLIP_GRADIENTS: 46 | ENABLED: True 47 | CLIP_TYPE: "full_model" 48 | CLIP_VALUE: 0.1 49 | NORM_TYPE: 2.0 50 | LR_MULTIPLIER_OVERWRITE: 51 | [{ "backbone": 1.2 }, { "reference_points": 0.1, "sampling_offsets": 0.1 }] 52 | 53 | # 0.00012 0.1 25.17,23.09,21.96 54 | # 0.00012 0.9 24, 22.29, 21.88 55 | # 0.00019 0.9 25.32,23.81, 23.46 56 | # 0.0001 0.9 24.37, 22.35, 21.57, 21.02, 20.76 57 | # 0.0001 1.2 23.83, 22.09, 58 | 59 | INPUT: 60 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 61 | CROP: 62 | ENABLED: True 63 | TYPE: "absolute_range" 64 | SIZE: (384, 600) 65 | FORMAT: "RGB" 66 | # D2GO_DATA: 67 | # MAPPER: 68 | # NAME: "DETRDatasetMapper" 69 | 70 | TEST: 71 | EVAL_PERIOD: 4000 72 | DATALOADER: 73 | FILTER_EMPTY_ANNOTATIONS: False 74 | NUM_WORKERS: 4 75 | VERSION: 2 76 | 77 | OUTPUT_DIR: "output/coco_smcadetr_d2go_fbv3" 78 | -------------------------------------------------------------------------------- /configs/coco/detr/d2go/smca_regnetx_0.4g.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "DetrD2go" 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | MASK_ON: False 6 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 7 | BACKBONE: 8 | NAME: "build_regnet_backbone" 9 | SIMPLE: true 10 | STRIDE: 32 11 | CHANNEL: 384 12 | REGNETS: 13 | TYPE: "RegNetX_400MF" 14 | OUT_FEATURES: ["s2", "s3", "s4"] # fpn produce 4 levels, only using 3 for now 15 | 16 | DETR: 17 | NUM_CLASSES: 80 18 | CLS_WEIGHT: 2.0 19 | DIM_FEEDFORWARD: 2048 # 1024 -> 2048 20 | GIOU_WEIGHT: 2.0 21 | L1_WEIGHT: 5.0 22 | NUM_OBJECT_QUERIES: 300 23 | CENTERED_POSITION_ENCODIND: True 24 | USE_FOCAL_LOSS: True 25 | NUM_FEATURE_LEVELS: 1 26 | ATTENTION_TYPE: "SMCA" 27 | 28 | DATASETS: 29 | TRAIN: ("coco_2017_train",) 30 | TEST: ("coco_2017_val",) 31 | 32 | SOLVER: 33 | AMP: 34 | ENABLED: true 35 | IMS_PER_BATCH: 64 36 | BASE_LR: 0.0001 37 | # STEPS: (295720,) 38 | # MAX_ITER: 369650 39 | STEPS: (73930, ) 40 | # MAX_ITER: 369650 41 | MAX_ITER: 429650 42 | WARMUP_FACTOR: 1.0 43 | WARMUP_ITERS: 10 44 | WEIGHT_DECAY: 0.0001 45 | OPTIMIZER: "ADAMW" 46 | CLIP_GRADIENTS: 47 | ENABLED: True 48 | CLIP_TYPE: "full_model" 49 | CLIP_VALUE: 0.1 50 | NORM_TYPE: 2.0 51 | LR_MULTIPLIER_OVERWRITE: 52 | [{ "backbone": 0.6}, { "reference_points": 0.1, "sampling_offsets": 0.1 }] 53 | 54 | # 0.00013, 0.1, 29.9, 24.86, 23.57 55 | # 0.00013, 0.9, 27.26,23.95 56 | # 0.0001, 0.9, 26.38, 23.74, 23.01, 22.52 57 | # 0.0001, 1.2, 29.43 58 | 59 | INPUT: 60 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 61 | CROP: 62 | ENABLED: True 63 | TYPE: "absolute_range" 64 | SIZE: (384, 600) 65 | FORMAT: "RGB" 66 | # D2GO_DATA: 67 | # MAPPER: 68 | # NAME: "DETRDatasetMapper" 69 | 70 | TEST: 71 | EVAL_PERIOD: 4000 72 | DATALOADER: 73 | FILTER_EMPTY_ANNOTATIONS: False 74 | NUM_WORKERS: 4 75 | VERSION: 2 76 | 77 | OUTPUT_DIR: 
"output/coco_smcadetr_d2go_regnetx" 78 | -------------------------------------------------------------------------------- /configs/coco/detr/detr_256_6_6_regnetx_0.4g.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "Detr" 3 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 4 | PIXEL_MEAN: [123.675, 116.280, 103.530] 5 | PIXEL_STD: [58.395, 57.120, 57.375] 6 | MASK_ON: False 7 | 8 | BACKBONE: 9 | NAME: "build_regnet_backbone" 10 | REGNETS: 11 | TYPE: "RegNetX_400MF" 12 | OUT_FEATURES: ["s2", "s3", "s4"] # fpn produce 4 levels, only using 3 for now 13 | # RESNETS: 14 | # DEPTH: 50 15 | # STRIDE_IN_1X1: False 16 | # OUT_FEATURES: ["res2", "res3", "res4", "res5"] 17 | DETR: 18 | GIOU_WEIGHT: 2.0 19 | L1_WEIGHT: 5.0 20 | NUM_OBJECT_QUERIES: 100 21 | ENC_LAYERS: 6 22 | DEC_LAYERS: 6 23 | HIDDEN_DIM: 256 24 | 25 | DATASETS: 26 | TRAIN: ("coco_2017_train",) 27 | TEST: ("coco_2017_val",) 28 | 29 | SOLVER: 30 | IMS_PER_BATCH: 32 31 | BASE_LR: 0.0001 32 | STEPS: (369600,) 33 | MAX_ITER: 554400 34 | WARMUP_FACTOR: 1.0 35 | WARMUP_ITERS: 10 36 | WEIGHT_DECAY: 0.0001 37 | OPTIMIZER: "ADAMW" 38 | BACKBONE_MULTIPLIER: 0.1 39 | CLIP_GRADIENTS: 40 | ENABLED: True 41 | CLIP_TYPE: "full_model" 42 | CLIP_VALUE: 0.01 43 | NORM_TYPE: 2.0 44 | INPUT: 45 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 46 | CROP: 47 | ENABLED: True 48 | TYPE: "absolute_range" 49 | SIZE: (384, 600) 50 | FORMAT: "RGB" 51 | TEST: 52 | EVAL_PERIOD: 4000 53 | DATALOADER: 54 | FILTER_EMPTY_ANNOTATIONS: False 55 | NUM_WORKERS: 1 56 | VERSION: 2 57 | OUTPUT_DIR: "output/coco_detr_regx" -------------------------------------------------------------------------------- /configs/coco/detr/detr_256_6_6_torchvision.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "Detr" 3 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 4 
| PIXEL_MEAN: [123.675, 116.280, 103.530] 5 | PIXEL_STD: [58.395, 57.120, 57.375] 6 | MASK_ON: False 7 | RESNETS: 8 | DEPTH: 50 9 | STRIDE_IN_1X1: False 10 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 11 | DETR: 12 | GIOU_WEIGHT: 2.0 13 | L1_WEIGHT: 5.0 14 | NUM_OBJECT_QUERIES: 100 15 | ENC_LAYERS: 6 16 | DEC_LAYERS: 6 17 | HIDDEN_DIM: 256 18 | 19 | DATASETS: 20 | TRAIN: ("coco_2017_train",) 21 | TEST: ("coco_2017_val",) 22 | 23 | SOLVER: 24 | AMP: 25 | ENABLED: true 26 | IMS_PER_BATCH: 56 27 | BASE_LR: 0.0001 28 | STEPS: (369600,) 29 | MAX_ITER: 554400 30 | WARMUP_FACTOR: 1.0 31 | WARMUP_ITERS: 10 32 | WEIGHT_DECAY: 0.0001 33 | OPTIMIZER: "ADAMW" 34 | BACKBONE_MULTIPLIER: 0.1 35 | CLIP_GRADIENTS: 36 | ENABLED: True 37 | CLIP_TYPE: "full_model" 38 | # CLIP_TYPE: "norm" 39 | CLIP_VALUE: 0.01 40 | NORM_TYPE: 2.0 41 | INPUT: 42 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832) 43 | CROP: 44 | ENABLED: True 45 | TYPE: "absolute_range" 46 | SIZE: (384, 600) 47 | FORMAT: "RGB" 48 | TEST: 49 | EVAL_PERIOD: 10000 50 | DATALOADER: 51 | FILTER_EMPTY_ANNOTATIONS: False 52 | NUM_WORKERS: 2 53 | VERSION: 2 54 | 55 | OUTPUT_DIR: "output/coco_detr" -------------------------------------------------------------------------------- /configs/coco/detr/detr_256_6_6_torchvision_mask.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "Detr" 3 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 4 | PIXEL_MEAN: [123.675, 116.280, 103.530] 5 | PIXEL_STD: [58.395, 57.120, 57.375] 6 | MASK_ON: True 7 | RESNETS: 8 | DEPTH: 50 9 | STRIDE_IN_1X1: False 10 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 11 | DETR: 12 | GIOU_WEIGHT: 2.0 13 | L1_WEIGHT: 5.0 14 | NUM_OBJECT_QUERIES: 100 15 | ENC_LAYERS: 6 16 | DEC_LAYERS: 6 17 | HIDDEN_DIM: 256 18 | NUM_CLASSES: 250 19 | # hard coded for mask 20 | # FROZEN_WEIGHTS: 'weights/detr_panoptic.pth' 21 | YOLO: 22 | CONF_THRESHOLD: 0.3 23 | 
24 | DATASETS: 25 | TRAIN: ("coco_2017_train",) 26 | TEST: ("coco_2017_val",) 27 | 28 | SOLVER: 29 | IMS_PER_BATCH: 56 30 | BASE_LR: 0.0001 31 | STEPS: (369600,) 32 | MAX_ITER: 554400 33 | WARMUP_FACTOR: 1.0 34 | WARMUP_ITERS: 10 35 | WEIGHT_DECAY: 0.0001 36 | OPTIMIZER: "ADAMW" 37 | BACKBONE_MULTIPLIER: 0.1 38 | CLIP_GRADIENTS: 39 | ENABLED: True 40 | CLIP_TYPE: "full_model" 41 | # CLIP_TYPE: "norm" 42 | CLIP_VALUE: 0.01 43 | NORM_TYPE: 2.0 44 | INPUT: 45 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 46 | CROP: 47 | ENABLED: True 48 | TYPE: "absolute_range" 49 | SIZE: (384, 600) 50 | FORMAT: "RGB" 51 | TEST: 52 | EVAL_PERIOD: 4000 53 | DATALOADER: 54 | FILTER_EMPTY_ANNOTATIONS: False 55 | NUM_WORKERS: 2 56 | VERSION: 2 57 | 58 | OUTPUT_DIR: "output/coco_detr" -------------------------------------------------------------------------------- /configs/coco/detr/smcadetr_origin.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "SMCADetr" 3 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 4 | # WEIGHTS: "weights/R-50.pkl" 5 | PIXEL_MEAN: [123.675, 116.280, 103.530] 6 | PIXEL_STD: [58.395, 57.120, 57.375] 7 | MASK_ON: False 8 | RESNETS: 9 | DEPTH: 50 10 | STRIDE_IN_1X1: False 11 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 12 | DETR: 13 | # CONF_THRESHOLD: 0.1 14 | GIOU_WEIGHT: 2.0 15 | L1_WEIGHT: 5.0 16 | NUM_OBJECT_QUERIES: 300 17 | DIM_FEEDFORWARD: 2048 18 | DROPOUT: 0.1 19 | ENC_LAYERS: 6 20 | DEC_LAYERS: 6 21 | HIDDEN_DIM: 256 22 | NUM_CLASSES: 80 23 | # NUM_CLASSES: 81 24 | NUM_FEATURE_LEVELS: 1 25 | YOLO: 26 | CONF_THRESHOLD: 0.0001 27 | IGNORE_THRESHOLD: 0.001 28 | 29 | DATASETS: 30 | TRAIN: ("coco_2017_train",) 31 | TEST: ("coco_2017_val",) 32 | 33 | SOLVER: 34 | AMP: 35 | ENABLED: true 36 | IMS_PER_BATCH: 16 37 | # BASE_LR: 0.0001 38 | BASE_LR: 0.0001 39 | # STEPS: (369600,) 40 | # STEPS: (110880, 210039) 41 | # STEPS: (295720, ) 42 | # 
MAX_ITER: 369650 43 | STEPS: (325720, ) 44 | MAX_ITER: 409650 45 | # MAX_ITER: 469650 46 | # MAX_ITER: 162420 47 | WARMUP_FACTOR: 1.0 48 | # detr bs=64 is 10, we using 40 for 16 49 | WARMUP_ITERS: 10 50 | WEIGHT_DECAY: 0.0001 51 | OPTIMIZER: "ADAMW" 52 | LR_MULTIPLIER_OVERWRITE: 53 | [{ "backbone": 0.1 }, { "reference_points": 0.1, "sampling_offsets": 0.1 }] 54 | CLIP_GRADIENTS: 55 | ENABLED: True 56 | CLIP_TYPE: "full_model" 57 | # CLIP_TYPE: "norm" 58 | CLIP_VALUE: 0.1 59 | # NORM_TYPE: 2.0 60 | 61 | INPUT: 62 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 63 | CROP: 64 | ENABLED: True 65 | TYPE: "absolute_range" 66 | SIZE: (384, 600) 67 | FORMAT: "RGB" 68 | 69 | TEST: 70 | EVAL_PERIOD: 7393 71 | DATALOADER: 72 | FILTER_EMPTY_ANNOTATIONS: False 73 | NUM_WORKERS: 2 74 | VERSION: 2 75 | 76 | VIS_PERIOD: 100 77 | OUTPUT_DIR: "output/coco_smcadetr_origin" 78 | -------------------------------------------------------------------------------- /configs/coco/pvt_v2_b0.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | LOAD_PROPOSALS: True 6 | BACKBONE: 7 | NAME: "build_resnet_fpn_backbone" 8 | RESNETS: 9 | DEPTH: 50 10 | OUT_FEATURES: ["res3", "res4", "res5"] # we might also want to try res2, res3, res4, res5 11 | YOLO: 12 | ANCHORS: 13 | [ 14 | [[116, 90], [156, 198], [373, 326]], 15 | [[30, 61], [62, 45], [42, 119]], 16 | [[10, 13], [16, 30], [33, 23]], 17 | ] 18 | CLASSES: 80 19 | IN_FEATURES: ["res3", "res4", "res5"] 20 | CONF_THRESHOLD: 0.01 21 | NMS_THRESHOLD: 0.5 22 | IGNORE_THRESHOLD: 0.7 23 | VARIANT: "yolov7" 24 | DATASETS: 25 | TRAIN: ("coco_2017_train",) 26 | TEST: ("coco_2017_val",) 27 | 28 | SOLVER: 29 | IMS_PER_BATCH: 16 30 | BASE_LR: 0.02 31 | STEPS: (60000, 80000) 32 | MAX_ITER: 90000 33 | 34 | DATALOADER: 35 | # proposals are part of the dataset_dicts, and take a lot 
of RAM 36 | NUM_WORKERS: 2 -------------------------------------------------------------------------------- /configs/coco/r2_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_res2net_backbone" 8 | RESNETS: 9 | R2TYPE: "res2net50_v1b" 10 | DEPTH: 50 11 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 12 | YOLO: 13 | ANCHORS: 14 | [ 15 | [[116, 90], [156, 198], [373, 326]], 16 | [[30, 61], [62, 45], [42, 119]], 17 | [[10, 13], [16, 30], [33, 23]], 18 | ] 19 | CLASSES: 80 20 | IN_FEATURES: ["res3", "res4", "res5"] # p3, p4, p5 from FPN 21 | CONF_THRESHOLD: 0.001 22 | NMS_THRESHOLD: 0.65 23 | IGNORE_THRESHOLD: 0.7 24 | VARIANT: "yolov7" 25 | LOSS_TYPE: "v7" 26 | WIDTH_MUL: 1.0 27 | # WIDTH_MUL: 1. 28 | LOSS: 29 | LAMBDA_IOU: 1.0 30 | NECK: 31 | TYPE: "fpn" 32 | WITH_SPP: true 33 | 34 | DATASETS: 35 | TRAIN: ("coco_2017_train",) 36 | # TEST: ("coco_2014_val_mini",) 37 | TEST: ("coco_2017_val",) 38 | 39 | 40 | INPUT: 41 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 42 | # MAX_SIZE_TRAIN: 1056 # force max size train to 800? 43 | MAX_SIZE_TRAIN: 800 # force max size train to 800? 
44 | MIN_SIZE_TEST: 640 45 | MAX_SIZE_TEST: 800 46 | # open all augmentations 47 | JITTER_CROP: 48 | ENABLED: False 49 | RESIZE: 50 | ENABLED: False 51 | # SHAPE: (540, 960) 52 | DISTORTION: 53 | ENABLED: False 54 | # MOSAIC: 55 | # ENABLED: True 56 | # NUM_IMAGES: 4 57 | # DEBUG_VIS: True 58 | # # MOSAIC_WIDTH: 960 59 | # # MOSAIC_HEIGHT: 540 60 | MOSAIC_AND_MIXUP: 61 | ENABLED: True 62 | DEBUG_VIS: False 63 | ENABLE_MIXUP: True 64 | DISABLE_AT_ITER: 120000 65 | 66 | 67 | SOLVER: 68 | AMP: 69 | ENABLED: true 70 | # IMS_PER_BATCH: 32 # 1/5 bs than YOLOX 71 | IMS_PER_BATCH: 32 # 1/5 bs than YOLOX 72 | # it can be 0.016 maybe 73 | BASE_LR: 0.002 74 | STEPS: (60000, 80000) 75 | WARMUP_FACTOR: 0.00033333 76 | WARMUP_ITERS: 1500 77 | MAX_ITER: 210000 78 | LR_SCHEDULER_NAME: "WarmupCosineLR" 79 | 80 | 81 | TEST: 82 | EVAL_PERIOD: 10000 83 | # EVAL_PERIOD: 0 84 | OUTPUT_DIR: "output/coco_r2_50" 85 | 86 | DATALOADER: 87 | # proposals are part of the dataset_dicts, and take a lot of RAM 88 | NUM_WORKERS: 1 89 | -------------------------------------------------------------------------------- /configs/coco/r2_50_l.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_res2net_backbone" 8 | RESNETS: 9 | R2TYPE: "res2net50_v1b" 10 | DEPTH: 50 11 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 12 | YOLO: 13 | ANCHORS: 14 | [ 15 | [[116, 90], [156, 198], [373, 326]], 16 | [[30, 61], [62, 45], [42, 119]], 17 | [[10, 13], [16, 30], [33, 23]], 18 | ] 19 | CLASSES: 80 20 | IN_FEATURES: ["res3", "res4", "res5"] # p3, p4, p5 from FPN 21 | CONF_THRESHOLD: 0.001 22 | NMS_THRESHOLD: 0.65 23 | IGNORE_THRESHOLD: 0.7 24 | VARIANT: "yolov7" 25 | LOSS_TYPE: "v7" 26 | WIDTH_MUL: 2.0 # for FPN neck 27 | LOSS: 28 | LAMBDA_IOU: 1.1 29 | NECK: 30 | TYPE: "fpn" 31 | WITH_SPP: true 32 | 33 | 
DATASETS: 34 | TRAIN: ("coco_2017_train",) 35 | # TEST: ("coco_2014_val_mini",) 36 | TEST: ("coco_2017_val",) 37 | 38 | 39 | INPUT: 40 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 41 | MAX_SIZE_TRAIN: 1056 42 | MIN_SIZE_TEST: 672 43 | MAX_SIZE_TEST: 768 44 | # open all augmentations 45 | JITTER_CROP: 46 | ENABLED: False 47 | RESIZE: 48 | ENABLED: False 49 | # SHAPE: (540, 960) 50 | DISTORTION: 51 | ENABLED: False 52 | # MOSAIC: 53 | # ENABLED: True 54 | # NUM_IMAGES: 4 55 | # DEBUG_VIS: True 56 | # # MOSAIC_WIDTH: 960 57 | # # MOSAIC_HEIGHT: 540 58 | MOSAIC_AND_MIXUP: 59 | ENABLED: True 60 | DEBUG_VIS: False 61 | ENABLE_MIXUP: False 62 | 63 | 64 | SOLVER: 65 | AMP: 66 | ENABLED: true 67 | IMS_PER_BATCH: 32 # 1/5 bs than YOLOX 68 | # it can be 0.016 maybe 69 | BASE_LR: 0.0018 70 | STEPS: (60000, 80000) 71 | WARMUP_FACTOR: 0.00033333 72 | WARMUP_ITERS: 1500 73 | MAX_ITER: 190000 74 | LR_SCHEDULER_NAME: "WarmupCosineLR" 75 | 76 | 77 | TEST: 78 | EVAL_PERIOD: 10000 79 | # EVAL_PERIOD: 0 80 | OUTPUT_DIR: "output/coco_r2_50_l" 81 | 82 | DATALOADER: 83 | # proposals are part of the dataset_dicts, and take a lot of RAM 84 | NUM_WORKERS: 1 85 | -------------------------------------------------------------------------------- /configs/coco/r2next_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_res2net_backbone" 8 | RESNETS: 9 | R2TYPE: "res2next50" 10 | DEPTH: 50 11 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 12 | YOLO: 13 | ANCHORS: 14 | [ 15 | [[116, 90], [156, 198], [373, 326]], 16 | [[30, 61], [62, 45], [42, 119]], 17 | [[10, 13], [16, 30], [33, 23]], 18 | ] 19 | CLASSES: 80 20 | IN_FEATURES: ["res3", "res4", "res5"] # p3, p4, p5 from FPN 21 | CONF_THRESHOLD: 0.001 22 | NMS_THRESHOLD: 0.65 23 | IGNORE_THRESHOLD: 0.7 24 | VARIANT: "yolov7" 25 | LOSS_TYPE: 
"v7" 26 | LOSS: 27 | LAMBDA_IOU: 1.1 28 | NECK: 29 | TYPE: "yolov3" 30 | WITH_SPP: True 31 | 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | # TEST: ("coco_2014_val_mini",) 35 | TEST: ("coco_2017_val",) 36 | 37 | 38 | INPUT: 39 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 40 | MAX_SIZE_TRAIN: 1056 41 | MIN_SIZE_TEST: 672 42 | MAX_SIZE_TEST: 768 43 | # open all augmentations 44 | JITTER_CROP: 45 | ENABLED: False 46 | RESIZE: 47 | ENABLED: False 48 | # SHAPE: (540, 960) 49 | DISTORTION: 50 | ENABLED: False 51 | # MOSAIC: 52 | # ENABLED: True 53 | # NUM_IMAGES: 4 54 | # DEBUG_VIS: True 55 | # # MOSAIC_WIDTH: 960 56 | # # MOSAIC_HEIGHT: 540 57 | MOSAIC_AND_MIXUP: 58 | ENABLED: True 59 | DEBUG_VIS: False 60 | ENABLE_MIXUP: False 61 | 62 | 63 | SOLVER: 64 | AMP: 65 | ENABLED: true 66 | IMS_PER_BATCH: 32 # 1/5 bs than YOLOX 67 | # it can be 0.016 maybe 68 | BASE_LR: 0.0016 69 | STEPS: (60000, 80000) 70 | WARMUP_FACTOR: 0.00033333 71 | WARMUP_ITERS: 1500 72 | MAX_ITER: 150000 73 | LR_SCHEDULER_NAME: "WarmupCosineLR" 74 | 75 | 76 | TEST: 77 | EVAL_PERIOD: 10000 78 | # EVAL_PERIOD: 0 79 | OUTPUT_DIR: "output/coco_r2next_50" 80 | 81 | DATALOADER: 82 | # proposals are part of the dataset_dicts, and take a lot of RAM 83 | NUM_WORKERS: 1 84 | -------------------------------------------------------------------------------- /configs/coco/r50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7P" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_resnet_fpn_backbone" 8 | RESNETS: 9 | DEPTH: 50 10 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 11 | FPN: 12 | IN_FEATURES: ["res3", "res4", "res5"] 13 | YOLO: 14 | ANCHORS: 15 | [ 16 | [[116, 90], [156, 198], [373, 326]], 17 | [[30, 61], [62, 45], [42, 119]], 18 | [[10, 13], [16, 30], [33, 23]], 19 | ] 20 | CLASSES: 80 21 | 
IN_FEATURES: ["p3", "p4", "p5"] # p3, p4, p5 from FPN 22 | CONF_THRESHOLD: 0.01 23 | NMS_THRESHOLD: 0.5 24 | IGNORE_THRESHOLD: 0.1 25 | VARIANT: "yolov7" 26 | LOSS_TYPE: "v4" 27 | DATASETS: 28 | TRAIN: ("coco_2017_train",) 29 | TEST: ("coco_2017_val",) 30 | 31 | SOLVER: 32 | IMS_PER_BATCH: 48 33 | BASE_LR: 0.0009 34 | STEPS: (70000, 90000) 35 | MAX_ITER: 100000 36 | 37 | DATALOADER: 38 | # proposals are part of the dataset_dicts, and take a lot of RAM 39 | NUM_WORKERS: 2 40 | -------------------------------------------------------------------------------- /configs/coco/regnetx_0.4g.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_regnet_backbone" 8 | REGNETS: 9 | TYPE: "RegNetX_400MF" 10 | OUT_FEATURES: ["s2", "s3", "s4"] # fpn produce 4 levels, only using 3 for now 11 | 12 | YOLO: 13 | ANCHORS: 14 | [ 15 | [[116, 90], [156, 198], [373, 326]], 16 | [[30, 61], [62, 45], [42, 119]], 17 | [[10, 13], [16, 30], [33, 23]], 18 | ] 19 | CLASSES: 80 20 | IN_FEATURES: ["s2", "s3", "s4"] # p3, p4, p5 from FPN 21 | CONF_THRESHOLD: 0.001 22 | NMS_THRESHOLD: 0.65 23 | IGNORE_THRESHOLD: 0.7 24 | VARIANT: "yolov7" 25 | WIDTH_MUL: 1.0 # for FPN neck 26 | # WIDTH_MUL: 0.5 # for FPN neck 27 | LOSS_TYPE: "v7" 28 | LOSS: 29 | LAMBDA_IOU: 1.1 30 | NECK: 31 | TYPE: "fpn" 32 | WITH_SPP: True 33 | 34 | DATASETS: 35 | TRAIN: ("coco_2017_train",) 36 | # TEST: ("coco_2014_val_mini",) 37 | TEST: ("coco_2017_val",) 38 | 39 | 40 | INPUT: 41 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 42 | MAX_SIZE_TRAIN: 800 # force max size train to 800? 
43 | MIN_SIZE_TEST: 640 44 | MAX_SIZE_TEST: 800 45 | # open all augmentations 46 | JITTER_CROP: 47 | ENABLED: False 48 | RESIZE: 49 | ENABLED: False 50 | # SHAPE: (540, 960) 51 | DISTORTION: 52 | ENABLED: False 53 | # MOSAIC: 54 | # ENABLED: True 55 | # NUM_IMAGES: 4 56 | # DEBUG_VIS: True 57 | # # MOSAIC_WIDTH: 960 58 | # # MOSAIC_HEIGHT: 540 59 | MOSAIC_AND_MIXUP: 60 | ENABLED: True 61 | DEBUG_VIS: False 62 | ENABLE_MIXUP: True 63 | DISABLE_AT_ITER: 120000 64 | 65 | 66 | SOLVER: 67 | AMP: 68 | ENABLED: true 69 | IMS_PER_BATCH: 64 # 1/5 bs than YOLOX 70 | # IMS_PER_BATCH: 5 # 1/5 bs than YOLOX 71 | # it can be 0.016 maybe 72 | BASE_LR: 0.002 73 | STEPS: (60000, 80000) 74 | WARMUP_FACTOR: 0.00033333 75 | WARMUP_ITERS: 1500 76 | MAX_ITER: 190000 77 | LR_SCHEDULER_NAME: "WarmupCosineLR" 78 | 79 | 80 | TEST: 81 | EVAL_PERIOD: 10000 82 | # EVAL_PERIOD: 0 83 | OUTPUT_DIR: "output/coco_regnetx_0.4g" 84 | 85 | DATALOADER: 86 | # proposals are part of the dataset_dicts, and take a lot of RAM 87 | NUM_WORKERS: 1 88 | -------------------------------------------------------------------------------- /configs/coco/sparseinst/Base-SparseInst.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "SparseInst" 3 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 4 | PIXEL_MEAN: [123.675, 116.280, 103.530] 5 | PIXEL_STD: [58.395, 57.120, 57.375] 6 | BACKBONE: 7 | FREEZE_AT: 0 8 | NAME: "build_resnet_backbone" 9 | RESNETS: 10 | NORM: "FrozenBN" 11 | DEPTH: 50 12 | STRIDE_IN_1X1: False 13 | OUT_FEATURES: ["res3", "res4", "res5"] 14 | SPARSE_INST: 15 | ENCODER: 16 | NAME: "InstanceContextEncoder" 17 | DECODER: 18 | NAME: "GroupIAMDecoder" 19 | DATASETS: 20 | TRAIN: ("coco_2017_train",) 21 | TEST: ("coco_2017_val",) 22 | SOLVER: 23 | IMS_PER_BATCH: 64 24 | BASE_LR: 0.00005 25 | STEPS: (210000, 250000) 26 | MAX_ITER: 270000 27 | WEIGHT_DECAY: 0.05 28 | INPUT: 29 | MIN_SIZE_TRAIN: (416, 448, 480, 512, 544, 
576, 608, 640) 30 | MAX_SIZE_TRAIN: 853 31 | MIN_SIZE_TEST: 640 32 | MAX_SIZE_TEST: 853 33 | FORMAT: "RGB" 34 | MASK_FORMAT: "bitmask" 35 | TEST: 36 | EVAL_PERIOD: 7330 37 | DATALOADER: 38 | NUM_WORKERS: 6 39 | VERSION: 2 40 | -------------------------------------------------------------------------------- /configs/coco/sparseinst/sparse_inst_r50_base.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-SparseInst.yaml" 2 | MODEL: 3 | SPARSE_INST: 4 | DECODER: 5 | NAME: "BaseIAMDecoder" 6 | OUTPUT_DIR: "output/sparse_inst_r50_base" -------------------------------------------------------------------------------- /configs/coco/sparseinst/sparse_inst_r50_dcn_giam_aug.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-SparseInst.yaml" 2 | MODEL: 3 | RESNETS: 4 | DEFORM_ON_PER_STAGE: [False, False, True, True] # dcn on res4, res5 5 | INPUT: 6 | CROP: 7 | ENABLED: True 8 | TYPE: "absolute_range" 9 | SIZE: (384, 600) 10 | MASK_FORMAT: "polygon" 11 | OUTPUT_DIR: "output/sparse_inst_r50_dcn_giam_aug" -------------------------------------------------------------------------------- /configs/coco/sparseinst/sparse_inst_r50_giam.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-SparseInst.yaml" 2 | OUTPUT_DIR: "output/sparse_inst_r50_giam" -------------------------------------------------------------------------------- /configs/coco/sparseinst/sparse_inst_r50_giam_aug.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-SparseInst.yaml" 2 | INPUT: 3 | CROP: 4 | ENABLED: True 5 | TYPE: "absolute_range" 6 | SIZE: (384, 600) 7 | MASK_FORMAT: "polygon" 8 | OUTPUT_DIR: "output/sparse_inst_r50_giam_aug" -------------------------------------------------------------------------------- /configs/coco/sparseinst/sparse_inst_r50vd_base.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "Base-SparseInst.yaml" 2 | MODEL: 3 | 4 | WEIGHTS: "../../pretrained_models/resnet50d_ra2-464e36ba.pth" 5 | BACKBONE: 6 | FREEZE_AT: 0 7 | NAME: "build_resnet_vd_backbone" 8 | SPARSE_INST: 9 | DECODER: 10 | NAME: "BaseIAMDecoder" 11 | OUTPUT_DIR: "output/sparse_inst_r50vd_base" -------------------------------------------------------------------------------- /configs/coco/sparseinst/sparse_inst_r50vd_dcn_giam.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-SparseInst.yaml" 2 | MODEL: 3 | BACKBONE: 4 | FREEZE_AT: 0 5 | NAME: "build_resnet_vd_backbone" 6 | RESNETS: 7 | DEFORM_ON_PER_STAGE: [False, False, True, True] # dcn on res4, res5 8 | OUTPUT_DIR: "output/sparse_inst_r50vd_dcn_giam" 9 | 10 | -------------------------------------------------------------------------------- /configs/coco/sparseinst/sparse_inst_r50vd_dcn_giam_aug.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-SparseInst.yaml" 2 | MODEL: 3 | BACKBONE: 4 | FREEZE_AT: 0 5 | NAME: "build_resnet_vd_backbone" 6 | RESNETS: 7 | DEFORM_ON_PER_STAGE: [False, False, True, True] # dcn on res4, res5 8 | INPUT: 9 | CROP: 10 | ENABLED: True 11 | TYPE: "absolute_range" 12 | SIZE: (384, 600) 13 | MASK_FORMAT: "polygon" 14 | OUTPUT_DIR: "output/sparse_inst_r50vd_dcn_giam_aug" 15 | 16 | -------------------------------------------------------------------------------- /configs/coco/sparseinst/sparse_inst_r50vd_giam.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-SparseInst.yaml" 2 | MODEL: 3 | WEIGHTS: "../../pretrained_models/resnet50d_ra2-464e36ba.pth" 4 | BACKBONE: 5 | FREEZE_AT: 0 6 | NAME: "build_resnet_vd_backbone" 7 | OUTPUT_DIR: "output/sparse_inst_r50vd_giam" 8 | --------------------------------------------------------------------------------
/configs/coco/sparseinst/sparse_inst_r50vd_giam_aug.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-SparseInst.yaml" 2 | MODEL: 3 | BACKBONE: 4 | FREEZE_AT: 0 5 | NAME: "build_resnet_vd_backbone" 6 | INPUT: 7 | CROP: 8 | ENABLED: True 9 | TYPE: "absolute_range" 10 | SIZE: (384, 600) 11 | MASK_FORMAT: "polygon" 12 | OUTPUT_DIR: "output/sparse_inst_r50vd_giam_aug" 13 | -------------------------------------------------------------------------------- /configs/coco/swin_s.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | META_ARCHITECTURE: "YOLOV7" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_swin_transformer_backbone" 8 | SWIN: 9 | TYPE: "small" 10 | WEIGHTS: "weights/swin_small_patch4_window7_224.pth" # manually download weights from https://github.com/microsoft/Swin-Transformer 11 | OUT_FEATURES: [1, 2, 3] # we might also want try res2, res3, res4, res5 12 | 13 | YOLO: 14 | ANCHORS: 15 | [ 16 | [[116, 90], [156, 198], [373, 326]], 17 | [[30, 61], [62, 45], [42, 119]], 18 | [[10, 13], [16, 30], [33, 23]], 19 | ] 20 | CLASSES: 80 21 | IN_FEATURES: [1, 2, 3] 22 | CONF_THRESHOLD: 0.01 23 | NMS_THRESHOLD: 0.5 24 | IGNORE_THRESHOLD: 0.7 25 | VARIANT: "yolov7" 26 | LOSS_TYPE: "v7" 27 | LOSS: 28 | LAMBDA_IOU: 1.1 29 | NECK: 30 | TYPE: "yolov3" 31 | WITH_SPP: True 32 | 33 | DATASETS: 34 | TRAIN: ("coco_2017_train",) 35 | TEST: ("coco_2017_val",) 36 | 37 | INPUT: 38 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 39 | MAX_SIZE_TRAIN: 1056 40 | MIN_SIZE_TEST: 672 41 | MAX_SIZE_TEST: 800 42 | # open all augmentations 43 | JITTER_CROP: 44 | ENABLED: False 45 | RESIZE: 46 | ENABLED: False 47 | # SHAPE: (540, 960) 48 | DISTORTION: 49 | ENABLED: False 50 | # MOSAIC: 51 | # ENABLED: True 52 | # NUM_IMAGES: 4 53 | # DEBUG_VIS: True 54 | # # MOSAIC_WIDTH: 960 55 | # # MOSAIC_HEIGHT: 540 56 | MOSAIC_AND_MIXUP: 57 | ENABLED: True 58 | 
DEBUG_VIS: False 59 | ENABLE_MIXUP: False 60 | 61 | SOLVER: 62 | AMP: 63 | ENABLED: true 64 | IMS_PER_BATCH: 24 # 1/5 bs than YOLOX 65 | # it can be 0.016 maybe 66 | BASE_LR: 0.003 67 | STEPS: (60000, 80000) 68 | WARMUP_FACTOR: 0.00033333 69 | WARMUP_ITERS: 1500 70 | MAX_ITER: 190000 71 | LR_SCHEDULER_NAME: "WarmupCosineLR" 72 | 73 | TEST: 74 | EVAL_PERIOD: 10000 75 | # EVAL_PERIOD: 0 76 | OUTPUT_DIR: "output/coco_swin_s" 77 | 78 | DATALOADER: 79 | # proposals are part of the dataset_dicts, and take a lot of RAM 80 | NUM_WORKERS: 2 81 | -------------------------------------------------------------------------------- /configs/coco/swin_t.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | META_ARCHITECTURE: "YOLOV7" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_swin_transformer_backbone" 8 | SWIN: 9 | TYPE: "tiny" 10 | WEIGHTS: "weights/swin_tiny_patch4_window7_224.pth" 11 | OUT_FEATURES: [1, 2, 3] # we might also want try res2, res3, res4, res5 12 | 13 | RESNETS: 14 | DEPTH: 50 15 | 16 | YOLO: 17 | ANCHORS: 18 | [ 19 | [[116, 90], [156, 198], [373, 326]], 20 | [[30, 61], [62, 45], [42, 119]], 21 | [[10, 13], [16, 30], [33, 23]], 22 | ] 23 | CLASSES: 80 24 | IN_FEATURES: [1, 2, 3] 25 | CONF_THRESHOLD: 0.01 26 | NMS_THRESHOLD: 0.5 27 | IGNORE_THRESHOLD: 0.7 28 | VARIANT: "yolov7" 29 | LOSS_TYPE: "v7" 30 | LOSS: 31 | LAMBDA_IOU: 1.1 32 | NECK: 33 | TYPE: "yolov3" 34 | WITH_SPP: True 35 | 36 | DATASETS: 37 | TRAIN: ("coco_2017_train",) 38 | TEST: ("coco_2017_val",) 39 | 40 | INPUT: 41 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 42 | MAX_SIZE_TRAIN: 1056 43 | MIN_SIZE_TEST: 672 44 | MAX_SIZE_TEST: 800 45 | # open all augmentations 46 | JITTER_CROP: 47 | ENABLED: False 48 | RESIZE: 49 | ENABLED: False 50 | # SHAPE: (540, 960) 51 | DISTORTION: 52 | ENABLED: False 53 | # MOSAIC: 54 | # ENABLED: True 55 | # NUM_IMAGES: 4 56 | # DEBUG_VIS: True 57 | # # MOSAIC_WIDTH: 960 58 | # 
# MOSAIC_HEIGHT: 540 59 | MOSAIC_AND_MIXUP: 60 | ENABLED: True 61 | DEBUG_VIS: False 62 | ENABLE_MIXUP: False 63 | 64 | 65 | SOLVER: 66 | AMP: 67 | ENABLED: true 68 | IMS_PER_BATCH: 32 # 1/5 bs than YOLOX 69 | # it can be 0.016 maybe 70 | BASE_LR: 0.003 71 | STEPS: (60000, 80000) 72 | WARMUP_FACTOR: 0.00033333 73 | WARMUP_ITERS: 1500 74 | MAX_ITER: 190000 75 | LR_SCHEDULER_NAME: "WarmupCosineLR" 76 | 77 | 78 | TEST: 79 | EVAL_PERIOD: 10000 80 | # EVAL_PERIOD: 0 81 | OUTPUT_DIR: "output/coco_swin_t" 82 | 83 | DATALOADER: 84 | # proposals are part of the dataset_dicts, and take a lot of RAM 85 | NUM_WORKERS: 2 -------------------------------------------------------------------------------- /configs/coco/yolof/yolof_CSP_D_53_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-YOLOF.yaml" 2 | MODEL: 3 | WEIGHTS: "./pretrained_models/cspdarknet53.pth" 4 | META_ARCHITECTURE: "YOLOF" 5 | BACKBONE: 6 | NAME: "build_darknet_backbone" 7 | DARKNET: 8 | NORM: "SyncBN" 9 | RES5_DILATION: 2 10 | ANCHOR_GENERATOR: 11 | SIZES: [[16, 32, 64, 128, 256, 512]] 12 | YOLOF: 13 | ENCODER: 14 | IN_CHANNELS: 1024 15 | NUM_RESIDUAL_BLOCKS: 8 16 | BLOCK_DILATIONS: [1, 2, 3, 4, 5, 6, 7, 8] 17 | NORM: "SyncBN" 18 | ACTIVATION: "LeakyReLU" 19 | DECODER: 20 | NUM_ANCHORS: 6 21 | NORM: "SyncBN" 22 | ACTIVATION: "LeakyReLU" 23 | POS_IGNORE_THRESHOLD: 0.1 24 | SOLVER: 25 | BASE_LR: 0.04 26 | BACKBONE_MULTIPLIER: 1.0 27 | STEPS: (52500, 62500) 28 | MAX_ITER: 67500 29 | CHECKPOINT_PERIOD: 5000 30 | INPUT: 31 | JITTER_CROP: 32 | ENABLED: True 33 | RESIZE: 34 | ENABLED: True 35 | DISTORTION: 36 | ENABLED: True 37 | MOSAIC: 38 | ENABLED: True 39 | OUTPUT_DIR: "output/yolof/CSP_D_53_DC5_3x" 40 | -------------------------------------------------------------------------------- /configs/coco/yolof/yolof_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "yolof_R_50_C5_1x.yaml" 2 | MODEL: 3 
| RESNETS: 4 | RES5_DILATION: 2 5 | ANCHOR_GENERATOR: 6 | SIZES: [[16, 32, 64, 128, 256, 512]] 7 | YOLOF: 8 | ENCODER: 9 | BLOCK_DILATIONS: [4, 8, 12, 16] 10 | DECODER: 11 | NUM_ANCHORS: 6 12 | MATCHER: 13 | TOPK: 8 14 | POS_IGNORE_THRESHOLD: 0.1 15 | OUTPUT_DIR: "output/yolof/R_50_DC5_1x" 16 | -------------------------------------------------------------------------------- /configs/coco/yolov6/yolov6_m.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-YOLOv7.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [0.485, 0.456, 0.406] # same value as PP-YOLOv2, RGB order 4 | PIXEL_STD: [0.229, 0.224, 0.225] 5 | 6 | WEIGHTS: "" 7 | MASK_ON: False 8 | META_ARCHITECTURE: "YOLOV6" 9 | BACKBONE: 10 | NAME: "build_efficientrep_backbone" 11 | OUT_FEATURES: ["stride8", "stride16", "stride32"] 12 | 13 | YOLO: 14 | CLASSES: 80 15 | IN_FEATURES: ["stride8", "stride16", "stride32"] 16 | CONF_THRESHOLD: 0.001 17 | NMS_THRESHOLD: 0.65 18 | IGNORE_THRESHOLD: 0.7 19 | WIDTH_MUL: 1. 20 | DEPTH_MUL: 1. 21 | LOSS: 22 | LAMBDA_IOU: 1.5 23 | NECK: 24 | TYPE: "reppan" 25 | 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | # TEST: ("coco_2014_val_mini",) 29 | TEST: ("coco_2017_val",) 30 | 31 | INPUT: 32 | # FORMAT: "RGB" # using BGR default 33 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 34 | MAX_SIZE_TRAIN: 800 # force max size train to 800? 
35 | MIN_SIZE_TEST: 640 36 | MAX_SIZE_TEST: 800 37 | # open all augmentations 38 | JITTER_CROP: 39 | ENABLED: False 40 | RESIZE: 41 | ENABLED: False 42 | # SHAPE: (540, 960) 43 | DISTORTION: 44 | ENABLED: True 45 | COLOR_JITTER: 46 | BRIGHTNESS: True 47 | SATURATION: True 48 | # MOSAIC: 49 | # ENABLED: True 50 | # NUM_IMAGES: 4 51 | # DEBUG_VIS: True 52 | # # MOSAIC_WIDTH: 960 53 | # # MOSAIC_HEIGHT: 540 54 | MOSAIC_AND_MIXUP: 55 | ENABLED: True 56 | # ENABLED: False 57 | DEBUG_VIS: False 58 | ENABLE_MIXUP: False 59 | DISABLE_AT_ITER: 120000 60 | 61 | 62 | SOLVER: 63 | # enable fp16 training 64 | AMP: 65 | ENABLED: true 66 | IMS_PER_BATCH: 112 67 | BASE_LR: 0.027 68 | STEPS: (60000, 80000) 69 | WARMUP_FACTOR: 0.00033333 70 | WARMUP_ITERS: 1200 71 | MAX_ITER: 230000 72 | LR_SCHEDULER_NAME: "WarmupCosineLR" 73 | 74 | TEST: 75 | EVAL_PERIOD: 10000 76 | # EVAL_PERIOD: 0 77 | OUTPUT_DIR: "output/coco_yolox_s" 78 | VIS_PERIOD: 5000 79 | 80 | DATALOADER: 81 | # proposals are part of the dataset_dicts, and take a lot of RAM 82 | NUM_WORKERS: 3 83 | -------------------------------------------------------------------------------- /configs/coco/yolov6/yolov6_s.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-YOLOv7.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [0.485, 0.456, 0.406] # same value as PP-YOLOv2, RGB order 4 | PIXEL_STD: [0.229, 0.224, 0.225] 5 | 6 | WEIGHTS: "" 7 | MASK_ON: False 8 | META_ARCHITECTURE: "YOLOV6" 9 | BACKBONE: 10 | NAME: "build_efficientrep_backbone" 11 | OUT_FEATURES: ["stride8", "stride16", "stride32"] 12 | 13 | YOLO: 14 | CLASSES: 80 15 | IN_FEATURES: ["stride8", "stride16", "stride32"] 16 | CONF_THRESHOLD: 0.001 17 | NMS_THRESHOLD: 0.65 18 | IGNORE_THRESHOLD: 0.7 19 | WIDTH_MUL: 0.50 20 | DEPTH_MUL: 0.33 21 | LOSS: 22 | LAMBDA_IOU: 1.5 23 | HEAD: 24 | TYPE: "yolov6" 25 | NECK: 26 | TYPE: "reppan" 27 | 28 | DATASETS: 29 | TRAIN: ("coco_2017_train",) 30 | # TEST: ("coco_2014_val_mini",) 31 | TEST: 
("coco_2017_val",) 32 | 33 | INPUT: 34 | # FORMAT: "RGB" # using BGR default 35 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 36 | MAX_SIZE_TRAIN: 800 # force max size train to 800? 37 | MIN_SIZE_TEST: 640 38 | MAX_SIZE_TEST: 800 39 | # open all augmentations 40 | JITTER_CROP: 41 | ENABLED: False 42 | RESIZE: 43 | ENABLED: False 44 | # SHAPE: (540, 960) 45 | DISTORTION: 46 | ENABLED: True 47 | COLOR_JITTER: 48 | BRIGHTNESS: True 49 | SATURATION: True 50 | # MOSAIC: 51 | # ENABLED: True 52 | # NUM_IMAGES: 4 53 | # DEBUG_VIS: True 54 | # # MOSAIC_WIDTH: 960 55 | # # MOSAIC_HEIGHT: 540 56 | MOSAIC_AND_MIXUP: 57 | ENABLED: True 58 | # ENABLED: False 59 | DEBUG_VIS: False 60 | ENABLE_MIXUP: False 61 | DISABLE_AT_ITER: 120000 62 | 63 | 64 | SOLVER: 65 | # enable fp16 training 66 | AMP: 67 | ENABLED: true 68 | IMS_PER_BATCH: 80 69 | BASE_LR: 0.02 70 | STEPS: (60000, 80000) 71 | WARMUP_FACTOR: 0.00033333 72 | WARMUP_ITERS: 1200 73 | MAX_ITER: 230000 74 | LR_SCHEDULER_NAME: "WarmupCosineLR" 75 | 76 | TEST: 77 | EVAL_PERIOD: 10000 78 | # EVAL_PERIOD: 0 79 | OUTPUT_DIR: "output/coco_yolox_s" 80 | VIS_PERIOD: 5000 81 | 82 | DATALOADER: 83 | # proposals are part of the dataset_dicts, and take a lot of RAM 84 | NUM_WORKERS: 3 85 | -------------------------------------------------------------------------------- /configs/coco/yolov6/yolov6_tiny.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-YOLOv7.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [0.485, 0.456, 0.406] # same value as PP-YOLOv2, RGB order 4 | PIXEL_STD: [0.229, 0.224, 0.225] 5 | 6 | WEIGHTS: "" 7 | MASK_ON: False 8 | META_ARCHITECTURE: "YOLOV6" 9 | BACKBONE: 10 | NAME: "build_efficientrep_backbone" 11 | OUT_FEATURES: ["stride8", "stride16", "stride32"] 12 | 13 | YOLO: 14 | CLASSES: 80 15 | IN_FEATURES: ["stride8", "stride16", "stride32"] 16 | CONF_THRESHOLD: 0.001 17 | NMS_THRESHOLD: 0.65 18 | IGNORE_THRESHOLD: 0.7 19 | WIDTH_MUL: 0.50 20 | DEPTH_MUL: 0.25 21 | LOSS: 22 | 
LAMBDA_IOU: 1.5 23 | NECK: 24 | TYPE: "reppan" 25 | 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | # TEST: ("coco_2014_val_mini",) 29 | TEST: ("coco_2017_val",) 30 | 31 | INPUT: 32 | # FORMAT: "RGB" # using BGR default 33 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 34 | MAX_SIZE_TRAIN: 800 # force max size train to 800? 35 | MIN_SIZE_TEST: 640 36 | MAX_SIZE_TEST: 800 37 | # open all augmentations 38 | JITTER_CROP: 39 | ENABLED: False 40 | RESIZE: 41 | ENABLED: False 42 | # SHAPE: (540, 960) 43 | DISTORTION: 44 | ENABLED: True 45 | COLOR_JITTER: 46 | BRIGHTNESS: True 47 | SATURATION: True 48 | # MOSAIC: 49 | # ENABLED: True 50 | # NUM_IMAGES: 4 51 | # DEBUG_VIS: True 52 | # # MOSAIC_WIDTH: 960 53 | # # MOSAIC_HEIGHT: 540 54 | MOSAIC_AND_MIXUP: 55 | ENABLED: True 56 | # ENABLED: False 57 | DEBUG_VIS: False 58 | ENABLE_MIXUP: False 59 | DISABLE_AT_ITER: 120000 60 | 61 | 62 | SOLVER: 63 | # enable fp16 training 64 | AMP: 65 | ENABLED: true 66 | IMS_PER_BATCH: 112 67 | BASE_LR: 0.027 68 | STEPS: (60000, 80000) 69 | WARMUP_FACTOR: 0.00033333 70 | WARMUP_ITERS: 1200 71 | MAX_ITER: 230000 72 | LR_SCHEDULER_NAME: "WarmupCosineLR" 73 | 74 | TEST: 75 | EVAL_PERIOD: 10000 76 | # EVAL_PERIOD: 0 77 | OUTPUT_DIR: "output/coco_yolox_s" 78 | VIS_PERIOD: 5000 79 | 80 | DATALOADER: 81 | # proposals are part of the dataset_dicts, and take a lot of RAM 82 | NUM_WORKERS: 3 83 | -------------------------------------------------------------------------------- /configs/coco/yolox/yolox_convnext.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-YOLOv7.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [0.485, 0.456, 0.406] # same value as PP-YOLOv2, RGB order 4 | PIXEL_STD: [0.229, 0.224, 0.225] 5 | 6 | WEIGHTS: "" 7 | MASK_ON: False 8 | META_ARCHITECTURE: "YOLOX" 9 | BACKBONE: 10 | NAME: "build_convnext_backbone" 11 | 12 | DARKNET: 13 | WEIGHTS: "" 14 | DEPTH_WISE: False 15 | OUT_FEATURES: [0, 1, 2] 16 | 17 | YOLO: 18 | CLASSES: 80 19 | # 
IN_FEATURES: ["dark3", "dark4", "dark5"] 20 | IN_FEATURES: [0, 1, 2] 21 | CONF_THRESHOLD: 0.001 22 | NMS_THRESHOLD: 0.65 23 | IGNORE_THRESHOLD: 0.7 24 | WIDTH_MUL: 0.50 25 | DEPTH_MUL: 0.33 26 | LOSS_TYPE: "v7" 27 | LOSS: 28 | LAMBDA_IOU: 1.5 29 | 30 | DATASETS: 31 | TRAIN: ("coco_2017_train",) 32 | # TEST: ("coco_2014_val_mini",) 33 | TEST: ("coco_2017_val",) 34 | 35 | INPUT: 36 | # FORMAT: "RGB" # using BGR default 37 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 38 | MAX_SIZE_TRAIN: 800 # force max size train to 800? 39 | MIN_SIZE_TEST: 640 40 | MAX_SIZE_TEST: 800 41 | # open all augmentations 42 | JITTER_CROP: 43 | ENABLED: False 44 | RESIZE: 45 | ENABLED: False 46 | # SHAPE: (540, 960) 47 | DISTORTION: 48 | ENABLED: True 49 | COLOR_JITTER: 50 | BRIGHTNESS: True 51 | SATURATION: True 52 | # MOSAIC: 53 | # ENABLED: True 54 | # NUM_IMAGES: 4 55 | # DEBUG_VIS: True 56 | # # MOSAIC_WIDTH: 960 57 | # # MOSAIC_HEIGHT: 540 58 | MOSAIC_AND_MIXUP: 59 | ENABLED: True 60 | # ENABLED: False 61 | DEBUG_VIS: False 62 | ENABLE_MIXUP: False 63 | DISABLE_AT_ITER: 120000 64 | 65 | 66 | SOLVER: 67 | # enable fp16 training 68 | AMP: 69 | ENABLED: true 70 | IMS_PER_BATCH: 112 71 | BASE_LR: 0.027 72 | STEPS: (60000, 80000) 73 | WARMUP_FACTOR: 0.00033333 74 | WARMUP_ITERS: 1200 75 | MAX_ITER: 230000 76 | LR_SCHEDULER_NAME: "WarmupCosineLR" 77 | 78 | TEST: 79 | EVAL_PERIOD: 10000 80 | # EVAL_PERIOD: 0 81 | OUTPUT_DIR: "output/coco_yolox_s" 82 | VIS_PERIOD: 5000 83 | 84 | DATALOADER: 85 | # proposals are part of the dataset_dicts, and take a lot of RAM 86 | NUM_WORKERS: 3 87 | -------------------------------------------------------------------------------- /configs/coco/yolox_regnetx_s.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [0.485, 0.456, 0.406] # same value as PP-YOLOv2, RGB order 4 | PIXEL_STD: [0.229, 0.224, 0.225] 5 | 6 | WEIGHTS: "" 7 | MASK_ON: False 8 | META_ARCHITECTURE: 
"YOLOX" 9 | BACKBONE: 10 | NAME: "build_regnet_backbone" 11 | SIMPLE: true 12 | STRIDE: 32 13 | CHANNEL: 384 14 | REGNETS: 15 | TYPE: "RegNetX_400MF" 16 | OUT_FEATURES: ["s2", "s3", "s4"] # fpn produce 4 levels, only using 3 for now 17 | 18 | YOLO: 19 | CLASSES: 80 20 | IN_FEATURES: ["s2", "s3", "s4"] 21 | CONF_THRESHOLD: 0.001 22 | NMS_THRESHOLD: 0.65 23 | IGNORE_THRESHOLD: 0.7 24 | WIDTH_MUL: 0.50 25 | DEPTH_MUL: 0.33 26 | LOSS_TYPE: "v7" 27 | LOSS: 28 | LAMBDA_IOU: 1.5 29 | 30 | DATASETS: 31 | TRAIN: ("coco_2017_train",) 32 | # TEST: ("coco_2014_val_mini",) 33 | TEST: ("coco_2017_val",) 34 | 35 | INPUT: 36 | # FORMAT: "RGB" # using BGR default 37 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 38 | MAX_SIZE_TRAIN: 800 # force max size train to 800? 39 | MIN_SIZE_TEST: 640 40 | MAX_SIZE_TEST: 800 41 | # open all augmentations 42 | JITTER_CROP: 43 | ENABLED: False 44 | RESIZE: 45 | ENABLED: False 46 | # SHAPE: (540, 960) 47 | DISTORTION: 48 | ENABLED: True 49 | COLOR_JITTER: 50 | BRIGHTNESS: True 51 | SATURATION: True 52 | # MOSAIC: 53 | # ENABLED: True 54 | # NUM_IMAGES: 4 55 | # DEBUG_VIS: True 56 | # # MOSAIC_WIDTH: 960 57 | # # MOSAIC_HEIGHT: 540 58 | MOSAIC_AND_MIXUP: 59 | ENABLED: True 60 | # ENABLED: False 61 | DEBUG_VIS: False 62 | ENABLE_MIXUP: False 63 | DISABLE_AT_ITER: 120000 64 | 65 | 66 | SOLVER: 67 | # enable fp16 training 68 | AMP: 69 | ENABLED: true 70 | IMS_PER_BATCH: 112 71 | BASE_LR: 0.027 72 | STEPS: (60000, 80000) 73 | WARMUP_FACTOR: 0.00033333 74 | WARMUP_ITERS: 1200 75 | MAX_ITER: 230000 76 | LR_SCHEDULER_NAME: "WarmupCosineLR" 77 | 78 | TEST: 79 | EVAL_PERIOD: 10000 80 | # EVAL_PERIOD: 0 81 | OUTPUT_DIR: "output/coco_yolox_s" 82 | VIS_PERIOD: 5000 83 | 84 | DATALOADER: 85 | # proposals are part of the dataset_dicts, and take a lot of RAM 86 | NUM_WORKERS: 3 87 | -------------------------------------------------------------------------------- /configs/coco/yolox_s.yaml: -------------------------------------------------------------------------------- 
1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [0.485, 0.456, 0.406] # same value as PP-YOLOv2, RGB order 4 | PIXEL_STD: [0.229, 0.224, 0.225] 5 | 6 | WEIGHTS: "" 7 | MASK_ON: False 8 | META_ARCHITECTURE: "YOLOX" 9 | BACKBONE: 10 | NAME: "build_cspdarknetx_backbone" 11 | 12 | DARKNET: 13 | WEIGHTS: "" 14 | DEPTH_WISE: False 15 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 16 | 17 | YOLO: 18 | CLASSES: 80 19 | IN_FEATURES: ["dark3", "dark4", "dark5"] 20 | CONF_THRESHOLD: 0.001 21 | NMS_THRESHOLD: 0.65 22 | IGNORE_THRESHOLD: 0.7 23 | WIDTH_MUL: 0.50 24 | DEPTH_MUL: 0.33 25 | LOSS_TYPE: "v7" 26 | LOSS: 27 | LAMBDA_IOU: 1.5 28 | 29 | DATASETS: 30 | TRAIN: ("coco_2017_train",) 31 | # TEST: ("coco_2014_val_mini",) 32 | TEST: ("coco_2017_val",) 33 | 34 | INPUT: 35 | # FORMAT: "RGB" # using BGR default 36 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 37 | MAX_SIZE_TRAIN: 800 # force max size train to 800? 38 | MIN_SIZE_TEST: 640 39 | MAX_SIZE_TEST: 800 40 | # open all augmentations 41 | JITTER_CROP: 42 | ENABLED: False 43 | RESIZE: 44 | ENABLED: False 45 | # SHAPE: (540, 960) 46 | DISTORTION: 47 | ENABLED: True 48 | COLOR_JITTER: 49 | BRIGHTNESS: True 50 | SATURATION: True 51 | # MOSAIC: 52 | # ENABLED: True 53 | # NUM_IMAGES: 4 54 | # DEBUG_VIS: True 55 | # # MOSAIC_WIDTH: 960 56 | # # MOSAIC_HEIGHT: 540 57 | MOSAIC_AND_MIXUP: 58 | ENABLED: True 59 | # ENABLED: False 60 | DEBUG_VIS: False 61 | ENABLE_MIXUP: False 62 | DISABLE_AT_ITER: 120000 63 | 64 | 65 | SOLVER: 66 | # enable fp16 training 67 | AMP: 68 | ENABLED: true 69 | IMS_PER_BATCH: 112 70 | BASE_LR: 0.027 71 | STEPS: (60000, 80000) 72 | WARMUP_FACTOR: 0.00033333 73 | WARMUP_ITERS: 1200 74 | MAX_ITER: 230000 75 | LR_SCHEDULER_NAME: "WarmupCosineLR" 76 | 77 | TEST: 78 | EVAL_PERIOD: 10000 79 | # EVAL_PERIOD: 0 80 | OUTPUT_DIR: "output/coco_yolox_s" 81 | VIS_PERIOD: 5000 82 | 83 | DATALOADER: 84 | # proposals are part of the dataset_dicts, and take a lot of RAM 85 | NUM_WORKERS: 3 86 | 
-------------------------------------------------------------------------------- /configs/common/coco_schedule.py: -------------------------------------------------------------------------------- 1 | from fvcore.common.param_scheduler import MultiStepParamScheduler 2 | 3 | from detectron2.config import LazyCall as L 4 | from detectron2.solver import WarmupParamScheduler 5 | 6 | 7 | def default_X_scheduler(num_X): 8 | """ 9 | Returns the config for a default multi-step LR scheduler such as "1x", "3x", 10 | commonly referred to in papers, where every 1x has the total length of 1440k 11 | training images (~12 COCO epochs). LR is decayed twice at the end of training 12 | following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4. 13 | 14 | Args: 15 | num_X: a positive real number 16 | 17 | Returns: 18 | DictConfig: configs that define the multiplier for LR during training 19 | """ 20 | # total number of iterations assuming 16 batch size, using 1440000/16=90000 21 | total_steps_16bs = num_X * 90000 22 | 23 | if num_X <= 2: 24 | scheduler = L(MultiStepParamScheduler)( 25 | values=[1.0, 0.1, 0.01], 26 | # note that scheduler is scale-invariant. 
This is equivalent to 27 | # milestones=[6, 8, 9] 28 | milestones=[60000, 80000, 90000], 29 | ) 30 | else: 31 | scheduler = L(MultiStepParamScheduler)( 32 | values=[1.0, 0.1, 0.01], 33 | milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs], 34 | ) 35 | return L(WarmupParamScheduler)( 36 | scheduler=scheduler, 37 | warmup_length=1000 / total_steps_16bs, 38 | warmup_method="linear", 39 | warmup_factor=0.001, 40 | ) 41 | 42 | 43 | lr_multiplier_1x = default_X_scheduler(1) 44 | lr_multiplier_2x = default_X_scheduler(2) 45 | lr_multiplier_3x = default_X_scheduler(3) 46 | lr_multiplier_6x = default_X_scheduler(6) 47 | lr_multiplier_9x = default_X_scheduler(9) 48 | -------------------------------------------------------------------------------- /configs/common/data/coco.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | 3 | import detectron2.data.transforms as T 4 | from detectron2.config import LazyCall as L 5 | from detectron2.data import ( 6 | DatasetMapper, 7 | build_detection_test_loader, 8 | build_detection_train_loader, 9 | get_detection_dataset_dicts, 10 | ) 11 | from detectron2.evaluation import COCOEvaluator 12 | 13 | dataloader = OmegaConf.create() 14 | 15 | dataloader.train = L(build_detection_train_loader)( 16 | dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"), 17 | mapper=L(DatasetMapper)( 18 | is_train=True, 19 | augmentations=[ 20 | L(T.ResizeShortestEdge)( 21 | short_edge_length=(640, 672, 704, 736, 768, 800), 22 | sample_style="choice", 23 | max_size=1333, 24 | ), 25 | L(T.RandomFlip)(horizontal=True), 26 | ], 27 | image_format="BGR", 28 | use_instance_mask=True, 29 | ), 30 | total_batch_size=16, 31 | num_workers=4, 32 | ) 33 | 34 | dataloader.test = L(build_detection_test_loader)( 35 | dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False), 36 | mapper=L(DatasetMapper)( 37 | is_train=False, 38 | augmentations=[ 
39 | L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333), 40 | ], 41 | image_format="${...train.mapper.image_format}", 42 | ), 43 | num_workers=4, 44 | ) 45 | 46 | dataloader.evaluator = L(COCOEvaluator)( 47 | dataset_name="${..test.dataset.names}", 48 | ) 49 | -------------------------------------------------------------------------------- /configs/common/data/coco_keypoint.py: -------------------------------------------------------------------------------- 1 | from detectron2.data.detection_utils import create_keypoint_hflip_indices 2 | 3 | from .coco import dataloader 4 | 5 | dataloader.train.dataset.min_keypoints = 1 6 | dataloader.train.dataset.names = "keypoints_coco_2017_train" 7 | dataloader.test.dataset.names = "keypoints_coco_2017_val" 8 | 9 | dataloader.train.mapper.update( 10 | use_instance_mask=False, 11 | use_keypoint=True, 12 | keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names), 13 | ) 14 | -------------------------------------------------------------------------------- /configs/common/data/coco_panoptic_separated.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.evaluation import ( 3 | COCOEvaluator, 4 | COCOPanopticEvaluator, 5 | DatasetEvaluators, 6 | SemSegEvaluator, 7 | ) 8 | 9 | from .coco import dataloader 10 | 11 | dataloader.train.dataset.names = "coco_2017_train_panoptic_separated" 12 | dataloader.train.dataset.filter_empty = False 13 | dataloader.test.dataset.names = "coco_2017_val_panoptic_separated" 14 | 15 | 16 | dataloader.evaluator = [ 17 | L(COCOEvaluator)( 18 | dataset_name="${...test.dataset.names}", 19 | ), 20 | L(SemSegEvaluator)( 21 | dataset_name="${...test.dataset.names}", 22 | ), 23 | L(COCOPanopticEvaluator)( 24 | dataset_name="${...test.dataset.names}", 25 | ), 26 | ] 27 | -------------------------------------------------------------------------------- 
/configs/common/models/panoptic_fpn.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import LazyCall as L 2 | from detectron2.layers import ShapeSpec 3 | from detectron2.modeling import PanopticFPN 4 | # from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead 5 | from yolov7.modeling.head.sem_seg_head import SemSegFPNHead 6 | 7 | from .mask_rcnn_fpn import model 8 | 9 | 10 | # model.roi_heads.num_classes = 45 11 | model._target_ = PanopticFPN 12 | model.sem_seg_head = L(SemSegFPNHead)( 13 | input_shape={ 14 | f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}") 15 | for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32]) 16 | }, 17 | ignore_value=255, 18 | num_classes=54, # COCO stuff + 1 19 | conv_dims=128, 20 | common_stride=4, 21 | loss_weight=0.5, 22 | norm="GN", 23 | ) 24 | -------------------------------------------------------------------------------- /configs/common/optim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from detectron2.config import LazyCall as L 4 | from detectron2.solver.build import get_default_optimizer_params 5 | 6 | SGD = L(torch.optim.SGD)( 7 | params=L(get_default_optimizer_params)( 8 | # params.model is meant to be set to the model object, before instantiating 9 | # the optimizer. 
10 | weight_decay_norm=0.0 11 | ), 12 | lr=0.02, 13 | momentum=0.9, 14 | weight_decay=1e-4, 15 | ) 16 | -------------------------------------------------------------------------------- /configs/common/readme.md: -------------------------------------------------------------------------------- 1 | this folder contains new model design way in d2 2 | 3 | -------------------------------------------------------------------------------- /configs/common/train.py: -------------------------------------------------------------------------------- 1 | # Common training-related configs that are designed for "tools/lazyconfig_train_net.py" 2 | # You can use your own instead, together with your own train_net.py 3 | train = dict( 4 | output_dir="./output", 5 | init_checkpoint="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 6 | max_iter=90000, 7 | amp=dict(enabled=False), # options for Automatic Mixed Precision 8 | ddp=dict( # options for DistributedDataParallel 9 | broadcast_buffers=False, 10 | find_unused_parameters=False, 11 | fp16_compression=False, 12 | ), 13 | checkpointer=dict(period=5000, max_to_keep=100), # options for PeriodicCheckpointer 14 | eval_period=5000, 15 | log_period=20, 16 | device="cuda" 17 | # ... 
18 | ) 19 | -------------------------------------------------------------------------------- /configs/facemask/cspdarknet53_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_cspdarknet_backbone" 8 | DARKNET: 9 | WEIGHTS: "weights/yolov3.pt" # using pytorch weights from ultralytics's yolov3 10 | DEPTH: 53 11 | WITH_CSP: True 12 | STEM_OUT_CHANNELS: 32 13 | WEIGHTS: "" 14 | OUT_FEATURES: ["layer3", "layer4", "layer5"] 15 | YOLO: 16 | ANCHORS: 17 | [ 18 | [[322, 428], [194, 260], [123, 168]], 19 | [[84, 115], [60, 79], [41, 53]], 20 | [[27, 37], [18, 23], [10, 13]], 21 | ] 22 | CLASSES: 2 23 | IN_FEATURES: ["layer3", "layer4", "layer5"] # p3, p4, p5 from FPN 24 | CONF_THRESHOLD: 0.01 25 | NMS_THRESHOLD: 0.2 26 | IGNORE_THRESHOLD: 0.5 27 | VARIANT: "yolov7" 28 | LOSS_TYPE: "v7" 29 | DATASETS: 30 | TRAIN: ("facemask_train",) 31 | TEST: ("facemask_val",) 32 | 33 | INPUT: 34 | # open all augmentations 35 | JITTER_CROP: 36 | ENABLED: True 37 | RESIZE: 38 | ENABLED: True 39 | # SHAPE: (540, 960) 40 | DISTORTION: 41 | ENABLED: True 42 | MOSAIC: 43 | ENABLED: True 44 | NUM_IMAGES: 4 45 | # MOSAIC_WIDTH: 960 46 | # MOSAIC_HEIGHT: 540 47 | 48 | SOLVER: 49 | IMS_PER_BATCH: 2 50 | BASE_LR: 0.0001 51 | STEPS: (60000, 80000) 52 | MAX_ITER: 120000 53 | 54 | TEST: 55 | EVAL_PERIOD: 10000 56 | # EVAL_PERIOD: 0 57 | OUTPUT_DIR: "output/facemask_cspdarknet" 58 | 59 | DATALOADER: 60 | # proposals are part of the dataset_dicts, and take a lot of RAM 61 | NUM_WORKERS: 2 -------------------------------------------------------------------------------- /configs/facemask/r2_50_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: 
"build_res2net_backbone" 8 | RESNETS: 9 | R2TYPE: "res2net50_v1b" 10 | DEPTH: 50 11 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 12 | YOLO: 13 | ANCHORS: 14 | [ 15 | [[322, 428], [194, 260], [123, 168]], 16 | [[84, 115], [60, 79], [41, 53]], 17 | [[27, 37], [18, 23], [10, 13]], 18 | ] 19 | CLASSES: 2 20 | IN_FEATURES: ["res3", "res4", "res5"] # p3, p4, p5 from FPN 21 | CONF_THRESHOLD: 0.001 22 | NMS_THRESHOLD: 0.65 23 | IGNORE_THRESHOLD: 0.7 24 | VARIANT: "yolov7" 25 | LOSS_TYPE: "v7" 26 | WIDTH_MUL: 1.0 27 | # WIDTH_MUL: 1. 28 | LOSS: 29 | LAMBDA_IOU: 1.0 30 | NECK: 31 | TYPE: "fpn" 32 | WITH_SPP: true 33 | 34 | DATASETS: 35 | TRAIN: ("facemask_train",) 36 | TEST: ("facemask_val",) 37 | 38 | INPUT: 39 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 40 | # MAX_SIZE_TRAIN: 1056 # force max size train to 800? 41 | MAX_SIZE_TRAIN: 800 # force max size train to 800? 42 | MIN_SIZE_TEST: 640 43 | MAX_SIZE_TEST: 800 44 | # open all augmentations 45 | JITTER_CROP: 46 | ENABLED: False 47 | RESIZE: 48 | ENABLED: False 49 | # SHAPE: (540, 960) 50 | DISTORTION: 51 | ENABLED: False 52 | # MOSAIC: 53 | # ENABLED: True 54 | # NUM_IMAGES: 4 55 | # DEBUG_VIS: True 56 | # # MOSAIC_WIDTH: 960 57 | # # MOSAIC_HEIGHT: 540 58 | MOSAIC_AND_MIXUP: 59 | ENABLED: True 60 | DEBUG_VIS: False 61 | ENABLE_MIXUP: False 62 | 63 | SOLVER: 64 | IMS_PER_BATCH: 3 65 | BASE_LR: 0.00025 66 | STEPS: (60000, 80000) 67 | MAX_ITER: 200000 68 | 69 | 70 | TEST: 71 | EVAL_PERIOD: 10000 72 | # EVAL_PERIOD: 0 73 | OUTPUT_DIR: "output/facemask_r2_50" 74 | 75 | DATALOADER: 76 | # proposals are part of the dataset_dicts, and take a lot of RAM 77 | NUM_WORKERS: 2 78 | -------------------------------------------------------------------------------- /configs/facemask/r50_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_resnet_backbone" 8 | RESNETS: 9 | DEPTH: 50 10 | RES2_OUT_CHANNELS: 256 # align with darknet output channels 11 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 12 | YOLO: 13 | ANCHORS: 14 | [ 15 | [[322, 428], [194, 260], [123, 168]], 16 | [[84, 115], [60, 79], [41, 53]], 17 | [[27, 37], [18, 23], [10, 13]], 18 | ] 19 | CLASSES: 2 20 | IN_FEATURES: ["res3", "res4", "res5"] # p3, p4, p5 from FPN 21 | CONF_THRESHOLD: 0.01 22 | NMS_THRESHOLD: 0.2 23 | IGNORE_THRESHOLD: 0.5 24 | VARIANT: "yolov7" 25 | LOSS_TYPE: "v7" 26 | DATASETS: 27 | TRAIN: ("facemask_train",) 28 | TEST: ("facemask_val",) 29 | 30 | INPUT: 31 | # open all augmentations 32 | JITTER_CROP: 33 | ENABLED: True 34 | RESIZE: 35 | ENABLED: True 36 | # SHAPE: (540, 960) 37 | DISTORTION: 38 | ENABLED: True 39 | MOSAIC: 40 | ENABLED: True 41 | NUM_IMAGES: 4 42 | # MOSAIC_WIDTH: 960 43 | # MOSAIC_HEIGHT: 540 44 | 45 | 46 | SOLVER: 47 | IMS_PER_BATCH: 4 48 | BASE_LR: 0.00008 49 | STEPS: (60000, 80000) 50 | MAX_ITER: 270000 51 | 52 | 53 | TEST: 54 | EVAL_PERIOD: 10000 55 | # EVAL_PERIOD: 0 56 | 57 | DATALOADER: 58 | # proposals are part of the dataset_dicts, and take a lot of RAM 59 | NUM_WORKERS: 2 60 | -------------------------------------------------------------------------------- /configs/facemask/r50_pan_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_resnet_pan_backbone" 8 | RESNETS: 9 | DEPTH: 50 10 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 11 | FPN: 12 | IN_FEATURES: ["res3", "res4", "res5"] 13 | OUT_CHANNELS_LIST: [256, 512, 1024] 14 | YOLO: 15 | ANCHORS: 16 | [ 17 | [[322, 428], [194, 260], [123, 
168]], 18 | [[84, 115], [60, 79], [41, 53]], 19 | [[27, 37], [18, 23], [10, 13]], 20 | ] 21 | CLASSES: 2 22 | IN_FEATURES: ["p3", "p4", "p5"] # p3, p4, p5 from FPN 23 | CONF_THRESHOLD: 0.2 24 | NMS_THRESHOLD: 0.1 25 | IGNORE_THRESHOLD: 0.1 26 | VARIANT: "yolov7" 27 | LOSS_TYPE: "v4" 28 | LOSS: 29 | LAMBDA_CLS: 1.1 30 | DATASETS: 31 | TRAIN: ("facemask_train",) 32 | TEST: ("facemask_val",) 33 | 34 | INPUT: 35 | # open all augmentations 36 | JITTER_CROP: 37 | ENABLED: True 38 | RESIZE: 39 | ENABLED: True 40 | SHAPE: (640, 640) 41 | DISTORTION: 42 | ENABLED: True 43 | MOSAIC: 44 | ENABLED: True 45 | NUM_IMAGES: 4 46 | MOSAIC_WIDTH: 640 47 | MOSAIC_HEIGHT: 640 48 | 49 | SOLVER: 50 | IMS_PER_BATCH: 3 51 | BASE_LR: 0.0004 52 | STEPS: (60000, 80000) 53 | MAX_ITER: 90000 54 | 55 | DATALOADER: 56 | # proposals are part of the dataset_dicts, and take a lot of RAM 57 | NUM_WORKERS: 2 58 | -------------------------------------------------------------------------------- /configs/facemask/swin_tiny_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | MASK_ON: False 5 | BACKBONE: 6 | NAME: "build_swin_transformer_backbone" 7 | SWIN: 8 | WEIGHTS: "weights/swin_tiny_patch4_window7_224.pth" # must have 9 | TYPE: "tiny" 10 | OUT_FEATURES: [1, 2, 3] 11 | YOLO: 12 | ANCHORS: 13 | [ 14 | [[116, 90], [156, 198], [373, 326]], 15 | [[30, 61], [62, 45], [42, 119]], 16 | [[10, 13], [16, 30], [33, 23]], 17 | ] 18 | CLASSES: 2 19 | IN_FEATURES: [0, 1, 2] # reordered out indices 20 | CONF_THRESHOLD: 0.01 21 | NMS_THRESHOLD: 0.5 22 | IGNORE_THRESHOLD: 0.7 23 | VARIANT: "yolov7" 24 | DATASETS: 25 | TRAIN: ("facemask_train",) 26 | TEST: ("facemask_val",) 27 | 28 | SOLVER: 29 | IMS_PER_BATCH: 1 30 | BASE_LR: 0.0002 31 | STEPS: (60000, 80000) 32 | MAX_ITER: 90000 33 | 34 | DATALOADER: 35 | # proposals are part of the dataset_dicts, and take a lot of RAM 36 | NUM_WORKERS: 2 
-------------------------------------------------------------------------------- /configs/facemask/yolov5_s.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | META_ARCHITECTURE: "YOLOV5" 6 | BACKBONE: 7 | NAME: "build_cspdarknetx_backbone" 8 | 9 | DARKNET: 10 | WEIGHTS: "" 11 | DEPTH_WISE: False 12 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 13 | YOLO: 14 | ANCHORS: 15 | [ 16 | [[13, 25], [15, 14], [7, 10]], 17 | [[47, 29], [23, 40], [28, 19]], 18 | [[120, 106], [80, 47], [45, 66]], 19 | ] 20 | CLASSES: 10 21 | IN_FEATURES: ["dark3", "dark4", "dark5"] 22 | CONF_THRESHOLD: 0.001 23 | NMS_THRESHOLD: 0.6 24 | IGNORE_THRESHOLD: 0.6 25 | WIDTH_MUL: 0.50 26 | DEPTH_MUL: 0.33 27 | VARIANT: "yolov7" 28 | LOSS_TYPE: "v7" 29 | LOSS: 30 | LAMBDA_IOU: 0.05 31 | LAMBDA_CONF: 1.0 32 | LAMBDA_CLS: 0.5 33 | ANCHOR_RATIO_THRESH: 4.0 34 | 35 | DATASETS: 36 | TRAIN: ("visdrone_train",) 37 | TEST: ("visdrone_val",) 38 | 39 | INPUT: 40 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 41 | MAX_SIZE_TRAIN: 1056 42 | MIN_SIZE_TEST: 672 43 | MAX_SIZE_TEST: 768 44 | # open all augmentations 45 | JITTER_CROP: 46 | ENABLED: False 47 | RESIZE: 48 | ENABLED: False 49 | # SHAPE: (540, 960) 50 | DISTORTION: 51 | ENABLED: False 52 | # MOSAIC: 53 | # ENABLED: True 54 | # NUM_IMAGES: 4 55 | # DEBUG_VIS: True 56 | # # MOSAIC_WIDTH: 960 57 | # # MOSAIC_HEIGHT: 540 58 | MOSAIC_AND_MIXUP: 59 | ENABLED: True 60 | DEBUG_VIS: False 61 | ENABLE_MIXUP: False 62 | 63 | SOLVER: 64 | IMS_PER_BATCH: 5 65 | BASE_LR: 0.04 66 | STEPS: (60000, 80000) 67 | WARMUP_FACTOR: 0.00033333 68 | WARMUP_ITERS: 1500 69 | MAX_ITER: 130000 70 | LR_SCHEDULER_NAME: "WarmupCosineLR" 71 | 72 | TEST: 73 | EVAL_PERIOD: 5000 74 | # EVAL_PERIOD: 0 75 | OUTPUT_DIR: "output/visdrone_yolov5_s" 76 | VIS_PERIOD: 5000 77 | 78 | DATALOADER: 79 | # proposals are part of the dataset_dicts, and take a lot of RAM 80 | NUM_WORKERS: 1 81 | 
-------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_R_50_FPN_100ep_LSJ.py: -------------------------------------------------------------------------------- 1 | import detectron2.data.transforms as T 2 | from detectron2.config.lazy import LazyCall as L 3 | from detectron2.layers.batch_norm import NaiveSyncBatchNorm 4 | from detectron2.solver import WarmupParamScheduler 5 | from fvcore.common.param_scheduler import MultiStepParamScheduler 6 | 7 | from ..common.data.coco import dataloader 8 | from ..common.models.mask_rcnn_fpn import model 9 | from ..common.optim import SGD as optimizer 10 | from ..common.train import train 11 | 12 | # train from scratch 13 | train.init_checkpoint = "" 14 | train.amp.enabled = True 15 | train.ddp.fp16_compression = True 16 | model.backbone.bottom_up.freeze_at = 0 17 | 18 | # SyncBN 19 | # fmt: off 20 | model.backbone.bottom_up.stem.norm = \ 21 | model.backbone.bottom_up.stages.norm = \ 22 | model.backbone.norm = "SyncBN" 23 | 24 | # Using NaiveSyncBatchNorm becase heads may have empty input. That is not supported by 25 | # torch.nn.SyncBatchNorm. We can remove this after 26 | # https://github.com/pytorch/pytorch/issues/36530 is fixed. 
27 | model.roi_heads.box_head.conv_norm = \ 28 | model.roi_heads.mask_head.conv_norm = lambda c: NaiveSyncBatchNorm(c, 29 | stats_mode="N") 30 | # fmt: on 31 | 32 | # 2conv in RPN: 33 | # https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/modeling/architecture/heads.py#L95-L97 # noqa: E501, B950 34 | model.proposal_generator.head.conv_dims = [-1, -1] 35 | 36 | # 4conv1fc box head 37 | model.roi_heads.box_head.conv_dims = [256, 256, 256, 256] 38 | model.roi_heads.box_head.fc_dims = [1024] 39 | 40 | # resize_and_crop_image in: 41 | # https://github.com/tensorflow/tpu/blob/b24729de804fdb751b06467d3dce0637fa652060/models/official/detection/utils/input_utils.py#L127 # noqa: E501, B950 42 | image_size = 1024 43 | dataloader.train.mapper.augmentations = [ 44 | L(T.ResizeScale)( 45 | min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size 46 | ), 47 | L(T.FixedSizeCrop)(crop_size=(image_size, image_size)), 48 | L(T.RandomFlip)(horizontal=True), 49 | ] 50 | 51 | # recompute boxes due to cropping 52 | dataloader.train.mapper.recompute_boxes = True 53 | 54 | # larger batch-size. 55 | dataloader.train.total_batch_size = 64 56 | 57 | # Equivalent to 100 epochs. 
58 | # 100 ep = 184375 iters * 64 images/iter / 118000 images/ep 59 | train.max_iter = 184375 60 | 61 | lr_multiplier = L(WarmupParamScheduler)( 62 | scheduler=L(MultiStepParamScheduler)( 63 | values=[1.0, 0.1, 0.01], 64 | milestones=[163889, 177546], 65 | num_updates=train.max_iter, 66 | ), 67 | warmup_length=500 / train.max_iter, 68 | warmup_factor=0.067, 69 | ) 70 | 71 | optimizer.lr = 0.1 72 | optimizer.weight_decay = 4e-5 73 | -------------------------------------------------------------------------------- /configs/new_baselines/mask_rcnn_regnetx_4gf_dds_FPN_200ep_LSJ.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | model, 5 | optimizer, 6 | train, 7 | ) 8 | from detectron2.config import LazyCall as L 9 | from detectron2.modeling.backbone import RegNet 10 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock 11 | 12 | # Config source: 13 | # https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_regnetx_4gf_dds_fpn_1x.py # noqa 14 | model.backbone.bottom_up = L(RegNet)( 15 | stem_class=SimpleStem, 16 | stem_width=32, 17 | block_class=ResBottleneckBlock, 18 | depth=23, 19 | w_a=38.65, 20 | w_0=96, 21 | w_m=2.43, 22 | group_width=40, 23 | norm="SyncBN", 24 | out_features=["s1", "s2", "s3", "s4"], 25 | ) 26 | model.pixel_std = [57.375, 57.120, 58.395] 27 | 28 | # RegNets benefit from enabling cudnn benchmark mode 29 | train.cudnn_benchmark = True 30 | 31 | 32 | train.max_iter *= 2 # 100ep -> 200ep 33 | 34 | lr_multiplier.scheduler.milestones = [ 35 | milestone * 2 for milestone in lr_multiplier.scheduler.milestones 36 | ] 37 | lr_multiplier.scheduler.num_updates = train.max_iter 38 | -------------------------------------------------------------------------------- /configs/new_baselines/maskrcnn_kps_regnetx_0.4g.py: 
-------------------------------------------------------------------------------- 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | optimizer, 5 | train, 6 | ) 7 | from detectron2.config import LazyCall as L 8 | from detectron2.modeling.backbone import RegNet 9 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock 10 | 11 | from ..common.models.mask_rcnn_fpn_kps import model 12 | 13 | 14 | # train maskrcnn and keypoints at the same time. 15 | 16 | # train from scratch 17 | train.init_checkpoint = "" 18 | train.amp.enabled = True 19 | train.ddp.fp16_compression = True 20 | # RegNets benefit from enabling cudnn benchmark mode 21 | train.cudnn_benchmark = True 22 | 23 | # train.output_dir = 'output/panoptic_regnetx_0.4g' 24 | 25 | model.backbone.bottom_up.freeze_at = 0 26 | # model.backbone.bottom_up.freeze_at = 2 27 | model.backbone.bottom_up = L(RegNet)( 28 | stem_class=SimpleStem, 29 | stem_width=32, 30 | block_class=ResBottleneckBlock, 31 | depth=22, 32 | w_a=24.48, 33 | w_0=24, 34 | w_m=2.54, 35 | group_width=16, 36 | norm="SyncBN", 37 | out_features=["s1", "s2", "s3", "s4"], 38 | ) 39 | model.pixel_std = [57.375, 57.120, 58.395] 40 | 41 | 42 | 43 | train.max_iter *= 2 # 100ep -> 200ep 44 | 45 | lr_multiplier.scheduler.milestones = [ 46 | milestone * 2 for milestone in lr_multiplier.scheduler.milestones 47 | ] 48 | lr_multiplier.scheduler.num_updates = train.max_iter 49 | -------------------------------------------------------------------------------- /configs/new_baselines/maskrcnn_regnetx_0.4g.py: -------------------------------------------------------------------------------- 1 | from .mask_rcnn_R_50_FPN_100ep_LSJ import ( 2 | dataloader, 3 | lr_multiplier, 4 | optimizer, 5 | train, 6 | ) 7 | from detectron2.config import LazyCall as L 8 | from detectron2.modeling.backbone import RegNet 9 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock 10 | 11 | from 
..common.models.mask_rcnn_fpn import model 12 | 13 | # train from scratch 14 | train.init_checkpoint = "" 15 | train.amp.enabled = True 16 | train.ddp.fp16_compression = True 17 | # RegNets benefit from enabling cudnn benchmark mode 18 | train.cudnn_benchmark = True 19 | 20 | # train.output_dir = 'output/panoptic_regnetx_0.4g' 21 | 22 | model.backbone.bottom_up.freeze_at = 0 23 | # model.backbone.bottom_up.freeze_at = 2 24 | model.backbone.bottom_up = L(RegNet)( 25 | stem_class=SimpleStem, 26 | stem_width=32, 27 | block_class=ResBottleneckBlock, 28 | depth=22, 29 | w_a=24.48, 30 | w_0=24, 31 | w_m=2.54, 32 | group_width=16, 33 | norm="SyncBN", 34 | out_features=["s1", "s2", "s3", "s4"], 35 | ) 36 | model.pixel_std = [57.375, 57.120, 58.395] 37 | 38 | 39 | 40 | train.max_iter *= 2 # 100ep -> 200ep 41 | 42 | lr_multiplier.scheduler.milestones = [ 43 | milestone * 2 for milestone in lr_multiplier.scheduler.milestones 44 | ] 45 | lr_multiplier.scheduler.num_updates = train.max_iter 46 | -------------------------------------------------------------------------------- /configs/new_baselines/panoptic_fpn_regnetx_0.4g.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco_panoptic_separated import dataloader 4 | from ..common.models.panoptic_fpn import model 5 | from ..common.train import train 6 | 7 | import detectron2.data.transforms as T 8 | from detectron2.config import LazyCall as L 9 | from detectron2.modeling.backbone import RegNet 10 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock 11 | from detectron2.solver import WarmupParamScheduler 12 | from fvcore.common.param_scheduler import MultiStepParamScheduler 13 | 14 | # train from scratch 15 | train.init_checkpoint = "" 16 | train.amp.enabled = True 17 | train.ddp.fp16_compression = True 18 | # RegNets benefit 
from enabling cudnn benchmark mode 19 | train.cudnn_benchmark = True 20 | 21 | # train.output_dir = 'output/panoptic_regnetx_0.4g' 22 | 23 | model.backbone.bottom_up.freeze_at = 0 24 | # model.backbone.bottom_up.freeze_at = 2 25 | model.backbone.bottom_up = L(RegNet)( 26 | stem_class=SimpleStem, 27 | stem_width=32, 28 | block_class=ResBottleneckBlock, 29 | depth=22, 30 | w_a=24.48, 31 | w_0=24, 32 | w_m=2.54, 33 | group_width=16, 34 | norm="SyncBN", 35 | out_features=["s1", "s2", "s3", "s4"], 36 | ) 37 | model.pixel_std = [57.375, 57.120, 58.395] 38 | # model.roi_heads.box_predictor.test_score_thresh = 0.4 39 | 40 | 41 | # image_size = 1024 42 | # dataloader.train.mapper.augmentations = [ 43 | # L(T.ResizeScale)( 44 | # min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size 45 | # ), 46 | # L(T.FixedSizeCrop)(crop_size=(image_size, image_size)), 47 | # L(T.RandomFlip)(horizontal=True), 48 | # ] 49 | # # recompute boxes due to cropping 50 | # dataloader.train.mapper.recompute_boxes = True 51 | # larger batch-size. 52 | dataloader.train.total_batch_size = 40 53 | dataloader.test.num_workers = 1 54 | 55 | # Equivalent to 100 epochs. 
56 | # 100 ep = 184375 iters * 64 images/iter / 118000 images/ep 57 | train.max_iter = 184375 58 | 59 | lr_multiplier = L(WarmupParamScheduler)( 60 | scheduler=L(MultiStepParamScheduler)( 61 | values=[0.5, 0.1, 0.01], 62 | milestones=[163889, 177546], 63 | num_updates=train.max_iter, 64 | ), 65 | warmup_length=500 / train.max_iter, 66 | warmup_factor=0.067, 67 | ) 68 | 69 | optimizer.lr = 0.1 70 | optimizer.weight_decay = 4e-5 -------------------------------------------------------------------------------- /configs/new_baselines/panoptic_fpn_regnetx_0.4g_s.py: -------------------------------------------------------------------------------- 1 | from ..common.optim import SGD as optimizer 2 | from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier 3 | from ..common.data.coco_panoptic_separated import dataloader 4 | from ..common.models.panoptic_fpn import model 5 | from ..common.train import train 6 | from yolov7.modeling.head.sem_seg_head import SemSegFPNHead 7 | from detectron2.layers import ShapeSpec 8 | 9 | import detectron2.data.transforms as T 10 | from detectron2.config import LazyCall as L 11 | from detectron2.modeling.backbone import RegNet 12 | from detectron2.modeling.backbone.regnet import SimpleStem, ResBottleneckBlock 13 | from detectron2.solver import WarmupParamScheduler 14 | from fvcore.common.param_scheduler import MultiStepParamScheduler 15 | 16 | # train from scratch 17 | train.init_checkpoint = "" 18 | train.amp.enabled = True 19 | train.ddp.fp16_compression = True 20 | # RegNets benefit from enabling cudnn benchmark mode 21 | train.cudnn_benchmark = True 22 | 23 | # train.output_dir = 'output/panoptic_regnetx_0.4g' 24 | model.sem_seg_head = L(SemSegFPNHead)( 25 | input_shape={ 26 | f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}") 27 | for f, s in zip(["p3", "p4", "p5"], [8, 16, 32]) 28 | }, 29 | ignore_value=255, 30 | num_classes=54, # COCO stuff + 1 31 | conv_dims=128, 32 | common_stride=8, 33 | loss_weight=0.5, 
34 | norm="GN", 35 | ) 36 | 37 | model.backbone.bottom_up.freeze_at = 0 38 | # model.backbone.bottom_up.freeze_at = 2 39 | model.backbone.bottom_up = L(RegNet)( 40 | stem_class=SimpleStem, 41 | stem_width=32, 42 | block_class=ResBottleneckBlock, 43 | depth=22, 44 | w_a=24.48, 45 | w_0=24, 46 | w_m=2.54, 47 | group_width=16, 48 | norm="SyncBN", 49 | out_features=["s1", "s2", "s3", "s4"], 50 | ) 51 | model.pixel_std = [57.375, 57.120, 58.395] 52 | # model.roi_heads.box_predictor.test_score_thresh = 0.4 53 | 54 | 55 | # image_size = 1024 56 | # dataloader.train.mapper.augmentations = [ 57 | # L(T.ResizeScale)( 58 | # min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size 59 | # ), 60 | # L(T.FixedSizeCrop)(crop_size=(image_size, image_size)), 61 | # L(T.RandomFlip)(horizontal=True), 62 | # ] 63 | # # recompute boxes due to cropping 64 | # dataloader.train.mapper.recompute_boxes = True 65 | # larger batch-size. 66 | dataloader.train.total_batch_size = 32 67 | dataloader.test.num_workers = 1 68 | 69 | # Equivalent to 100 epochs. 
70 | # 100 ep = 184375 iters * 64 images/iter / 118000 images/ep 71 | train.max_iter = 184375 72 | 73 | lr_multiplier = L(WarmupParamScheduler)( 74 | scheduler=L(MultiStepParamScheduler)( 75 | values=[0.5, 0.1, 0.01], 76 | milestones=[163889, 177546], 77 | num_updates=train.max_iter, 78 | ), 79 | warmup_length=500 / train.max_iter, 80 | warmup_factor=0.067, 81 | ) 82 | 83 | optimizer.lr = 0.1 84 | optimizer.weight_decay = 4e-5 -------------------------------------------------------------------------------- /configs/taco/darknet53.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | BACKBONE: 6 | NAME: "build_darknet_backbone" 7 | 8 | DARKNET: 9 | WEIGHTS: "weights/yolov3.pt" # using pytorch weights from ultralytics's yolov3 10 | DEPTH: 53 11 | STEM_OUT_CHANNELS: 32 12 | WEIGHTS: "" 13 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 14 | YOLO: 15 | ANCHORS: 16 | [ 17 | [[116, 90], [156, 198], [373, 326]], 18 | [[30, 61], [62, 45], [42, 119]], 19 | [[10, 13], [16, 30], [33, 23]], 20 | ] 21 | CLASSES: 60 22 | IN_FEATURES: ["dark3", "dark4", "dark5"] 23 | CONF_THRESHOLD: 0.01 24 | NMS_THRESHOLD: 0.5 25 | IGNORE_THRESHOLD: 0.1 26 | DATASETS: 27 | TRAIN: ("taco_train",) 28 | TEST: ("taco_val",) 29 | 30 | SOLVER: 31 | IMS_PER_BATCH: 16 32 | BASE_LR: 0.005 33 | STEPS: (60000, 80000) 34 | MAX_ITER: 90000 35 | 36 | DATALOADER: 37 | # proposals are part of the dataset_dicts, and take a lot of RAM 38 | NUM_WORKERS: 2 39 | -------------------------------------------------------------------------------- /configs/taco/r50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_resnet_fpn_backbone" 8 | RESNETS: 9 | DEPTH: 50 10 | OUT_FEATURES: ["res3", 
"res4", "res5"] # fpn produce 4 levels, only using 3 for now 11 | FPN: 12 | IN_FEATURES: ["res3", "res4", "res5"] 13 | YOLO: 14 | ANCHORS: 15 | [ 16 | [[116, 90], [156, 198], [373, 326]], 17 | [[30, 61], [62, 45], [42, 119]], 18 | [[10, 13], [16, 30], [33, 23]], 19 | ] 20 | CLASSES: 60 21 | IN_FEATURES: ["p3", "p4", "p5"] # p3, p4, p5 from FPN 22 | CONF_THRESHOLD: 0.01 23 | NMS_THRESHOLD: 0.5 24 | IGNORE_THRESHOLD: 0.1 25 | VARIANT: "yolov7" 26 | LOSS_TYPE: "v4" 27 | DATASETS: 28 | TRAIN: ("taco_train",) 29 | TEST: ("taco_val",) 30 | 31 | SOLVER: 32 | IMS_PER_BATCH: 48 33 | BASE_LR: 0.0005 34 | STEPS: (70000, 90000) 35 | MAX_ITER: 100000 36 | 37 | DATALOADER: 38 | # proposals are part of the dataset_dicts, and take a lot of RAM 39 | NUM_WORKERS: 2 40 | -------------------------------------------------------------------------------- /configs/tidal_plate/yolox_s.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [0.485, 0.456, 0.406] # same value as PP-YOLOv2, RGB order 4 | PIXEL_STD: [0.229, 0.224, 0.225] 5 | 6 | WEIGHTS: "" 7 | MASK_ON: False 8 | META_ARCHITECTURE: "YOLOX" 9 | BACKBONE: 10 | NAME: "build_cspdarknetx_backbone" 11 | 12 | DARKNET: 13 | WEIGHTS: "" 14 | DEPTH_WISE: False 15 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 16 | 17 | YOLO: 18 | CLASSES: 13 19 | IN_FEATURES: ["dark3", "dark4", "dark5"] 20 | # CONF_THRESHOLD: 0.01 21 | CONF_THRESHOLD: 0.01 22 | NMS_THRESHOLD: 0.65 23 | IGNORE_THRESHOLD: 0.7 24 | WIDTH_MUL: 0.50 25 | DEPTH_MUL: 0.33 26 | LOSS_TYPE: "v7" 27 | LOSS: 28 | LAMBDA_IOU: 1.5 29 | 30 | DATASETS: 31 | TRAIN: ("tl_train",) 32 | TEST: ("tl_val",) 33 | CLASS_NAMES: 34 | [ 35 | "tidal_plate.forward", 36 | "tidal_plate.left", 37 | "tidal_plate.right", 38 | "tidal_plate.left_forward", 39 | "tidal_plate.right_forward", 40 | "tidal_plate.u_turn", 41 | "tidal_plate.u_turn_left", 42 | "tidal_plate.u_turn_forward", 43 | 
"tidal_plate.u_turn_left_forward", 44 | "tidal_plate.left_forward_right", 45 | "tidal_plate.unknown", 46 | "tidal_plate.cross_forbidden", 47 | "tidal_plate.black_screen", 48 | ] 49 | 50 | INPUT: 51 | # FORMAT: "RGB" # using BGR default 52 | MIN_SIZE_TRAIN: (416, 512, 608, 768, 800) 53 | MAX_SIZE_TRAIN: 1920 # force max size train to 800? 54 | MIN_SIZE_TEST: 800 55 | MAX_SIZE_TEST: 1920 56 | # open all augmentations 57 | JITTER_CROP: 58 | ENABLED: False 59 | RESIZE: 60 | ENABLED: False 61 | # SHAPE: (540, 960) 62 | DISTORTION: 63 | ENABLED: True 64 | # MOSAIC: 65 | # ENABLED: True 66 | # NUM_IMAGES: 4 67 | # DEBUG_VIS: True 68 | # # MOSAIC_WIDTH: 960 69 | # # MOSAIC_HEIGHT: 540 70 | MOSAIC_AND_MIXUP: 71 | ENABLED: True 72 | DEBUG_VIS: False 73 | ENABLE_MIXUP: True 74 | DISABLE_AT_ITER: 120000 75 | 76 | SOLVER: 77 | # enable fp16 training 78 | AMP: 79 | ENABLED: true 80 | IMS_PER_BATCH: 112 81 | # IMS_PER_BATCH: 12 82 | BASE_LR: 0.025 83 | STEPS: (60000, 80000) 84 | WARMUP_FACTOR: 0.00033333 85 | WARMUP_ITERS: 1500 86 | MAX_ITER: 150000 87 | LR_SCHEDULER_NAME: "WarmupCosineLR" 88 | CHECKPOINT_PERIOD: 5000 89 | 90 | TEST: 91 | EVAL_PERIOD: 5000 92 | # EVAL_PERIOD: 0 93 | OUTPUT_DIR: "output/tidal_plate_yolox_s" 94 | VIS_PERIOD: 5000 95 | 96 | DATALOADER: 97 | # proposals are part of the dataset_dicts, and take a lot of RAM 98 | NUM_WORKERS: 3 99 | -------------------------------------------------------------------------------- /configs/tl/cspdarknet.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YoloV7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | META_ARCHITECTURE: "YOLOV7" 6 | BACKBONE: 7 | NAME: "build_cspdarknet_backbone" 8 | 9 | DARKNET: 10 | WEIGHTS: "weights/cspdarknet53_ra_256-d05c7c21.pth" # manually down from: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/cspdarknet53_ra_256-d05c7c21.pth 11 | DEPTH: 53 12 | WITH_CSP: True 13 | STEM_OUT_CHANNELS: 32 14 | 
WEIGHTS: "" 15 | OUT_FEATURES: ["layer3", "layer4", "layer5"] 16 | YOLO: 17 | ANCHORS: 18 | [ 19 | [[48, 113], [85, 39], [30, 73]], 20 | [[20, 48], [40, 19], [14, 34]], 21 | [[10, 24], [21, 9], [6, 15]], 22 | ] 23 | CLASSES: 5 24 | IN_FEATURES: ["layer3", "layer4", "layer5"] 25 | CONF_THRESHOLD: 0.6 26 | NMS_THRESHOLD: 0.1 27 | IGNORE_THRESHOLD: 0.5 28 | LOSS_TYPE: "v7" 29 | 30 | DATASETS: 31 | TRAIN: ("tl_train",) 32 | TEST: ("tl_val",) 33 | 34 | 35 | INPUT: 36 | # open all augmentations 37 | JITTER_CROP: 38 | ENABLED: True 39 | RESIZE: 40 | ENABLED: True 41 | SHAPE: (540, 960) 42 | DISTORTION: 43 | ENABLED: True 44 | MOSAIC: 45 | ENABLED: True 46 | NUM_IMAGES: 4 47 | MOSAIC_WIDTH: 960 48 | MOSAIC_HEIGHT: 540 49 | 50 | SOLVER: 51 | IMS_PER_BATCH: 32 52 | BASE_LR: 0.0008 53 | WARMUP_ITERS: 1000 54 | STEPS: (60000, 80000) 55 | MAX_ITER: 120000 56 | CLIP_GRADIENTS: 57 | ENABLED: True 58 | CHECKPOINT_PERIOD: 1000 59 | 60 | TEST: 61 | # EVAL_PERIOD: 5000 62 | EVAL_PERIOD: 0 63 | OUTPUT_DIR: "output/tl_cspdarknet53" 64 | VIS_PERIOD: 5000 65 | 66 | DATALOADER: 67 | # proposals are part of the dataset_dicts, and take a lot of RAM 68 | NUM_WORKERS: 2 69 | -------------------------------------------------------------------------------- /configs/tl/darknet53.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YoloV7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | BACKBONE: 6 | NAME: "build_darknet_backbone" 7 | 8 | DARKNET: 9 | WEIGHTS: "weights/yolov3.pt" # using pytorch weights from ultralytics's yolov3 10 | DEPTH: 53 11 | STEM_OUT_CHANNELS: 32 12 | WEIGHTS: "" 13 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 14 | YOLO: 15 | ANCHORS: 16 | [ 17 | [[48, 113], [85, 39], [30, 73]], 18 | [[20, 48], [40, 19], [14, 34]], 19 | [[10, 24], [21, 9], [6, 15]], 20 | ] 21 | CLASSES: 5 22 | IN_FEATURES: ["dark3", "dark4", "dark5"] 23 | CONF_THRESHOLD: 0.6 24 | NMS_THRESHOLD: 0.1 25 | IGNORE_THRESHOLD: 0.1 26 | DATASETS: 27 | 
TRAIN: ("tl_train",) 28 | TEST: ("tl_val",) 29 | 30 | SOLVER: 31 | IMS_PER_BATCH: 32 32 | BASE_LR: 0.03 33 | WARMUP_ITERS: 2000 34 | STEPS: (60000, 80000) 35 | MAX_ITER: 90000 36 | CLIP_GRADIENTS: 37 | ENABLED: True 38 | CHECKPOINT_PERIOD: 1000 39 | 40 | TEST: 41 | # EVAL_PERIOD: 5000 42 | EVAL_PERIOD: 0 43 | OUTPUT_DIR: "output/tl_darknet53" 44 | VIS_PERIOD: 5000 45 | 46 | DATALOADER: 47 | # proposals are part of the dataset_dicts, and take a lot of RAM 48 | NUM_WORKERS: 2 49 | -------------------------------------------------------------------------------- /configs/tl/detr/detrt_256_6_6_regnetx_0.4g.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "Detr" 3 | WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" 4 | PIXEL_MEAN: [123.675, 116.280, 103.530] 5 | PIXEL_STD: [58.395, 57.120, 57.375] 6 | MASK_ON: False 7 | 8 | BACKBONE: 9 | NAME: "build_regnet_backbone" 10 | REGNETS: 11 | TYPE: "RegNetX_400MF" 12 | OUT_FEATURES: ["s2", "s3", "s4"] # fpn produce 4 levels, only using 3 for now 13 | # RESNETS: 14 | # DEPTH: 50 15 | # STRIDE_IN_1X1: False 16 | # OUT_FEATURES: ["res2", "res3", "res4", "res5"] 17 | DETR: 18 | GIOU_WEIGHT: 2.0 19 | L1_WEIGHT: 5.0 20 | NUM_OBJECT_QUERIES: 100 21 | ENC_LAYERS: 6 22 | DEC_LAYERS: 6 23 | HIDDEN_DIM: 256 24 | CLASSES: 5 25 | 26 | DATASETS: 27 | TRAIN: ("tl_train",) 28 | TEST: ("tl_val",) 29 | 30 | SOLVER: 31 | IMS_PER_BATCH: 56 32 | BASE_LR: 0.001 33 | STEPS: (369600,) 34 | MAX_ITER: 554400 35 | WARMUP_FACTOR: 1.0 36 | WARMUP_ITERS: 10 37 | WEIGHT_DECAY: 0.0001 38 | OPTIMIZER: "ADAMW" 39 | BACKBONE_MULTIPLIER: 0.1 40 | CLIP_GRADIENTS: 41 | ENABLED: True 42 | CLIP_TYPE: "norm" 43 | CLIP_VALUE: 0.01 44 | NORM_TYPE: 2.0 45 | INPUT: 46 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 47 | CROP: 48 | ENABLED: True 49 | TYPE: "absolute_range" 50 | SIZE: (384, 600) 51 | FORMAT: "RGB" 52 | TEST: 53 | EVAL_PERIOD: 4000 54 | DATALOADER: 
55 | FILTER_EMPTY_ANNOTATIONS: False 56 | NUM_WORKERS: 2 57 | VERSION: 2 58 | OUTPUT_DIR: "output/coco_detr_regx" -------------------------------------------------------------------------------- /configs/tl/r2_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_res2net_backbone" 8 | RESNETS: 9 | R2TYPE: "res2net50_v1b" 10 | DEPTH: 50 11 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 12 | YOLO: 13 | ANCHORS: 14 | [ 15 | [[322, 428], [194, 260], [123, 168]], 16 | [[84, 115], [60, 79], [41, 53]], 17 | [[27, 37], [18, 23], [10, 13]], 18 | ] 19 | CLASSES: 5 20 | IN_FEATURES: ["res3", "res4", "res5"] # p3, p4, p5 from FPN 21 | CONF_THRESHOLD: 0.01 22 | NMS_THRESHOLD: 0.2 23 | IGNORE_THRESHOLD: 0.5 24 | VARIANT: "yolov7" 25 | LOSS_TYPE: "v7" 26 | DATASETS: 27 | TRAIN: ("tl_train",) 28 | TEST: ("tl_val",) 29 | 30 | INPUT: 31 | # open all augmentations 32 | JITTER_CROP: 33 | ENABLED: True 34 | RESIZE: 35 | ENABLED: True 36 | SHAPE: (540, 960) 37 | DISTORTION: 38 | ENABLED: True 39 | MOSAIC: 40 | ENABLED: True 41 | NUM_IMAGES: 4 42 | MOSAIC_WIDTH: 960 43 | MOSAIC_HEIGHT: 540 44 | 45 | 46 | SOLVER: 47 | IMS_PER_BATCH: 32 48 | BASE_LR: 0.0008 49 | STEPS: (60000, 80000) 50 | MAX_ITER: 270000 51 | 52 | 53 | TEST: 54 | # EVAL_PERIOD: 10000 55 | EVAL_PERIOD: 0 56 | OUTPUT_DIR: "output/tl_r2_50" 57 | 58 | DATALOADER: 59 | # proposals are part of the dataset_dicts, and take a lot of RAM 60 | NUM_WORKERS: 8 61 | -------------------------------------------------------------------------------- /configs/tl/r50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | BACKBONE: 7 |
NAME: "build_resnet_backbone" 8 | RESNETS: 9 | DEPTH: 50 10 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 11 | YOLO: 12 | ANCHORS: 13 | [ 14 | [[48, 113], [85, 39], [30, 73]], 15 | [[20, 48], [40, 19], [14, 34]], 16 | [[10, 24], [21, 9], [6, 15]], 17 | ] 18 | CLASSES: 5 19 | IN_FEATURES: ["res3", "res4", "res5"] # p3, p4, p5 from FPN 20 | CONF_THRESHOLD: 0.1 21 | NMS_THRESHOLD: 0.2 22 | IGNORE_THRESHOLD: 0.5 23 | VARIANT: "yolov7" 24 | LOSS_TYPE: "v7" 25 | 26 | DATASETS: 27 | TRAIN: ("tl_train",) 28 | TEST: ("tl_val",) 29 | 30 | INPUT: 31 | # open all augmentations 32 | JITTER_CROP: 33 | ENABLED: True 34 | RESIZE: 35 | ENABLED: True 36 | SHAPE: (540, 960) 37 | DISTORTION: 38 | ENABLED: True 39 | MOSAIC: 40 | ENABLED: True 41 | NUM_IMAGES: 4 42 | MOSAIC_WIDTH: 960 43 | MOSAIC_HEIGHT: 540 44 | 45 | SOLVER: 46 | IMS_PER_BATCH: 32 47 | BASE_LR: 0.000064 48 | WARMUP_ITERS: 1000 49 | STEPS: (60000, 80000) 50 | MAX_ITER: 120000 51 | CLIP_GRADIENTS: 52 | ENABLED: True 53 | CHECKPOINT_PERIOD: 5000 54 | 55 | 56 | OUTPUT_DIR: "output/tl_r50_fpn" 57 | 58 | DATALOADER: 59 | # proposals are part of the dataset_dicts, and take a lot of RAM 60 | NUM_WORKERS: 2 61 | -------------------------------------------------------------------------------- /configs/tl/res2net_bifpn.yaml: -------------------------------------------------------------------------------- 1 | 2 | _BASE_: "../Base-YOLOv7.yaml" 3 | MODEL: 4 | META_ARCHITECTURE: "YOLOV7" 5 | WEIGHTS: "weights/r2_50.pkl" 6 | MASK_ON: False 7 | BACKBONE: 8 | NAME: "build_res2net_bifpn_backbone" 9 | BIFPN: 10 | NUM_BIFPN: 7 11 | OUT_CHANNELS: 288 12 | RESNETS: 13 | DEPTH: 50 14 | WIDTH_PER_GROUP: 26 15 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 16 | DEFORM_MODULATED: True 17 | YOLO: 18 | ANCHORS: 19 | [ 20 | [[48, 113], [85, 39], [30, 73]], 21 | [[20, 48], [40, 19], [14, 34]], 22 | [[10, 24], [21, 9], [6, 15]], 23 | ] 24 | CLASSES: 5 25 | IN_FEATURES: ["p3", "p4", "p5"] #
p3, p4, p5 from FPN 26 | CONF_THRESHOLD: 0.2 27 | NMS_THRESHOLD: 0.1 28 | IGNORE_THRESHOLD: 0.1 29 | VARIANT: "yolov7" 30 | LOSS_TYPE: "v4" 31 | DATASETS: 32 | TRAIN: ("tl_train",) 33 | TEST: ("tl_val",) 34 | 35 | INPUT: 36 | # open all augmentations 37 | JITTER_CROP: 38 | ENABLED: True 39 | RESIZE: 40 | ENABLED: True 41 | SHAPE: (540, 960) 42 | DISTORTION: 43 | ENABLED: True 44 | MOSAIC: 45 | ENABLED: True 46 | NUM_IMAGES: 4 47 | MOSAIC_WIDTH: 960 48 | MOSAIC_HEIGHT: 540 49 | 50 | SOLVER: 51 | IMS_PER_BATCH: 48 52 | BASE_LR: 0.0005 53 | STEPS: (70000, 90000) 54 | MAX_ITER: 100000 55 | CHECKPOINT_PERIOD: 1000 56 | 57 | DATALOADER: 58 | # proposals are part of the dataset_dicts, and take a lot of RAM 59 | NUM_WORKERS: 2 60 | 61 | 62 | -------------------------------------------------------------------------------- /configs/tl/res2net_fpn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | # WEIGHTS: "weights/r2_50.pkl" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_res2net_fpn_backbone" 8 | RESNETS: 9 | DEPTH: 34 10 | # WIDTH_PER_GROUP: 26 11 | RES2_OUT_CHANNELS: 64 12 | # DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 13 | DEFORM_MODULATED: True 14 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 15 | FPN: 16 | IN_FEATURES: ["res3", "res4", "res5"] 17 | OUT_CHANNELS: 512 18 | YOLO: 19 | ANCHORS: 20 | [ 21 | [[48, 113], [85, 39], [30, 73]], 22 | [[20, 48], [40, 19], [14, 34]], 23 | [[10, 24], [21, 9], [6, 15]], 24 | ] 25 | CLASSES: 5 26 | IN_FEATURES: ["p3", "p4", "p5"] # p3, p4, p5 from FPN 27 | CONF_THRESHOLD: 0.2 28 | NMS_THRESHOLD: 0.1 29 | IGNORE_THRESHOLD: 0.1 30 | VARIANT: "yolov7" 31 | LOSS_TYPE: "v7" 32 | 33 | DATASETS: 34 | TRAIN: ("tl_train",) 35 | TEST: ("tl_val",) 36 | 37 | INPUT: 38 | # open all augmentations 39 | JITTER_CROP: 40 | ENABLED: True 41 | RESIZE: 42 | ENABLED: True 43 |
SHAPE: (540, 960) 44 | DISTORTION: 45 | ENABLED: True 46 | MOSAIC: 47 | ENABLED: True 48 | NUM_IMAGES: 4 49 | MOSAIC_WIDTH: 960 50 | MOSAIC_HEIGHT: 540 51 | 52 | SOLVER: 53 | IMS_PER_BATCH: 48 54 | BASE_LR: 0.0009 55 | STEPS: (70000, 90000) 56 | MAX_ITER: 100000 57 | CHECKPOINT_PERIOD: 1000 58 | 59 | DATALOADER: 60 | # proposals are part of the dataset_dicts, and take a lot of RAM 61 | NUM_WORKERS: 2 62 | -------------------------------------------------------------------------------- /configs/tl/x_s_pafpn_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | META_ARCHITECTURE: "YOLOV7P" 6 | BACKBONE: 7 | NAME: "build_cspdarknetx_backbone" 8 | 9 | DARKNET: 10 | WEIGHTS: "" 11 | DEPTH_WISE: False 12 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 13 | YOLO: 14 | ANCHORS: 15 | [ 16 | [[10, 24], [21, 9], [6, 15]], 17 | [[20, 48], [40, 19], [14, 34]], 18 | [[48, 113], [85, 39], [30, 73]], 19 | ] 20 | # [ 21 | # [[48, 113], [85, 39], [30, 73]], 22 | # [[20, 48], [40, 19], [14, 34]], 23 | # [[10, 24], [21, 9], [6, 15]], 24 | # ] 25 | CLASSES: 5 26 | IN_FEATURES: ["dark3", "dark4", "dark5"] 27 | CONF_THRESHOLD: 0.01 28 | NMS_THRESHOLD: 0.5 29 | IGNORE_THRESHOLD: 0.6 30 | WIDTH_MUL: 0.50 31 | DEPTH_MUL: 0.33 32 | LOSS_TYPE: "v7" 33 | 34 | DATASETS: 35 | TRAIN: ("tl_train",) 36 | TEST: ("tl_val",) 37 | 38 | 39 | INPUT: 40 | # open all augmentations 41 | JITTER_CROP: 42 | ENABLED: True 43 | RESIZE: 44 | ENABLED: True 45 | SHAPE: (540, 960) 46 | DISTORTION: 47 | ENABLED: True 48 | MOSAIC: 49 | ENABLED: True 50 | NUM_IMAGES: 4 51 | MOSAIC_WIDTH: 960 52 | MOSAIC_HEIGHT: 540 53 | 54 | SOLVER: 55 | IMS_PER_BATCH: 4 56 | BASE_LR: 0.006 57 | WARMUP_ITERS: 1000 58 | STEPS: (60000, 80000) 59 | MAX_ITER: 90000 60 | CLIP_GRADIENTS: 61 | ENABLED: True 62 | CHECKPOINT_PERIOD: 1000 63 | 64 | TEST: 65 | # EVAL_PERIOD: 5000 66 | EVAL_PERIOD: 0 67 | OUTPUT_DIR:
"output/tl_darknetx_pafpn" 68 | VIS_PERIOD: 5000 69 | 70 | DATALOADER: 71 | # proposals are part of the dataset_dicts, and take a lot of RAM 72 | NUM_WORKERS: 8 73 | -------------------------------------------------------------------------------- /configs/tl/yolov5_s.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | META_ARCHITECTURE: "YOLOV5" 6 | BACKBONE: 7 | NAME: "build_cspdarknetx_backbone" 8 | 9 | DARKNET: 10 | WEIGHTS: "" 11 | DEPTH_WISE: False 12 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 13 | YOLO: 14 | ANCHORS: 15 | [ 16 | [[56, 40], [29, 64], [18, 25]], 17 | [[91, 192], [137, 109], [60, 103]], 18 | [[354, 330], [289, 169], [168, 270]], 19 | ] 20 | CLASSES: 20 21 | IN_FEATURES: ["dark3", "dark4", "dark5"] 22 | CONF_THRESHOLD: 0.001 23 | NMS_THRESHOLD: 0.65 24 | IGNORE_THRESHOLD: 0.6 25 | WIDTH_MUL: 0.50 26 | DEPTH_MUL: 0.33 27 | VARIANT: "yolov7" 28 | LOSS_TYPE: "v7" 29 | LOSS: 30 | LAMBDA_IOU: 0.05 31 | LAMBDA_CONF: 1.0 32 | LAMBDA_CLS: 0.5 33 | ANCHOR_RATIO_THRESH: 4.0 34 | 35 | DATASETS: 36 | TRAIN: ("tl_train",) 37 | TEST: ("tl_val",) 38 | 39 | INPUT: 40 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 41 | MAX_SIZE_TRAIN: 1056 42 | MIN_SIZE_TEST: 672 43 | MAX_SIZE_TEST: 768 44 | # open all augmentations 45 | JITTER_CROP: 46 | ENABLED: False 47 | RESIZE: 48 | ENABLED: False 49 | # SHAPE: (540, 960) 50 | DISTORTION: 51 | ENABLED: False 52 | # MOSAIC: 53 | # ENABLED: True 54 | # NUM_IMAGES: 4 55 | # DEBUG_VIS: True 56 | # # MOSAIC_WIDTH: 960 57 | # # MOSAIC_HEIGHT: 540 58 | MOSAIC_AND_MIXUP: 59 | ENABLED: True 60 | DEBUG_VIS: False 61 | ENABLE_MIXUP: False 62 | 63 | SOLVER: 64 | AMP: 65 | ENABLED: true 66 | IMS_PER_BATCH: 12 67 | BASE_LR: 0.046 68 | STEPS: (60000, 80000) 69 | WARMUP_FACTOR: 0.00033333 70 | WARMUP_ITERS: 1500 71 | MAX_ITER: 150000 72 | LR_SCHEDULER_NAME: "WarmupCosineLR" 73 | 74 | TEST: 75 | EVAL_PERIOD: 5000 76 | # 
EVAL_PERIOD: 0 77 | OUTPUT_DIR: "output/tl_yolov5_s" 78 | VIS_PERIOD: 5000 79 | 80 | DATALOADER: 81 | # proposals are part of the dataset_dicts, and take a lot of RAM 82 | NUM_WORKERS: 1 83 | -------------------------------------------------------------------------------- /configs/tl/yolox_s.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [0.485, 0.456, 0.406] # same value as PP-YOLOv2, RGB order 4 | PIXEL_STD: [0.229, 0.224, 0.225] 5 | 6 | WEIGHTS: "" 7 | MASK_ON: False 8 | META_ARCHITECTURE: "YOLOX" 9 | BACKBONE: 10 | NAME: "build_cspdarknetx_backbone" 11 | 12 | DARKNET: 13 | WEIGHTS: "" 14 | DEPTH_WISE: False 15 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 16 | 17 | YOLO: 18 | CLASSES: 5 19 | IN_FEATURES: ["dark3", "dark4", "dark5"] 20 | # CONF_THRESHOLD: 0.01 21 | CONF_THRESHOLD: 0.01 22 | NMS_THRESHOLD: 0.65 23 | IGNORE_THRESHOLD: 0.7 24 | WIDTH_MUL: 0.50 25 | DEPTH_MUL: 0.33 26 | LOSS_TYPE: "v7" 27 | LOSS: 28 | LAMBDA_IOU: 1.5 29 | 30 | DATASETS: 31 | TRAIN: ("tl_train",) 32 | TEST: ("tl_val",) 33 | 34 | INPUT: 35 | # FORMAT: "RGB" # using BGR default 36 | MIN_SIZE_TRAIN: (416, 512, 608, 768, 800) 37 | MAX_SIZE_TRAIN: 1920 # force max size train to 800? 
38 | MIN_SIZE_TEST: 800 39 | MAX_SIZE_TEST: 1920 40 | # open all augmentations 41 | JITTER_CROP: 42 | ENABLED: False 43 | RESIZE: 44 | ENABLED: False 45 | # SHAPE: (540, 960) 46 | DISTORTION: 47 | ENABLED: True 48 | # MOSAIC: 49 | # ENABLED: True 50 | # NUM_IMAGES: 4 51 | # DEBUG_VIS: True 52 | # # MOSAIC_WIDTH: 960 53 | # # MOSAIC_HEIGHT: 540 54 | MOSAIC_AND_MIXUP: 55 | ENABLED: True 56 | DEBUG_VIS: False 57 | ENABLE_MIXUP: True 58 | DISABLE_AT_ITER: 120000 59 | 60 | SOLVER: 61 | # enable fp16 training 62 | AMP: 63 | ENABLED: true 64 | IMS_PER_BATCH: 112 65 | # IMS_PER_BATCH: 12 66 | BASE_LR: 0.025 67 | STEPS: (60000, 80000) 68 | WARMUP_FACTOR: 0.00033333 69 | WARMUP_ITERS: 1500 70 | MAX_ITER: 150000 71 | LR_SCHEDULER_NAME: "WarmupCosineLR" 72 | CHECKPOINT_PERIOD: 5000 73 | 74 | TEST: 75 | EVAL_PERIOD: 5000 76 | # EVAL_PERIOD: 0 77 | OUTPUT_DIR: "output/tl_yolox_s" 78 | VIS_PERIOD: 5000 79 | 80 | DATALOADER: 81 | # proposals are part of the dataset_dicts, and take a lot of RAM 82 | NUM_WORKERS: 3 83 | -------------------------------------------------------------------------------- /configs/tl/yolox_s_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [0.485, 0.456, 0.406] # same value as PP-YOLOv2, RGB order 4 | PIXEL_STD: [0.229, 0.224, 0.225] 5 | 6 | WEIGHTS: "" 7 | MASK_ON: False 8 | META_ARCHITECTURE: "YOLOX" 9 | BACKBONE: 10 | NAME: "build_cspdarknetx_backbone" 11 | 12 | DARKNET: 13 | WEIGHTS: "" 14 | DEPTH_WISE: False 15 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 16 | 17 | YOLO: 18 | CLASSES: 5 19 | IN_FEATURES: ["dark3", "dark4", "dark5"] 20 | CONF_THRESHOLD: 0.001 21 | NMS_THRESHOLD: 0.65 22 | IGNORE_THRESHOLD: 0.7 23 | WIDTH_MUL: 0.50 24 | DEPTH_MUL: 0.33 25 | LOSS_TYPE: "v7" 26 | LOSS: 27 | LAMBDA_IOU: 1.5 28 | 29 | DATASETS: 30 | TRAIN: ("tl_train",) 31 | TEST: ("tl_val",) 32 | 33 | INPUT: 34 | # FORMAT: "RGB" # using BGR default 35 | MIN_SIZE_TRAIN: 
(416, 512, 608, 768, 800) 36 | MAX_SIZE_TRAIN: 1920 # force max size train to 800? 37 | MIN_SIZE_TEST: 800 38 | MAX_SIZE_TEST: 1920 39 | # open all augmentations 40 | JITTER_CROP: 41 | ENABLED: False 42 | RESIZE: 43 | ENABLED: False 44 | # SHAPE: (540, 960) 45 | DISTORTION: 46 | ENABLED: True 47 | # MOSAIC: 48 | # ENABLED: True 49 | # NUM_IMAGES: 4 50 | # DEBUG_VIS: True 51 | # # MOSAIC_WIDTH: 960 52 | # # MOSAIC_HEIGHT: 540 53 | MOSAIC_AND_MIXUP: 54 | ENABLED: True 55 | DEBUG_VIS: False 56 | ENABLE_MIXUP: True 57 | DISABLE_AT_ITER: 120000 58 | 59 | 60 | SOLVER: 61 | # enable fp16 training 62 | AMP: 63 | ENABLED: true 64 | # IMS_PER_BATCH: 112 65 | IMS_PER_BATCH: 14 66 | BASE_LR: 0.004 67 | STEPS: (60000, 80000) 68 | WARMUP_FACTOR: 0.00033333 69 | WARMUP_ITERS: 1500 70 | MAX_ITER: 230000 71 | LR_SCHEDULER_NAME: "WarmupCosineLR" 72 | 73 | TEST: 74 | EVAL_PERIOD: 10000 75 | # EVAL_PERIOD: 0 76 | OUTPUT_DIR: "output/tl_yolox_s" 77 | VIS_PERIOD: 5000 78 | 79 | DATALOADER: 80 | # proposals are part of the dataset_dicts, and take a lot of RAM 81 | NUM_WORKERS: 3 82 | -------------------------------------------------------------------------------- /configs/visdrone/r2_50_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_res2net_backbone" 8 | RESNETS: 9 | R2TYPE: "res2net50_v1b" 10 | DEPTH: 50 11 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 12 | YOLO: 13 | ANCHORS: 14 | [ 15 | [[120, 106], [80, 47], [45, 66]], 16 | [[47, 29], [23, 40], [28, 19]], 17 | [[13, 25], [15, 14], [7, 10]], 18 | ] 19 | CLASSES: 10 20 | MAX_BOXES_NUM: 150 21 | IN_FEATURES: ["res3", "res4", "res5"] # p3, p4, p5 from FPN 22 | CONF_THRESHOLD: 0.01 23 | NMS_THRESHOLD: 0.6 24 | IGNORE_THRESHOLD: 0.7 25 | VARIANT: "yolov7" 26 | LOSS_TYPE: "v7" 27 | LOSS: 28 | LAMBDA_IOU: 1.5 29 | 30
| DATASETS: 31 | TRAIN: ("visdrone_train",) 32 | TEST: ("visdrone_val",) 33 | 34 | 35 | 36 | INPUT: 37 | # MIN_SIZE_TRAIN: (416, 512, 608, 768) 38 | MIN_SIZE_TRAIN: (416, 900) 39 | MAX_SIZE_TRAIN: 1056 40 | MIN_SIZE_TEST: 640 41 | MIN_SIZE_TRAIN_SAMPLING: "range" 42 | # open all augmentations 43 | JITTER_CROP: 44 | ENABLED: False 45 | RESIZE: 46 | ENABLED: False 47 | # SHAPE: (540, 960) 48 | DISTORTION: 49 | ENABLED: True 50 | # MOSAIC: 51 | # ENABLED: True 52 | # NUM_IMAGES: 4 53 | # DEBUG_VIS: True 54 | # # MOSAIC_WIDTH: 960 55 | # # MOSAIC_HEIGHT: 540 56 | MOSAIC_AND_MIXUP: 57 | ENABLED: True 58 | DEBUG_VIS: False 59 | ENABLE_MIXUP: False 60 | RANDOM_FLIP: "horizontal" 61 | 62 | 63 | SOLVER: 64 | IMS_PER_BATCH: 2 65 | # BASE_LR: 0.00008 # add lr 66 | BASE_LR: 0.0001 # add lr 67 | STEPS: (60000, 80000) 68 | MAX_ITER: 150000 69 | LR_SCHEDULER_NAME: "WarmupCosineLR" 70 | 71 | 72 | TEST: 73 | EVAL_PERIOD: 10000 74 | # EVAL_PERIOD: 0 75 | OUTPUT_DIR: "output/visdrone_r2_50" 76 | 77 | DATALOADER: 78 | # proposals are part of the dataset_dicts, and take a lot of RAM 79 | NUM_WORKERS: 1 80 | -------------------------------------------------------------------------------- /configs/visdrone/yolov5_s.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | META_ARCHITECTURE: "YOLOV5" 6 | BACKBONE: 7 | NAME: "build_cspdarknetx_backbone" 8 | 9 | DARKNET: 10 | WEIGHTS: "" 11 | DEPTH_WISE: False 12 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 13 | YOLO: 14 | ANCHORS: 15 | [ 16 | [[13, 25], [15, 14], [7, 10]], 17 | [[47, 29], [23, 40], [28, 19]], 18 | [[120, 106], [80, 47], [45, 66]], 19 | ] 20 | CLASSES: 10 21 | IN_FEATURES: ["dark3", "dark4", "dark5"] 22 | CONF_THRESHOLD: 0.001 23 | NMS_THRESHOLD: 0.6 24 | IGNORE_THRESHOLD: 0.6 25 | WIDTH_MUL: 0.50 26 | DEPTH_MUL: 0.33 27 | VARIANT: "yolov7" 28 | LOSS_TYPE: "v7" 29 | LOSS: 30 | LAMBDA_IOU: 0.05 31 | LAMBDA_CONF: 
1.0 32 | LAMBDA_CLS: 0.5 33 | ANCHOR_RATIO_THRESH: 4.0 34 | 35 | DATASETS: 36 | TRAIN: ("visdrone_train",) 37 | TEST: ("visdrone_val",) 38 | 39 | INPUT: 40 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 41 | MAX_SIZE_TRAIN: 1056 42 | MIN_SIZE_TEST: 672 43 | MAX_SIZE_TEST: 768 44 | # open all augmentations 45 | JITTER_CROP: 46 | ENABLED: False 47 | RESIZE: 48 | ENABLED: False 49 | # SHAPE: (540, 960) 50 | DISTORTION: 51 | ENABLED: False 52 | # MOSAIC: 53 | # ENABLED: True 54 | # NUM_IMAGES: 4 55 | # DEBUG_VIS: True 56 | # # MOSAIC_WIDTH: 960 57 | # # MOSAIC_HEIGHT: 540 58 | MOSAIC_AND_MIXUP: 59 | ENABLED: True 60 | DEBUG_VIS: False 61 | ENABLE_MIXUP: False 62 | 63 | SOLVER: 64 | IMS_PER_BATCH: 5 65 | BASE_LR: 0.04 66 | STEPS: (60000, 80000) 67 | WARMUP_FACTOR: 0.00033333 68 | WARMUP_ITERS: 1500 69 | MAX_ITER: 130000 70 | LR_SCHEDULER_NAME: "WarmupCosineLR" 71 | 72 | TEST: 73 | EVAL_PERIOD: 5000 74 | # EVAL_PERIOD: 0 75 | OUTPUT_DIR: "output/visdrone_yolov5_s" 76 | VIS_PERIOD: 5000 77 | 78 | DATALOADER: 79 | # proposals are part of the dataset_dicts, and take a lot of RAM 80 | NUM_WORKERS: 1 81 | -------------------------------------------------------------------------------- /configs/visdrone/yolox_s_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | META_ARCHITECTURE: "YOLOX" 6 | BACKBONE: 7 | NAME: "build_cspdarknetx_backbone" 8 | 9 | DARKNET: 10 | WEIGHTS: "" 11 | DEPTH_WISE: False 12 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 13 | 14 | YOLO: 15 | CLASSES: 10 16 | IN_FEATURES: ["dark3", "dark4", "dark5"] 17 | CONF_THRESHOLD: 0.001 18 | NMS_THRESHOLD: 0.65 19 | IGNORE_THRESHOLD: 0.7 20 | MAX_BOXES_NUM: 150 21 | WIDTH_MUL: 0.50 22 | DEPTH_MUL: 0.33 23 | LOSS_TYPE: "v7" 24 | LOSS: 25 | LAMBDA_IOU: 1.5 26 | 27 | DATASETS: 28 | TRAIN: ("visdrone_train",) 29 | TEST: ("visdrone_val",) 30 | 31 | 32 | 33 | INPUT: 34 | # MIN_SIZE_TRAIN: (416, 512, 608, 768) 
35 | MIN_SIZE_TRAIN: (416, 900) 36 | MIN_SIZE_TRAIN_SAMPLING: "range" 37 | MAX_SIZE_TRAIN: 1056 38 | MIN_SIZE_TEST: 640 39 | MAX_SIZE_TEST: 960 40 | 41 | # open all augmentations 42 | JITTER_CROP: 43 | ENABLED: False 44 | RESIZE: 45 | ENABLED: False 46 | # SHAPE: (540, 960) 47 | DISTORTION: 48 | ENABLED: True 49 | # MOSAIC: 50 | # ENABLED: True 51 | # NUM_IMAGES: 4 52 | # DEBUG_VIS: True 53 | # # MOSAIC_WIDTH: 960 54 | # # MOSAIC_HEIGHT: 540 55 | MOSAIC_AND_MIXUP: 56 | ENABLED: True 57 | DEBUG_VIS: False 58 | ENABLE_MIXUP: False 59 | RANDOM_FLIP: "horizontal" 60 | 61 | SOLVER: 62 | # enable fp16 training 63 | # AMP: 64 | # ENABLED: true 65 | IMS_PER_BATCH: 5 66 | # BASE_LR: 0.00125 67 | # STEPS: (60000, 80000) 68 | # MAX_ITER: 130000 69 | WARMUP_FACTOR: 0.00033333 70 | WARMUP_ITERS: 3000 71 | BASE_LR: 0.005 72 | STEPS: (52500, 62500) 73 | MAX_ITER: 230000 74 | LR_SCHEDULER_NAME: "WarmupCosineLR" 75 | 76 | TEST: 77 | EVAL_PERIOD: 10000 78 | # EVAL_PERIOD: 0 79 | 80 | OUTPUT_DIR: "output/visdrone_yolox_s" 81 | VIS_PERIOD: 5000 82 | 83 | DATALOADER: 84 | # proposals are part of the dataset_dicts, and take a lot of RAM 85 | NUM_WORKERS: 2 86 | -------------------------------------------------------------------------------- /configs/voc/darknet53_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_darknet_backbone" 8 | DARKNET: 9 | WEIGHTS: "weights/yolov3.pt" # using pytorch weights from ultralytics's yolov3 10 | DEPTH: 53 11 | # WITH_CSP: True 12 | STEM_OUT_CHANNELS: 32 13 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 14 | YOLO: 15 | ANCHORS: 16 | [ 17 | [[362, 331], [296, 170], [178, 277]], 18 | [[98, 205], [139, 116], [57, 116]], 19 | [[75, 53], [33, 57], [19, 24]], 20 | ] 21 | CLASSES: 20 22 | IN_FEATURES: ["dark3", "dark4", "dark5"] 23 | CONF_THRESHOLD: 0.01 24 | NMS_THRESHOLD: 0.2 25 |
IGNORE_THRESHOLD: 0.7 26 | VARIANT: "yolov7" 27 | LOSS_TYPE: "v7" 28 | DATASETS: 29 | TRAIN: ("voc_train",) 30 | TEST: ("voc_val",) 31 | 32 | INPUT: 33 | MIN_SIZE_TRAIN: (416, 512, 608, 768, 800) 34 | # open all augmentations 35 | JITTER_CROP: 36 | ENABLED: True 37 | RESIZE: 38 | ENABLED: True 39 | # SHAPE: (540, 960) 40 | DISTORTION: 41 | ENABLED: True 42 | MOSAIC: 43 | ENABLED: True 44 | NUM_IMAGES: 4 45 | # MOSAIC_WIDTH: 960 46 | # MOSAIC_HEIGHT: 540 47 | 48 | SOLVER: 49 | IMS_PER_BATCH: 4 50 | BASE_LR: 0.008 51 | # WARMUP_ITERS: 20000 52 | STEPS: (60000, 80000) 53 | MAX_ITER: 120000 54 | LR_SCHEDULER_NAME: "WarmupCosineLR" 55 | 56 | TEST: 57 | EVAL_PERIOD: 10000 58 | # EVAL_PERIOD: 0 59 | OUTPUT_DIR: "output/voc_darknet" 60 | 61 | DATALOADER: 62 | # proposals are part of the dataset_dicts, and take a lot of RAM 63 | NUM_WORKERS: 2 64 | -------------------------------------------------------------------------------- /configs/voc/r2_50_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_res2net_backbone" 8 | RESNETS: 9 | R2TYPE: "res2net50_v1b" 10 | DEPTH: 50 11 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 12 | YOLO: 13 | ANCHORS: 14 | [ 15 | [[354, 330], [289, 169], [168, 270]], 16 | [[91, 192], [137, 109], [60, 103]], 17 | [[56, 40], [29, 64], [18, 25]], 18 | ] 19 | CLASSES: 20 20 | IN_FEATURES: ["res3", "res4", "res5"] # p3, p4, p5 from FPN 21 | CONF_THRESHOLD: 0.01 22 | NMS_THRESHOLD: 0.2 23 | IGNORE_THRESHOLD: 0.7 24 | VARIANT: "yolov7" 25 | LOSS_TYPE: "v7" 26 | LOSS: 27 | LAMBDA_IOU: 1.5 28 | 29 | DATASETS: 30 | TRAIN: ("voc_train",) 31 | TEST: ("voc_val",) 32 | 33 | 34 | 35 | INPUT: 36 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 37 | MAX_SIZE_TRAIN: 768 38 | MIN_SIZE_TEST: 512 39 | # open all augmentations 40 | JITTER_CROP: 41 | ENABLED: 
False 42 | RESIZE: 43 | ENABLED: False 44 | # SHAPE: (540, 960) 45 | DISTORTION: 46 | ENABLED: False 47 | # MOSAIC: 48 | # ENABLED: True 49 | # NUM_IMAGES: 4 50 | # DEBUG_VIS: True 51 | # # MOSAIC_WIDTH: 960 52 | # # MOSAIC_HEIGHT: 540 53 | MOSAIC_AND_MIXUP: 54 | ENABLED: True 55 | DEBUG_VIS: False 56 | ENABLE_MIXUP: False 57 | 58 | 59 | SOLVER: 60 | IMS_PER_BATCH: 3 61 | BASE_LR: 0.002 62 | STEPS: (60000, 80000) 63 | MAX_ITER: 130000 64 | LR_SCHEDULER_NAME: "WarmupCosineLR" 65 | 66 | 67 | TEST: 68 | EVAL_PERIOD: 9000 69 | # EVAL_PERIOD: 0 70 | OUTPUT_DIR: "output/voc_r2_50" 71 | 72 | DATALOADER: 73 | # proposals are part of the dataset_dicts, and take a lot of RAM 74 | NUM_WORKERS: 1 75 | -------------------------------------------------------------------------------- /configs/voc/x_s_pafpn_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | META_ARCHITECTURE: "YOLOV7P" 6 | BACKBONE: 7 | NAME: "build_cspdarknetx_backbone" 8 | 9 | DARKNET: 10 | WEIGHTS: "" 11 | DEPTH_WISE: False 12 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 13 | YOLO: 14 | ANCHORS: 15 | [ 16 | [[75, 53], [33, 57], [19, 24]], 17 | [[98, 205], [139, 116], [57, 116]], 18 | [[362, 331], [296, 170], [178, 277]], 19 | ] 20 | CLASSES: 20 21 | IN_FEATURES: ["dark3", "dark4", "dark5"] 22 | CONF_THRESHOLD: 0.01 23 | NMS_THRESHOLD: 0.2 24 | IGNORE_THRESHOLD: 0.6 25 | WIDTH_MUL: 0.50 26 | DEPTH_MUL: 0.33 27 | VARIANT: "yolov7" 28 | LOSS_TYPE: "v7" 29 | LOSS: 30 | LAMBDA_IOU: 2.5 31 | 32 | 33 | 34 | DATASETS: 35 | TRAIN: ("voc_train",) 36 | TEST: ("voc_val",) 37 | 38 | INPUT: 39 | MIN_SIZE_TRAIN: (416, 512, 608, 768, 800) 40 | MAX_SIZE_TRAIN: 1056 41 | # open all augmentations 42 | JITTER_CROP: 43 | ENABLED: True 44 | RESIZE: 45 | ENABLED: True 46 | # SHAPE: (540, 960) 47 | DISTORTION: 48 | ENABLED: True 49 | MOSAIC: 50 | ENABLED: True 51 | NUM_IMAGES: 4 52 | # MOSAIC_WIDTH: 960 53 |
MOSAIC_HEIGHT: 540 54 | 55 | SOLVER: 56 | IMS_PER_BATCH: 6 57 | BASE_LR: 0.006 58 | STEPS: (60000, 80000) 59 | MAX_ITER: 160000 60 | LR_SCHEDULER_NAME: "WarmupCosineLR" 61 | 62 | TEST: 63 | EVAL_PERIOD: 5000 64 | # EVAL_PERIOD: 0 65 | OUTPUT_DIR: "output/voc_x_s_pafpn" 66 | VIS_PERIOD: 5000 67 | 68 | DATALOADER: 69 | # proposals are part of the dataset_dicts, and take a lot of RAM 70 | NUM_WORKERS: 1 71 | -------------------------------------------------------------------------------- /configs/voc/yolov5_s.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | META_ARCHITECTURE: "YOLOV5" 6 | BACKBONE: 7 | NAME: "build_cspdarknetx_backbone" 8 | 9 | DARKNET: 10 | WEIGHTS: "" 11 | DEPTH_WISE: False 12 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 13 | YOLO: 14 | ANCHORS: 15 | [ 16 | [[56, 40], [29, 64], [18, 25]], 17 | [[91, 192], [137, 109], [60, 103]], 18 | [[354, 330], [289, 169], [168, 270]], 19 | ] 20 | CLASSES: 20 21 | IN_FEATURES: ["dark3", "dark4", "dark5"] 22 | CONF_THRESHOLD: 0.001 23 | NMS_THRESHOLD: 0.65 24 | IGNORE_THRESHOLD: 0.6 25 | WIDTH_MUL: 0.50 26 | DEPTH_MUL: 0.33 27 | VARIANT: "yolov7" 28 | LOSS_TYPE: "v7" 29 | LOSS: 30 | LAMBDA_IOU: 0.05 31 | LAMBDA_CONF: 1.0 32 | LAMBDA_CLS: 0.5 33 | ANCHOR_RATIO_THRESH: 2.0 34 | 35 | DATASETS: 36 | TRAIN: ("voc_train",) 37 | TEST: ("voc_val",) 38 | 39 | INPUT: 40 | MIN_SIZE_TRAIN: (416, 512, 608, 768) 41 | MAX_SIZE_TRAIN: 1056 42 | MIN_SIZE_TEST: 672 43 | MAX_SIZE_TEST: 768 44 | # open all augmentations 45 | JITTER_CROP: 46 | ENABLED: False 47 | RESIZE: 48 | ENABLED: False 49 | # SHAPE: (540, 960) 50 | DISTORTION: 51 | ENABLED: False 52 | # MOSAIC: 53 | # ENABLED: True 54 | # NUM_IMAGES: 4 55 | # DEBUG_VIS: True 56 | # # MOSAIC_WIDTH: 960 57 | # # MOSAIC_HEIGHT: 540 58 | MOSAIC_AND_MIXUP: 59 | ENABLED: True 60 | DEBUG_VIS: False 61 | ENABLE_MIXUP: False 62 | 63 | SOLVER: 64 | IMS_PER_BATCH: 4 65 | 
BASE_LR: 0.01 66 | STEPS: (60000, 80000) 67 | WARMUP_FACTOR: 0.00033333 68 | WARMUP_ITERS: 1500 69 | MAX_ITER: 130000 70 | LR_SCHEDULER_NAME: "WarmupCosineLR" 71 | 72 | TEST: 73 | EVAL_PERIOD: 5000 74 | # EVAL_PERIOD: 0 75 | OUTPUT_DIR: "output/voc_yolov5_s" 76 | VIS_PERIOD: 5000 77 | 78 | DATALOADER: 79 | # proposals are part of the dataset_dicts, and take a lot of RAM 80 | NUM_WORKERS: 1 81 | -------------------------------------------------------------------------------- /configs/voc/yolox_s_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | META_ARCHITECTURE: "YOLOX" 6 | BACKBONE: 7 | NAME: "build_cspdarknetx_backbone" 8 | 9 | DARKNET: 10 | WEIGHTS: "" 11 | DEPTH_WISE: False 12 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 13 | YOLO: 14 | CLASSES: 20 15 | IN_FEATURES: ["dark3", "dark4", "dark5"] 16 | CONF_THRESHOLD: 0.01 17 | NMS_THRESHOLD: 0.2 18 | IGNORE_THRESHOLD: 0.6 19 | WIDTH_MUL: 0.50 20 | DEPTH_MUL: 0.33 21 | VARIANT: "yolov7" 22 | LOSS_TYPE: "v7" 23 | LOSS: 24 | LAMBDA_IOU: 3.5 25 | 26 | 27 | DATASETS: 28 | TRAIN: ("voc_train",) 29 | TEST: ("voc_val",) 30 | 31 | INPUT: 32 | MIN_SIZE_TRAIN: (416, 512, 608, 768, 800) 33 | # open all augmentations 34 | JITTER_CROP: 35 | ENABLED: True 36 | RESIZE: 37 | ENABLED: True 38 | # SHAPE: (540, 960) 39 | DISTORTION: 40 | ENABLED: True 41 | MOSAIC: 42 | ENABLED: True 43 | NUM_IMAGES: 4 44 | # MOSAIC_WIDTH: 960 45 | # MOSAIC_HEIGHT: 540 46 | 47 | SOLVER: 48 | IMS_PER_BATCH: 6 49 | BASE_LR: 0.007 50 | STEPS: (60000, 80000) 51 | MAX_ITER: 100000 52 | LR_SCHEDULER_NAME: "WarmupCosineLR" 53 | 54 | TEST: 55 | EVAL_PERIOD: 5000 56 | # EVAL_PERIOD: 0 57 | OUTPUT_DIR: "output/yolox_s" 58 | VIS_PERIOD: 5000 59 | 60 | DATALOADER: 61 | # proposals are part of the dataset_dicts, and take a lot of RAM 62 | NUM_WORKERS: 1 63 | -------------------------------------------------------------------------------- 
/configs/wearmask/cspdarknet53.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_cspdarknet_backbone" 8 | DARKNET: 9 | # WEIGHTS: "weights/yolov3.pt" # using pytorch weights from ultralytics's yolov3 (duplicate key; the later WEIGHTS: "" takes effect) 10 | DEPTH: 53 11 | STEM_OUT_CHANNELS: 32 12 | WEIGHTS: "" 13 | OUT_FEATURES: ["layer3", "layer4", "layer5"] 14 | YOLO: 15 | ANCHORS: [[[231.0, 298.0], [356.0, 458.0], [157.0, 199.0]], [[85.0, 106.0], [63.0, 79.0], [115.0, 143.0]], [[46.0, 56.0], [31.0, 37.0], [17.0, 19.0]]] 16 | CLASSES: 2 17 | IN_FEATURES: ["layer3", "layer4", "layer5"] 18 | CONF_THRESHOLD: 0.3 19 | NMS_THRESHOLD: 0.1 20 | IGNORE_THRESHOLD: 0.1 21 | DATASETS: 22 | TRAIN: ("mask_train",) 23 | TEST: ("mask_val",) 24 | 25 | SOLVER: 26 | IMS_PER_BATCH: 16 27 | BASE_LR: 0.005 28 | STEPS: (60000, 80000) 29 | MAX_ITER: 90000 30 | CHECKPOINT_PERIOD: 1000 31 | 32 | DATALOADER: 33 | # proposals are part of the dataset_dicts, and take a lot of RAM 34 | NUM_WORKERS: 2 35 | -------------------------------------------------------------------------------- /configs/wearmask/cspdarknet53_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | META_ARCHITECTURE: "YOLOV7" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_cspdarknet_backbone" 8 | DARKNET: 9 | # WEIGHTS: "weights/cspdarknet53_ra_256-d05c7c21.pth" # manually down from: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/cspdarknet53_ra_256-d05c7c21.pth (duplicate key; the later WEIGHTS: "" takes effect) 10 | DEPTH: 53 11 | STEM_OUT_CHANNELS: 32 12 | WEIGHTS: "" 13 | OUT_FEATURES: ["layer3", "layer4", "layer5"] 14 | YOLO: 15 | ANCHORS: [[[231.0, 298.0], [356.0, 458.0], [157.0, 199.0]], [[85.0, 106.0], [63.0, 79.0], [115.0, 143.0]], [[46.0, 56.0], [31.0, 37.0], [17.0, 19.0]]] 16 | CLASSES: 2 17 |
IN_FEATURES: ["layer3", "layer4", "layer5"] 18 | CONF_THRESHOLD: 0.01 19 | NMS_THRESHOLD: 0.1 20 | IGNORE_THRESHOLD: 0.5 21 | LOSS_TYPE: "v7" 22 | 23 | DATASETS: 24 | TRAIN: ("mask_train",) 25 | TEST: ("mask_val",) 26 | 27 | SOLVER: 28 | IMS_PER_BATCH: 3 29 | BASE_LR: 0.005 30 | STEPS: (60000, 80000) 31 | MAX_ITER: 90000 32 | CHECKPOINT_PERIOD: 1000 33 | 34 | OUTPUT_DIR: "output/wearmask_cspdarknet53" 35 | 36 | 37 | DATALOADER: 38 | # proposals are part of the dataset_dicts, and take a lot of RAM 39 | NUM_WORKERS: 2 40 | -------------------------------------------------------------------------------- /configs/wearmask/darknet53.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | BACKBONE: 6 | NAME: "build_darknet_backbone" 7 | 8 | DARKNET: 9 | # WEIGHTS: "weights/yolov3.pt" # using pytorch weights from ultralytics's yolov3 (duplicate key; the later WEIGHTS: "" takes effect) 10 | DEPTH: 53 11 | STEM_OUT_CHANNELS: 32 12 | WEIGHTS: "" 13 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 14 | YOLO: 15 | ANCHORS: [[[231.0, 298.0], [356.0, 458.0], [157.0, 199.0]], [[85.0, 106.0], [63.0, 79.0], [115.0, 143.0]], [[46.0, 56.0], [31.0, 37.0], [17.0, 19.0]]] 16 | CLASSES: 2 17 | IN_FEATURES: ["dark3", "dark4", "dark5"] 18 | CONF_THRESHOLD: 0.7 19 | NMS_THRESHOLD: 0.1 20 | IGNORE_THRESHOLD: 0.1 21 | DATASETS: 22 | TRAIN: ("mask_train",) 23 | TEST: ("mask_val",) 24 | 25 | SOLVER: 26 | IMS_PER_BATCH: 16 27 | BASE_LR: 0.005 28 | STEPS: (60000, 80000) 29 | MAX_ITER: 90000 30 | CHECKPOINT_PERIOD: 1000 31 | 32 | DATALOADER: 33 | # proposals are part of the dataset_dicts, and take a lot of RAM 34 | NUM_WORKERS: 2 35 | -------------------------------------------------------------------------------- /configs/wearmask/efficient_b2.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "" 5 | MASK_ON: False 6
| BACKBONE: 7 | NAME: "build_efficientnet_fpn_backbone" 8 | EFFICIENTNET: 9 | NAME: "efficientnet_b1" 10 | # OUT_FEATURES: ["stride8", "stride16", "stride32"] # fpn produce 4 levels, only using 3 for now 11 | OUT_FEATURES: ["stride4", "stride8", "stride16", "stride32"] 12 | FPN: 13 | # IN_FEATURES: ["stride8", "stride16", "stride32"] 14 | IN_FEATURES: ["stride4", "stride8", "stride16", "stride32"] 15 | OUT_CHANNELS: 64 16 | REPEAT: 3 17 | YOLO: 18 | ANCHORS: 19 | [ 20 | [[231.0, 298.0], [356.0, 458.0], [157.0, 199.0]], 21 | [[85.0, 106.0], [63.0, 79.0], [115.0, 143.0]], 22 | [[46.0, 56.0], [31.0, 37.0], [17.0, 19.0]], 23 | ] 24 | CLASSES: 2 25 | IN_FEATURES: ["p3", "p4", "p5"] # p3, p4, p5 from FPN 26 | CONF_THRESHOLD: 0.2 27 | NMS_THRESHOLD: 0.1 28 | IGNORE_THRESHOLD: 0.1 29 | VARIANT: "yolov7" 30 | LOSS_TYPE: "v4" 31 | DATASETS: 32 | TRAIN: ("mask_train",) 33 | TEST: ("mask_val",) 34 | 35 | SOLVER: 36 | IMS_PER_BATCH: 32 37 | BASE_LR: 0.0005 38 | STEPS: (70000, 90000) 39 | MAX_ITER: 100000 40 | CHECKPOINT_PERIOD: 1000 41 | 42 | DATALOADER: 43 | # proposals are part of the dataset_dicts, and take a lot of RAM 44 | NUM_WORKERS: 2 45 | -------------------------------------------------------------------------------- /configs/wearmask/r50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_resnet_fpn_backbone" 8 | RESNETS: 9 | DEPTH: 50 10 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 11 | FPN: 12 | IN_FEATURES: ["res3", "res4", "res5"] 13 | OUT_CHANNELS: 512 14 | YOLO: 15 | ANCHORS: 16 | [ 17 | [[231.0, 298.0], [356.0, 458.0], [157.0, 199.0]], 18 | [[85.0, 106.0], [63.0, 79.0], [115.0, 143.0]], 19 | [[46.0, 56.0], [31.0, 37.0], [17.0, 19.0]], 20 | ] 21 | CLASSES: 2 22 | IN_FEATURES: ["p3", "p4", "p5"] # p3, 
p4, p5 from FPN 23 | CONF_THRESHOLD: 0.2 24 | NMS_THRESHOLD: 0.1 25 | IGNORE_THRESHOLD: 0.1 26 | VARIANT: "yolov7" 27 | LOSS_TYPE: "v4" 28 | DATASETS: 29 | TRAIN: ("mask_train",) 30 | TEST: ("mask_val",) 31 | 32 | SOLVER: 33 | IMS_PER_BATCH: 48 34 | BASE_LR: 0.0005 35 | STEPS: (70000, 90000) 36 | MAX_ITER: 100000 37 | CHECKPOINT_PERIOD: 1000 38 | 39 | DATALOADER: 40 | # proposals are part of the dataset_dicts, and take a lot of RAM 41 | NUM_WORKERS: 2 42 | -------------------------------------------------------------------------------- /configs/wearmask/r50_1gpu.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_resnet_fpn_backbone" 8 | RESNETS: 9 | DEPTH: 50 10 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 11 | FPN: 12 | IN_FEATURES: ["res3", "res4", "res5"] 13 | OUT_CHANNELS: 512 14 | YOLO: 15 | ANCHORS: 16 | [ 17 | [[231.0, 298.0], [356.0, 458.0], [157.0, 199.0]], 18 | [[85.0, 106.0], [63.0, 79.0], [115.0, 143.0]], 19 | [[46.0, 56.0], [31.0, 37.0], [17.0, 19.0]], 20 | ] 21 | CLASSES: 2 22 | IN_FEATURES: ["p3", "p4", "p5"] # p3, p4, p5 from FPN 23 | CONF_THRESHOLD: 0.1 24 | NMS_THRESHOLD: 0.1 25 | IGNORE_THRESHOLD: 0.1 26 | VARIANT: "yolov7" 27 | LOSS_TYPE: "v7" 28 | LOSS: 29 | LAMBDA_CONF: 0.8 30 | DATASETS: 31 | TRAIN: ("mask_train",) 32 | TEST: ("mask_val",) 33 | 34 | SOLVER: 35 | IMS_PER_BATCH: 5 36 | BASE_LR: 0.0001 37 | STEPS: (70000, 90000) 38 | MAX_ITER: 100000 39 | CHECKPOINT_PERIOD: 1000 40 | 41 | DATALOADER: 42 | # proposals are part of the dataset_dicts, and take a lot of RAM 43 | NUM_WORKERS: 2 44 | -------------------------------------------------------------------------------- /configs/wearmask/r50_bifpn.yaml: -------------------------------------------------------------------------------- 
1 | _BASE_: "../Base-YoloV7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_resnet_bifpn_backbone" 8 | RESNETS: 9 | DEPTH: 50 10 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 11 | FPN: 12 | IN_FEATURES: ["res3", "res4", "res5"] 13 | OUT_CHANNELS: 512 14 | YOLO: 15 | ANCHORS: 16 | [ 17 | [[231.0, 298.0], [356.0, 458.0], [157.0, 199.0]], 18 | [[85.0, 106.0], [63.0, 79.0], [115.0, 143.0]], 19 | [[46.0, 56.0], [31.0, 37.0], [17.0, 19.0]], 20 | ] 21 | CLASSES: 2 22 | IN_FEATURES: ["p3", "p4", "p5"] # p3, p4, p5 from FPN 23 | CONF_THRESHOLD: 0.2 24 | NMS_THRESHOLD: 0.1 25 | IGNORE_THRESHOLD: 0.1 26 | VARIANT: "yolov7" 27 | LOSS_TYPE: "v4" 28 | DATASETS: 29 | TRAIN: ("mask_train",) 30 | TEST: ("mask_val",) 31 | 32 | SOLVER: 33 | IMS_PER_BATCH: 48 34 | BASE_LR: 0.0005 35 | STEPS: (70000, 90000) 36 | MAX_ITER: 100000 37 | CHECKPOINT_PERIOD: 1000 38 | 39 | DATALOADER: 40 | # proposals are part of the dataset_dicts, and take a lot of RAM 41 | NUM_WORKERS: 2 42 | -------------------------------------------------------------------------------- /configs/wearmask/r50_pan.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "YOLOV7" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | BACKBONE: 7 | NAME: "build_resnet_pan_backbone" 8 | RESNETS: 9 | DEPTH: 50 10 | OUT_FEATURES: ["res3", "res4", "res5"] # fpn produce 4 levels, only using 3 for now 11 | FPN: 12 | IN_FEATURES: ["res3", "res4", "res5"] 13 | OUT_CHANNELS_LIST: [256, 512, 1024] 14 | YOLO: 15 | ANCHORS: 16 | [ 17 | [[231.0, 298.0], [356.0, 458.0], [157.0, 199.0]], 18 | [[85.0, 106.0], [63.0, 79.0], [115.0, 143.0]], 19 | [[46.0, 56.0], [31.0, 37.0], [17.0, 19.0]], 20 | ] 21 | CLASSES: 2 22 | IN_FEATURES: ["p3", "p4", "p5"] # p3, p4, p5 from 
FPN 23 | CONF_THRESHOLD: 0.2 24 | NMS_THRESHOLD: 0.1 25 | IGNORE_THRESHOLD: 0.1 26 | VARIANT: "yolov7" 27 | LOSS_TYPE: "v4" 28 | DATASETS: 29 | TRAIN: ("mask_train",) 30 | TEST: ("mask_val",) 31 | 32 | INPUT: 33 | # open all augmentations 34 | JITTER_CROP: 35 | ENABLED: False 36 | RESIZE: 37 | ENABLED: True 38 | SHAPE: (540, 960) 39 | DISTORTION: 40 | ENABLED: False 41 | MOSAIC: 42 | ENABLED: False 43 | NUM_IMAGES: 4 44 | MOSAIC_WIDTH: 960 45 | MOSAIC_HEIGHT: 540 46 | GRID_MASK: 47 | ENABLED: True 48 | 49 | SOLVER: 50 | IMS_PER_BATCH: 2 51 | BASE_LR: 0.0005 52 | STEPS: (70000, 90000) 53 | MAX_ITER: 100000 54 | CHECKPOINT_PERIOD: 1000 55 | 56 | DATALOADER: 57 | # proposals are part of the dataset_dicts, and take a lot of RAM 58 | NUM_WORKERS: 2 59 | -------------------------------------------------------------------------------- /configs/wearmask/yolov5_s.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-YOLOv7.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | MASK_ON: False 5 | META_ARCHITECTURE: "YOLOV5" 6 | BACKBONE: 7 | NAME: "build_cspdarknetx_backbone" 8 | 9 | DARKNET: 10 | WEIGHTS: "" 11 | DEPTH_WISE: False 12 | OUT_FEATURES: ["dark3", "dark4", "dark5"] 13 | YOLO: 14 | ANCHORS: 15 | # [ 16 | # [[10, 13], [16, 30], [33, 23]], 17 | # [[30, 61], [62, 45], [59, 119]], 18 | # [[116, 90], [156, 198], [373, 326]], 19 | # ] 20 | [ 21 | [[46.0, 56.0], [31.0, 37.0], [17.0, 19.0]], 22 | [[85.0, 106.0], [63.0, 79.0], [115.0, 143.0]], 23 | [[231.0, 298.0], [356.0, 458.0], [157.0, 199.0]], 24 | ] 25 | CLASSES: 2 26 | IN_FEATURES: ["dark3", "dark4", "dark5"] 27 | CONF_THRESHOLD: 0.001 28 | NMS_THRESHOLD: 0.65 29 | IGNORE_THRESHOLD: 0.6 30 | WIDTH_MUL: 0.50 31 | DEPTH_MUL: 0.33 32 | VARIANT: "yolov7" 33 | LOSS_TYPE: "v7" 34 | LOSS: 35 | LAMBDA_IOU: 0.05 36 | LAMBDA_CONF: 1.0 37 | LAMBDA_CLS: 0.5 38 | ANCHOR_RATIO_THRESH: 4.0 39 | 40 | DATASETS: 41 | TRAIN: ("mask_train",) 42 | TEST: ("mask_val",) 43 | 44 | INPUT: 45 | 
MIN_SIZE_TRAIN: (416, 512, 608, 768) 46 | MAX_SIZE_TRAIN: 1056 47 | MIN_SIZE_TEST: 672 48 | MAX_SIZE_TEST: 768 49 | # open all augmentations 50 | JITTER_CROP: 51 | ENABLED: False 52 | RESIZE: 53 | ENABLED: False 54 | # SHAPE: (540, 960) 55 | DISTORTION: 56 | ENABLED: False 57 | # MOSAIC: 58 | # ENABLED: True 59 | # NUM_IMAGES: 4 60 | # DEBUG_VIS: True 61 | # # MOSAIC_WIDTH: 960 62 | # # MOSAIC_HEIGHT: 540 63 | MOSAIC_AND_MIXUP: 64 | ENABLED: True 65 | DEBUG_VIS: False 66 | ENABLE_MIXUP: False 67 | 68 | SOLVER: 69 | IMS_PER_BATCH: 12 70 | BASE_LR: 0.046 71 | STEPS: (60000, 80000) 72 | WARMUP_FACTOR: 0.00033333 73 | WARMUP_ITERS: 1500 74 | MAX_ITER: 150000 75 | LR_SCHEDULER_NAME: "WarmupCosineLR" 76 | 77 | TEST: 78 | EVAL_PERIOD: 5000 79 | # EVAL_PERIOD: 0 80 | OUTPUT_DIR: "output/wearmask_yolov5_s" 81 | VIS_PERIOD: 5000 82 | 83 | DATALOADER: 84 | # proposals are part of the dataset_dicts, and take a lot of RAM 85 | NUM_WORKERS: 1 86 | -------------------------------------------------------------------------------- /deploy/configs/r18.yaml: -------------------------------------------------------------------------------- 1 | extra_prepare_dict: 2 | extra_qconfig_dict: 3 | w_observer: MinMaxObserver 4 | a_observer: EMAMinMaxObserver 5 | w_fakequantize: FixedFakeQuantize 6 | a_fakequantize: FixedFakeQuantize 7 | w_qscheme: 8 | bit: 8 9 | # symmetry: False 10 | symmetry: true 11 | per_channel: True 12 | pot_scale: False 13 | a_qscheme: 14 | bit: 8 15 | # symmetry: False 16 | symmetry: true 17 | per_channel: False 18 | pot_scale: False 19 | quantize: 20 | quantize_type: naive_ptq # support naive_ptq or advanced_ptq 21 | cali_batchsize: 16 22 | backend: 'Tensorrt' 23 | # backend: 'ONNX_QNN' 24 | # backend: 'PPLW8A16' 25 | deploy: 26 | model_name: 'r18.onnx' 27 | output_path: './' 28 | deploy_to_qlinear: true 29 | model: # architecture details 30 | type: resnet18 # model name 31 | kwargs: 32 | num_classes: 1000 33 | path: /path-of-pretrained 34 | data: 35 | path: 
/path-of-imagenet 36 | batch_size: 64 37 | num_workers: 4 38 | pin_memory: True 39 | input_size: 224 40 | test_resize: 256 41 | process: 42 | seed: 1005 -------------------------------------------------------------------------------- /deploy/prune_nni.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/deploy/prune_nni.py -------------------------------------------------------------------------------- /deploy/quant_atom/.gitignore: -------------------------------------------------------------------------------- 1 | Output/ 2 | -------------------------------------------------------------------------------- /deploy/quant_fx/.gitignore: -------------------------------------------------------------------------------- 1 | *.pt 2 | *.torchscript 3 | data/ 4 | vendor/ 5 | -------------------------------------------------------------------------------- /deploy/quant_fx/configs/r18.yaml: -------------------------------------------------------------------------------- 1 | extra_prepare_dict: 2 | extra_qconfig_dict: 3 | w_observer: MinMaxObserver 4 | a_observer: EMAMinMaxObserver 5 | w_fakequantize: LearnableFakeQuantize 6 | a_fakequantize: LearnableFakeQuantize 7 | w_qscheme: 8 | bit: 8 9 | # symmetry: False 10 | symmetry: true 11 | per_channel: True 12 | pot_scale: False 13 | a_qscheme: 14 | bit: 8 15 | # symmetry: False 16 | symmetry: true 17 | per_channel: False 18 | pot_scale: False 19 | quantize: 20 | quantize_type: naive_ptq # support naive_ptq or advanced_ptq 21 | cali_batchsize: 16 22 | # backend: 'Tensorrt' 23 | backend: 'ONNX_QNN' 24 | # backend: 'PPLW8A16' 25 | deploy: 26 | model_name: 'r18.onnx' 27 | output_path: './' 28 | deploy_to_qlinear: true 29 | model: # architecture details 30 | type: resnet18 # model name 31 | kwargs: 32 | num_classes: 1000 33 | path: /path-of-pretrained 34 | data: 35 | path: /path-of-imagenet 36 | 
batch_size: 64 37 | num_workers: 4 38 | pin_memory: True 39 | input_size: 224 40 | test_resize: 256 41 | process: 42 | seed: 1005 -------------------------------------------------------------------------------- /deploy/quant_fx/qt_q_test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import quantization 3 | from torchvision import models 4 | 5 | qat_resnet18 = models.resnet18(pretrained=True).cuda() 6 | 7 | qat_resnet18.qconfig = quantization.QConfig( 8 | activation=quantization.default_fake_quant, 9 | weight=quantization.default_per_channel_weight_fake_quant, 10 | ) 11 | quantization.prepare_qat(qat_resnet18, inplace=True) 12 | qat_resnet18.apply(quantization.enable_observer) 13 | qat_resnet18.apply(quantization.enable_fake_quant) 14 | 15 | dummy_input = torch.randn(16, 3, 224, 224).cuda() 16 | _ = qat_resnet18(dummy_input) 17 | for module in qat_resnet18.modules(): 18 | if isinstance(module, quantization.FakeQuantize): 19 | module.calculate_qparams() 20 | qat_resnet18.apply(quantization.disable_observer) 21 | 22 | qat_resnet18.cuda() 23 | 24 | input_names = ["actual_input_1"] 25 | output_names = ["output1"] 26 | 27 | 28 | torch.onnx.export( 29 | qat_resnet18, dummy_input, "quant_model.onnx", verbose=True, opset_version=13 30 | ) 31 | -------------------------------------------------------------------------------- /deploy/quant_fx/r18.onnx_clip_ranges.json: -------------------------------------------------------------------------------- 1 | { 2 | "tensorrt": { 3 | "blob_range": { 4 | "input": 2.7645304203033447, 5 | "::FixedPerTensorAffine_419": 5.25675630569458, 6 | "::FixedPerTensorAffine_425": 5.23717737197876, 7 | "::FixedPerTensorAffine_438": 2.820558786392212, 8 | "::FixedPerTensorAffine_450": 3.860928535461426, 9 | "::FixedPerTensorAffine_457": 5.737135887145996, 10 | "::FixedPerTensorAffine_470": 2.6348819732666016, 11 | "::FixedPerTensorAffine_482": 6.245926380157471, 12 | 
"::FixedPerTensorAffine_489": 6.591638088226318, 13 | "::FixedPerTensorAffine_502": 2.662112236022949, 14 | "::FixedPerTensorAffine_514": 3.563220500946045, 15 | "::FixedPerTensorAffine_526": 3.338449239730835, 16 | "::FixedPerTensorAffine_533": 3.929392099380493, 17 | "::FixedPerTensorAffine_546": 2.284010887145996, 18 | "::FixedPerTensorAffine_558": 3.760089635848999, 19 | "::FixedPerTensorAffine_565": 5.2349677085876465, 20 | "::FixedPerTensorAffine_578": 2.779576539993286, 21 | "::FixedPerTensorAffine_590": 3.2088496685028076, 22 | "::FixedPerTensorAffine_602": 1.4625221490859985, 23 | "::FixedPerTensorAffine_609": 3.332473039627075, 24 | "::FixedPerTensorAffine_622": 2.045714855194092, 25 | "::FixedPerTensorAffine_634": 4.317813396453857, 26 | "::FixedPerTensorAffine_641": 5.273874759674072, 27 | "::FixedPerTensorAffine_654": 3.230210542678833, 28 | "::FixedPerTensorAffine_666": 13.654618263244629, 29 | "::FixedPerTensorAffine_678": 2.2387030124664307, 30 | "::FixedPerTensorAffine_685": 12.90011215209961, 31 | "::FixedPerTensorAffine_698": 9.551894187927246, 32 | "::FixedPerTensorAffine_710": 47.117374420166016, 33 | "::FixedPerTensorAffine_717": 41.599098205566406, 34 | "::FixedPerTensorAffine_724": 40.52422332763672, 35 | "onnx::Flatten_723": 40.52422332763672 36 | } 37 | } 38 | } -------------------------------------------------------------------------------- /deploy/quant_fx/test.py: -------------------------------------------------------------------------------- 1 | from alfred.utils.log import logger 2 | 3 | logger.info('this ia info') 4 | logger.warning('this ia info') 5 | logger.error('this ia info') -------------------------------------------------------------------------------- /deploy/quant_onnx/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | vendor/ 3 | -------------------------------------------------------------------------------- /deploy/quant_onnx/prepare_onnx.py: 
-------------------------------------------------------------------------------- 1 | from torchvision.models.resnet import resnet18 2 | from torch import nn 3 | import os 4 | import torch 5 | 6 | 7 | model = resnet18(pretrained=True) 8 | model.fc = nn.Linear(512, 10) 9 | if os.path.exists("r18_raw.pth"): 10 | model.load_state_dict(torch.load("r18_raw.pth", map_location="cpu")) 11 | else: 12 | pass 13 | 14 | model.eval() 15 | 16 | a = torch.randn([1, 3, 224, 224]) 17 | torch.onnx.export( 18 | model, 19 | a, 20 | "r18.onnx", 21 | input_names=["data"], 22 | dynamic_axes={"data": {0: "batch", 2: "h", 3: "w"}}, 23 | opset_version=13 24 | ) 25 | -------------------------------------------------------------------------------- /deploy/quant_onnx/qt_atom_pose.py: -------------------------------------------------------------------------------- 1 | """ 2 | Using atomquant to quant SparseInst model 3 | """ 4 | from atomquant.onnx.ptq_cpu import quantize_static_onnx 5 | from atomquant.onnx.dataloader import get_calib_dataloader_coco 6 | from torchvision import transforms 7 | import cv2 8 | import numpy as np 9 | import sys 10 | import os 11 | import onnxruntime as ort 12 | 13 | 14 | def preprocess_func(img, target): 15 | w = 192 16 | h = 256 17 | a = cv2.resize(img, (w, h)) 18 | a_t = np.array(a).astype(np.float32) 19 | boxes = [] 20 | for t in target: 21 | boxes.append(t["bbox"]) 22 | target = np.array(boxes) 23 | return a_t, target 24 | 25 | 26 | def pqt(onnx_f): 27 | coco_root = os.path.expanduser("~/data/coco/images/val2017") 28 | anno_f = os.path.expanduser("~/data/coco/annotations/instances_val2017_val_val_train.json") 29 | 30 | session = ort.InferenceSession(onnx_f) 31 | input_name = session.get_inputs()[0].name 32 | 33 | calib_dataloader = get_calib_dataloader_coco( 34 | coco_root, anno_f, preprocess_func=preprocess_func, input_names=input_name, bs=1, max_step=50 35 | ) 36 | quantize_static_onnx(onnx_f, calib_dataloader=calib_dataloader) 37 | 38 | 39 | if __name__ == 
"__main__": 40 | onnx_f = sys.argv[1] 41 | pqt(onnx_f) 42 | -------------------------------------------------------------------------------- /deploy/quant_onnx/qt_atom_r18.py: -------------------------------------------------------------------------------- 1 | """ 2 | Using atomquant to quant SparseInst model 3 | """ 4 | from atomquant.onnx.ptq_cpu import quantize_static_onnx 5 | from atomquant.onnx.dataloader import ( 6 | get_calib_dataloader_from_dataset, 7 | ) 8 | from torchvision import transforms 9 | import cv2 10 | import numpy as np 11 | import sys 12 | import os 13 | import onnxruntime as ort 14 | import torchvision 15 | import time 16 | 17 | 18 | def evaluate_onnx_model(model_p, test_loader, criterion=None): 19 | running_loss = 0 20 | running_corrects = 0 21 | 22 | session = ort.InferenceSession(model_p) 23 | input_name = session.get_inputs()[0].name 24 | 25 | total = 0.0 26 | for inputs, labels in test_loader: 27 | inputs = inputs.cpu().numpy() 28 | labels = labels.cpu().numpy() 29 | 30 | start = time.perf_counter() 31 | outputs = session.run([], {input_name: inputs}) 32 | end = (time.perf_counter() - start) * 1000 33 | total += end 34 | 35 | outputs = outputs[0] 36 | preds = np.argmax(outputs, 1) 37 | if criterion is not None: 38 | loss = criterion(outputs, labels).item() 39 | else: 40 | loss = 0 41 | # statistics 42 | running_corrects += np.sum(preds == labels) 43 | 44 | # eval_loss = running_loss / len(test_loader.dataset) 45 | eval_accuracy = running_corrects / len(test_loader.dataset) 46 | total /= len(test_loader) 47 | print(f"eval loss: {0}, eval acc: {eval_accuracy}, cost: {total}") 48 | return 0, eval_accuracy 49 | 50 | 51 | if __name__ == "__main__": 52 | model_p = sys.argv[1] 53 | model_qp = os.path.join( 54 | os.path.dirname(model_p), 55 | os.path.basename(model_p).replace(".onnx", "_int8.onnx"), 56 | ) 57 | 58 | train_transform = transforms.Compose( 59 | [ 60 | transforms.RandomCrop(32, padding=4), 61 | transforms.RandomHorizontalFlip(), 62 | 
transforms.ToTensor(), 63 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 64 | ] 65 | ) 66 | test_transform = transforms.Compose( 67 | [ 68 | # transforms.RandomCrop(224, padding=4), 69 | transforms.ToTensor(), 70 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 71 | ] 72 | ) 73 | train_set = torchvision.datasets.CIFAR10( 74 | root="data", train=True, download=True, transform=train_transform 75 | ) 76 | test_set = torchvision.datasets.CIFAR10( 77 | root="data", train=False, download=True, transform=test_transform 78 | ) 79 | 80 | session = ort.InferenceSession(model_p) 81 | input_name = session.get_inputs()[0].name 82 | 83 | calib_dataloader = get_calib_dataloader_from_dataset( 84 | test_set, input_names=input_name, bs=1, max_step=100 85 | ) 86 | quantize_static_onnx(model_p, calib_dataloader=calib_dataloader) 87 | 88 | evaluate_onnx_model(model_qp, calib_dataloader.dataloader_holder) 89 | evaluate_onnx_model(model_p, calib_dataloader.dataloader_holder) 90 | -------------------------------------------------------------------------------- /deploy/quant_onnx/qt_atom_sparseinst.py: -------------------------------------------------------------------------------- 1 | """ 2 | Using atomquant to quant SparseInst model 3 | """ 4 | from atomquant.onnx.ptq_cpu import quantize_static_onnx 5 | from atomquant.onnx.dataloader import get_calib_dataloader_coco 6 | from torchvision import transforms 7 | import cv2 8 | import numpy as np 9 | import sys 10 | import os 11 | import onnxruntime as ort 12 | 13 | 14 | def preprocess_func(img, target): 15 | w = 640 16 | h = 640 17 | a = cv2.resize(img, (w, h)) 18 | a_t = np.array(a).astype(np.float32) 19 | boxes = [] 20 | for t in target: 21 | boxes.append(t["bbox"]) 22 | target = np.array(boxes) 23 | return a_t, target 24 | 25 | 26 | def pqt(onnx_f): 27 | coco_root = os.path.expanduser("~/data/coco/images/val2017") 28 | anno_f = 
os.path.expanduser("~/data/coco/annotations/instances_val2017_val_val_train.json") 29 | 30 | session = ort.InferenceSession(onnx_f) 31 | input_name = session.get_inputs()[0].name 32 | 33 | calib_dataloader = get_calib_dataloader_coco( 34 | coco_root, anno_f, preprocess_func=preprocess_func, input_names=input_name, bs=1, max_step=50 35 | ) 36 | quantize_static_onnx(onnx_f, calib_dataloader=calib_dataloader) 37 | 38 | 39 | if __name__ == "__main__": 40 | onnx_f = sys.argv[1] 41 | pqt(onnx_f) 42 | -------------------------------------------------------------------------------- /deploy/quant_onnx/readme.md: -------------------------------------------------------------------------------- 1 | # Quant ONNX 2 | 3 | we using tools from ONNXRuntime to directly quantize onnx models and save int8 onnx model. 4 | 5 | 6 | ## Log 7 | 8 | - `2022.04.17`: quantize sparseinst and keypoints failed. Seems need all opset=13 to do quant, opset>12 will caused strange result when quantize in onnxruntime; 9 | -------------------------------------------------------------------------------- /deploy/quant_onnx/test_images/daisy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/deploy/quant_onnx/test_images/daisy.jpg -------------------------------------------------------------------------------- /deploy/quant_onnx/test_images/rose.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/deploy/quant_onnx/test_images/rose.jpg -------------------------------------------------------------------------------- /deploy/quant_onnx/test_images/tulip.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/deploy/quant_onnx/test_images/tulip.jpg -------------------------------------------------------------------------------- /deploy/quant_tvm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/deploy/quant_tvm.py -------------------------------------------------------------------------------- /deploy/trt_cc/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | -------------------------------------------------------------------------------- /deploy/trt_cc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | 3 | project(yolox) 4 | 5 | add_definitions(-std=c++11) 6 | 7 | option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) 8 | set(CMAKE_CXX_STANDARD 11) 9 | set(CMAKE_BUILD_TYPE Debug) 10 | 11 | find_package(CUDA REQUIRED) 12 | 13 | include_directories(${PROJECT_SOURCE_DIR}/include) 14 | # include and link dirs of cuda and tensorrt, you need adapt them if yours are different 15 | # # cuda 16 | # include_directories(/data/cuda/cuda-10.2/cuda/include) 17 | # link_directories(/data/cuda/cuda-10.2/cuda/lib64) 18 | # # cudnn 19 | # include_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/include) 20 | # link_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/lib64) 21 | # tensorrt 22 | include_directories($ENV{HOME}/TensorRT/include) 23 | link_directories($ENV{HOME}/TensorRT/lib) 24 | 25 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED") 26 | 27 | find_package(OpenCV) 28 | include_directories(${OpenCV_INCLUDE_DIRS}) 29 | 30 | add_executable(demo_yolox ${PROJECT_SOURCE_DIR}/demo_yolox.cc) 31 | target_link_libraries(demo_yolox nvinfer) 32 | target_link_libraries(demo_yolox 
cudart) 33 | target_link_libraries(demo_yolox ${OpenCV_LIBS}) 34 | 35 | add_definitions(-O2 -pthread) 36 | 37 | -------------------------------------------------------------------------------- /deploy/trt_cc/readme.md: -------------------------------------------------------------------------------- 1 | run 2 | 3 | ``` 4 | mkdir build 5 | cd build 6 | cmake .. 7 | make -j8 8 | ./demo_yolox ../../../weights/coco_yolox_s.trt -i ../../../images/COCO_val2014_000000001869.jpg 9 | ``` -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | # YOLOv7 Install 2 | 3 | > Please install detectron2 first, this is the basic dependency. For detectron2, just clone the official repo and install following their instructions. 4 | 5 | yolov7 is not a lib, it's a project ready for use. But installing its dependencies still takes a few steps. 6 | 7 | First, please consider installing 2 important libs that you might not be familiar with: 8 | 9 | ``` 10 | alfred-py 11 | nbnb 12 | ``` 13 | 14 | Both of them can be installed from pip. The first one provides enhanced and full-featured visualization utils for drawing boxes, masks etc. And it provides some very convenient tools for users to visualize your coco dataset (VOC, YOLO format also supported). After install, you can call `alfred` to get more details. 15 | 16 | `nbnb` is a lib that provides some useful common network blocks. 17 | 18 | Also, if you need fbnetv3, you need to install mobilecv from FaceBook: 19 | 20 | ``` 21 | pip install git+https://github.com/facebookresearch/mobile-vision.git 22 | ``` 23 | 24 | After install, you are now ready to train with YOLOv7. 
25 | 26 | ``` 27 | python train_net.py --config-file configs/coco/darknet53.yaml --num-gpus 8 28 | ``` 29 | 30 | train YOLOX: 31 | 32 | ``` 33 | python train_net.py --config-file configs/coco/yolox_s.yaml --num-gpus 8 34 | ``` 35 | 36 | ## Train on Custom dataset 37 | 38 | If you want to train on a custom dataset, you **just need to convert your dataset to coco format**. And that's all, that's all you need to do. 39 | 40 | Then you just need to create a new folder for your dataset under `configs`, and set your data path in the config, taking the VisDrone dataset as an example: 41 | 42 | ``` 43 | DATASETS: 44 | TRAIN: ("visdrone_train",) 45 | TEST: ("visdrone_val",) 46 | ``` 47 | 48 | Then register your dataset in `train_visdrone.py`: 49 | 50 | ``` 51 | DATASET_ROOT = './datasets/visdrone' 52 | ANN_ROOT = os.path.join(DATASET_ROOT, 'visdrone_coco_anno') 53 | TRAIN_PATH = os.path.join(DATASET_ROOT, 'VisDrone2019-DET-train/images') 54 | VAL_PATH = os.path.join(DATASET_ROOT, 'VisDrone2019-DET-val/images') 55 | TRAIN_JSON = os.path.join(ANN_ROOT, 'VisDrone2019-DET_train_coco.json') 56 | VAL_JSON = os.path.join(ANN_ROOT, 'VisDrone2019-DET_val_coco.json') 57 | 58 | register_coco_instances("visdrone_train", {}, TRAIN_JSON, TRAIN_PATH) 59 | register_coco_instances("visdrone_val", {}, VAL_JSON, VAL_PATH) 60 | ``` 61 | 62 | Here, you set your json path and your images path, then you are ready to go. 63 | 64 | -------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | ## Training 2 | 3 | You can refer to `install.md` for preparing your own dataset. Basically, just convert your dataset into coco format, and it's ready to go. 
4 | 5 | We have 4 **key** train scripts, they are: 6 | 7 | - `train_coco.py`: this is basically the most commonly used train script for coco; 8 | - `train_detr.py`: use this for **any** DETR or transformer based model; 9 | - `train_net.py`: script with an experimental, changed training strategy, **used for experiments**; 10 | - `train_custom_datasets.py`: train all customized datasets; 11 | 12 | For demo usage, you can use: 13 | 14 | - `demo.py`: for demo visualize result; 15 | - `demo_lazyconfig.py`: for demo using `*.py` as config file; 16 | 17 | 18 | ## Inference 19 | 20 | You can directly call `demo.py` to run inference and visualize. A classic command would be: 21 | 22 | ``` 23 | python demo.py --config-file configs/coco/sparseinst/sparse_inst_r50vd_giam_aug.yaml --video-input ~/Movies/Videos/86277963_nb2-1-80.flv -c 0.4 --opts MODEL.WEIGHTS weights/sparse_inst_r50vd_giam_aug_8bc5b3.pth 24 | ``` 25 | 26 | ## Deploy 27 | 28 | YOLOv7 can be easily deployed via ONNX; you can use `export_onnx.py` and the corresponding config file to convert. 29 | 30 | If you got any problems on any model arch, please file an issue. 
31 | -------------------------------------------------------------------------------- /images/COCO_val2014_000000001722.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/COCO_val2014_000000001722.jpg -------------------------------------------------------------------------------- /images/COCO_val2014_000000001856.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/COCO_val2014_000000001856.jpg -------------------------------------------------------------------------------- /images/COCO_val2014_000000001869.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/COCO_val2014_000000001869.jpg -------------------------------------------------------------------------------- /images/COCO_val2014_000000001960.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/COCO_val2014_000000001960.jpg -------------------------------------------------------------------------------- /images/COCO_val2014_000000002149.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/COCO_val2014_000000002149.jpg -------------------------------------------------------------------------------- /images/COCO_val2014_000000002153.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/COCO_val2014_000000002153.jpg -------------------------------------------------------------------------------- /images/COCO_val2014_000000002171.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/COCO_val2014_000000002171.jpg -------------------------------------------------------------------------------- /images/COCO_val2014_000000002315.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/COCO_val2014_000000002315.jpg -------------------------------------------------------------------------------- /images/COCO_val2014_000000002532.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/COCO_val2014_000000002532.jpg -------------------------------------------------------------------------------- /images/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/dog.jpg -------------------------------------------------------------------------------- /images/mask/u=1506317376,3450613040&fm=26&fmt=auto&gp=0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/mask/u=1506317376,3450613040&fm=26&fmt=auto&gp=0.jpg -------------------------------------------------------------------------------- /images/mask/u=3352497688,3286290828&fm=26&fmt=auto&gp=0.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/mask/u=3352497688,3286290828&fm=26&fmt=auto&gp=0.jpg -------------------------------------------------------------------------------- /images/mask/u=3557104275,359021270&fm=26&fmt=auto&gp=0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/mask/u=3557104275,359021270&fm=26&fmt=auto&gp=0.jpg -------------------------------------------------------------------------------- /images/mask/u=4153583989,584404369&fm=26&fmt=auto&gp=0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/mask/u=4153583989,584404369&fm=26&fmt=auto&gp=0.jpg -------------------------------------------------------------------------------- /images/mask/u=724341885,3385420344&fm=26&fmt=auto&gp=0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/images/mask/u=724341885,3385420344&fm=26&fmt=auto&gp=0.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | alfred-py 2 | timm 3 | nbnb 4 | mmpycocotools 5 | omegaconf 6 | 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import time 4 | from setuptools import find_packages, setup 5 | import io 6 | from os import path 7 | 8 | 9 | this_directory = 
path.abspath(path.dirname(__file__)) 10 | with io.open(path.join(this_directory, "readme.md"), encoding="utf-8") as f: 11 | long_description = f.read() 12 | 13 | 14 | version_file = "yolov7/version.py" 15 | 16 | 17 | def get_version(): 18 | with open(version_file, "r") as f: 19 | exec(compile(f.read(), version_file, "exec")) 20 | return locals()["__version__"] 21 | 22 | 23 | if __name__ == "__main__": 24 | setup( 25 | name="yolov7_d2", 26 | version=get_version(), 27 | description="YOLOv7D2 is a high-level training framework based on detectron2", 28 | long_description="", 29 | author="LucasJin", 30 | author_email="jinfagang19@163.com", 31 | keywords="computer vision, object detection", 32 | url="https://github.com/jinfagang/yolov7_d2", 33 | packages=find_packages(exclude=("configs", "tools", "demo", "images")), 34 | classifiers=[ 35 | "Development Status :: 4 - Beta", 36 | "License :: OSI Approved :: Apache Software License", 37 | "Operating System :: OS Independent", 38 | "Programming Language :: Python :: 3", 39 | "Programming Language :: Python :: 3.5", 40 | "Programming Language :: Python :: 3.6", 41 | "Programming Language :: Python :: 3.7", 42 | "Programming Language :: Python :: 3.8", 43 | "Programming Language :: Python :: 3.9", 44 | ], 45 | license="Apache License 2.0", 46 | zip_safe=False, 47 | ) 48 | -------------------------------------------------------------------------------- /tests.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def batched_index_select(input, dim, index): 4 | views = [1 if i != dim else -1 for i in range(len(input.shape))] 5 | expanse = list(input.shape) 6 | expanse[dim] = -1 7 | index = index.view(views).expand(expanse) 8 | # making the first dim of output be B 9 | return torch.cat(torch.chunk(torch.gather(input, dim, index), chunks=index.shape[0], dim=dim), dim=0) 10 | 11 | 12 | mask = torch.randn([3, 100, 224, 224]) 13 | score = torch.randn(3 ,100) 14 | _, keep = 
torch.topk(score, 20) 15 | 16 | mask2 = mask.view(-1, 224, 224) 17 | keep2 = keep.view(-1, 20) 18 | 19 | a = mask2[keep2] 20 | 21 | score = score.view(-1) 22 | b = score[keep2] 23 | print(a.shape) 24 | print(b.shape) 25 | # print(keep) 26 | # print(mask) 27 | -------------------------------------------------------------------------------- /train_det.py: -------------------------------------------------------------------------------- 1 | """ 2 | train detection entrance 3 | 4 | Copyright @2022 YOLOv7 authors 5 | 6 | """ 7 | import os 8 | from detectron2.checkpoint import DetectionCheckpointer 9 | from detectron2.config import get_cfg 10 | from detectron2.engine import DefaultTrainer, default_argument_parser, launch 11 | from detectron2.evaluation import COCOEvaluator 12 | from detectron2.data import MetadataCatalog, build_detection_train_loader 13 | from detectron2.modeling import build_model 14 | from detectron2.utils import comm 15 | from yolov7.data.dataset_mapper import MyDatasetMapper, MyDatasetMapper2 16 | from yolov7.config import add_yolo_config 17 | from yolov7.utils.d2overrides import default_setup 18 | from yolov7.utils.wandb.wandb_logger import is_wandb_available 19 | 20 | 21 | class Trainer(DefaultTrainer): 22 | 23 | custom_mapper = None 24 | 25 | @classmethod 26 | def build_evaluator(cls, cfg, dataset_name, output_folder=None): 27 | if output_folder is None: 28 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") 29 | return COCOEvaluator(dataset_name, output_dir=output_folder) 30 | 31 | @classmethod 32 | def build_train_loader(cls, cfg): 33 | cls.custom_mapper = MyDatasetMapper2(cfg, True) 34 | return build_detection_train_loader(cfg, mapper=cls.custom_mapper) 35 | 36 | @classmethod 37 | def build_model(cls, cfg): 38 | model = build_model(cfg) 39 | return model 40 | 41 | def build_writers(self): 42 | if self.cfg.WANDB.ENABLED is is_wandb_available(): 43 | from yolov7.utils.wandb.wandb_logger import WandbWriter 44 | 45 | writers = 
super().build_writers() + [ 46 | WandbWriter(self.cfg.WANDB.PROJECT_NAME) 47 | ] 48 | else: 49 | writers = super().build_writers() 50 | return writers 51 | 52 | 53 | def setup(args): 54 | cfg = get_cfg() 55 | add_yolo_config(cfg) 56 | cfg.merge_from_file(args.config_file) 57 | cfg.merge_from_list(args.opts) 58 | cfg.freeze() 59 | default_setup(cfg, args) 60 | return cfg 61 | 62 | 63 | def main(args): 64 | cfg = setup(args) 65 | if args.eval_only: 66 | model = Trainer.build_model(cfg) 67 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 68 | cfg.MODEL.WEIGHTS, resume=args.resume 69 | ) 70 | res = Trainer.test(cfg, model) 71 | return res 72 | 73 | trainer = Trainer(cfg) 74 | trainer.resume_or_load(resume=args.resume) 75 | return trainer.train() 76 | 77 | 78 | if __name__ == "__main__": 79 | args = default_argument_parser().parse_args() 80 | launch( 81 | main, 82 | args.num_gpus, 83 | num_machines=args.num_machines, 84 | machine_rank=args.machine_rank, 85 | dist_url=args.dist_url, 86 | args=(args,), 87 | ) 88 | -------------------------------------------------------------------------------- /train_inseg.py: -------------------------------------------------------------------------------- 1 | import os 2 | from detectron2.checkpoint import DetectionCheckpointer 3 | from detectron2.config import get_cfg 4 | from detectron2.engine import ( 5 | DefaultTrainer, 6 | default_argument_parser, 7 | default_setup, 8 | launch 9 | ) 10 | from detectron2.data import build_detection_train_loader 11 | from detectron2.modeling import build_model 12 | 13 | from yolov7.config import add_yolo_config 14 | from yolov7.data.dataset_mapper import MyDatasetMapper, MyDatasetMapper2 15 | from yolov7.evaluation.coco_evaluation import COCOMaskEvaluator 16 | 17 | """ 18 | Script used for training instance segmentation, i.e. SparseInst. 
19 | """ 20 | 21 | class Trainer(DefaultTrainer): 22 | 23 | custom_mapper = None 24 | 25 | @classmethod 26 | def build_evaluator(cls, cfg, dataset_name, output_folder=None): 27 | if output_folder is None: 28 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") 29 | return COCOMaskEvaluator(dataset_name, output_dir=output_folder) 30 | 31 | @classmethod 32 | def build_train_loader(cls, cfg): 33 | cls.custom_mapper = MyDatasetMapper(cfg, True) 34 | return build_detection_train_loader(cfg, mapper=cls.custom_mapper) 35 | 36 | @classmethod 37 | def build_model(cls, cfg): 38 | model = build_model(cfg) 39 | return model 40 | 41 | 42 | def setup(args): 43 | cfg = get_cfg() 44 | add_yolo_config(cfg) 45 | cfg.merge_from_file(args.config_file) 46 | cfg.merge_from_list(args.opts) 47 | cfg.freeze() 48 | default_setup(cfg, args) 49 | return cfg 50 | 51 | 52 | def main(args): 53 | cfg = setup(args) 54 | 55 | if args.eval_only: 56 | model = Trainer.build_model(cfg) 57 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 58 | cfg.MODEL.WEIGHTS, resume=args.resume 59 | ) 60 | res = Trainer.test(cfg, model) 61 | return res 62 | 63 | trainer = Trainer(cfg) 64 | trainer.resume_or_load(resume=args.resume) 65 | return trainer.train() 66 | 67 | 68 | if __name__ == "__main__": 69 | args = default_argument_parser().parse_args() 70 | launch( 71 | main, 72 | args.num_gpus, 73 | num_machines=args.num_machines, 74 | machine_rank=args.machine_rank, 75 | dist_url=args.dist_url, 76 | args=(args,), 77 | ) 78 | -------------------------------------------------------------------------------- /upload_pypi.sh: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (c) 2020 JinTian. 3 | ## 4 | ## This file is part of alfred 5 | ## (see http://jinfagang.github.io). 6 | ## 7 | ## Licensed to the Apache Software Foundation (ASF) under one 8 | ## or more contributor license agreements. 
See the NOTICE file 9 | ## distributed with this work for additional information 10 | ## regarding copyright ownership. The ASF licenses this file 11 | ## to you under the Apache License, Version 2.0 (the 12 | ## "License"); you may not use this file except in compliance 13 | ## with the License. You may obtain a copy of the License at 14 | ## 15 | ## http://www.apache.org/licenses/LICENSE-2.0 16 | ## 17 | ## Unless required by applicable law or agreed to in writing, 18 | ## software distributed under the License is distributed on an 19 | ## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 20 | ## KIND, either express or implied. See the License for the 21 | ## specific language governing permissions and limitations 22 | ## under the License. 23 | ## 24 | # check setup is correct or not 25 | python3 setup.py check 26 | 27 | sudo rm -r build/ 28 | sudo rm -r dist/ 29 | 30 | # pypi interface are not valid any longer 31 | # python3 setup.py sdist 32 | # python3 setup.py sdist upload -r pypi 33 | 34 | # using twine instead 35 | python3 setup.py sdist 36 | twine upload dist/* 37 | 38 | -------------------------------------------------------------------------------- /weights/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !get_models.sh 4 | -------------------------------------------------------------------------------- /weights/get_models.sh: -------------------------------------------------------------------------------- 1 | gdown https://drive.google.com/file/d/1MK8rO3qtA7vN9KVSBdp0VvZHCNq8-bvz/view\?usp\=sharing --fuzzy 2 | -------------------------------------------------------------------------------- /yolov7/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling import YOLO -------------------------------------------------------------------------------- /yolov7/configs/config_convnext.py: 
-------------------------------------------------------------------------------- 1 | 2 | from detectron2.config import CfgNode as CN 3 | 4 | def add_convnext_default_configs(_C): 5 | _C.MODEL.CONVNEXT = CN() 6 | 7 | _C.MODEL.CONVNEXT.OUT_FEATURES = ["dark3", "dark4", "dark5"] 8 | _C.MODEL.CONVNEXT.WEIGHTS = "" 9 | _C.MODEL.CONVNEXT.DEPTH_WISE = False 10 | -------------------------------------------------------------------------------- /yolov7/configs/config_sparseinst.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Tianheng Cheng and its affiliates. All Rights Reserved 2 | 3 | from detectron2.config import CfgNode as CN 4 | 5 | 6 | def add_sparse_inst_config(cfg): 7 | 8 | cfg.MODEL.DEVICE = "cuda" 9 | cfg.MODEL.MASK_ON = True 10 | # [SparseInst] 11 | cfg.MODEL.SPARSE_INST = CN() 12 | 13 | # parameters for inference 14 | cfg.MODEL.SPARSE_INST.CLS_THRESHOLD = 0.005 15 | cfg.MODEL.SPARSE_INST.MASK_THRESHOLD = 0.45 16 | cfg.MODEL.SPARSE_INST.MAX_DETECTIONS = 100 17 | 18 | # [Encoder] 19 | cfg.MODEL.SPARSE_INST.ENCODER = CN() 20 | cfg.MODEL.SPARSE_INST.ENCODER.NAME = "FPNPPMEncoder" 21 | cfg.MODEL.SPARSE_INST.ENCODER.NORM = "" 22 | cfg.MODEL.SPARSE_INST.ENCODER.IN_FEATURES = ["res3", "res4", "res5"] 23 | cfg.MODEL.SPARSE_INST.ENCODER.NUM_CHANNELS = 256 24 | 25 | # [Decoder] 26 | cfg.MODEL.SPARSE_INST.DECODER = CN() 27 | cfg.MODEL.SPARSE_INST.DECODER.NAME = "BaseIAMDecoder" 28 | cfg.MODEL.SPARSE_INST.DECODER.NUM_MASKS = 100 29 | cfg.MODEL.SPARSE_INST.DECODER.NUM_CLASSES = 80 30 | # kernels for mask features 31 | cfg.MODEL.SPARSE_INST.DECODER.KERNEL_DIM = 128 32 | # upsample factor for output masks 33 | cfg.MODEL.SPARSE_INST.DECODER.SCALE_FACTOR = 2.0 34 | cfg.MODEL.SPARSE_INST.DECODER.OUTPUT_IAM = False 35 | cfg.MODEL.SPARSE_INST.DECODER.GROUPS = 4 36 | # decoder.inst_branch 37 | cfg.MODEL.SPARSE_INST.DECODER.INST = CN() 38 | cfg.MODEL.SPARSE_INST.DECODER.INST.DIM = 256 39 | cfg.MODEL.SPARSE_INST.DECODER.INST.CONVS 
= 4 40 | # decoder.mask_branch 41 | cfg.MODEL.SPARSE_INST.DECODER.MASK = CN() 42 | cfg.MODEL.SPARSE_INST.DECODER.MASK.DIM = 256 43 | cfg.MODEL.SPARSE_INST.DECODER.MASK.CONVS = 4 44 | 45 | # [Loss] 46 | cfg.MODEL.SPARSE_INST.LOSS = CN() 47 | cfg.MODEL.SPARSE_INST.LOSS.NAME = "SparseInstCriterion" 48 | cfg.MODEL.SPARSE_INST.LOSS.ITEMS = ("labels", "masks") 49 | # loss weight 50 | cfg.MODEL.SPARSE_INST.LOSS.CLASS_WEIGHT = 2.0 51 | cfg.MODEL.SPARSE_INST.LOSS.MASK_PIXEL_WEIGHT = 5.0 52 | cfg.MODEL.SPARSE_INST.LOSS.MASK_DICE_WEIGHT = 2.0 53 | # iou-aware objectness loss weight 54 | cfg.MODEL.SPARSE_INST.LOSS.OBJECTNESS_WEIGHT = 1.0 55 | 56 | # [Matcher] 57 | cfg.MODEL.SPARSE_INST.MATCHER = CN() 58 | cfg.MODEL.SPARSE_INST.MATCHER.NAME = "SparseInstMatcher" 59 | cfg.MODEL.SPARSE_INST.MATCHER.ALPHA = 0.8 60 | cfg.MODEL.SPARSE_INST.MATCHER.BETA = 0.2 61 | 62 | # [Optimizer] 63 | cfg.SOLVER.OPTIMIZER = "ADAMW" 64 | cfg.SOLVER.BACKBONE_MULTIPLIER = 1.0 65 | cfg.SOLVER.AMSGRAD = False 66 | 67 | # [Dataset mapper] 68 | cfg.MODEL.SPARSE_INST.DATASET_MAPPER = "SparseInstDatasetMapper" 69 | -------------------------------------------------------------------------------- /yolov7/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .augmentation_impl import * 3 | 4 | -------------------------------------------------------------------------------- /yolov7/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import build_darknet_backbone 2 | from .backbone import build_swin_transformer_backbone 3 | from .backbone import build_efficientnet_backbone, build_efficientnet_fpn_backbone 4 | from .meta_arch import YOLO, YOLOV7 5 | -------------------------------------------------------------------------------- /yolov7/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .darknet import 
build_darknet_backbone 2 | from .swin_transformer import build_swin_transformer_backbone 3 | from .efficientnet import build_efficientnet_backbone, build_efficientnet_fpn_backbone 4 | from .cspdarknet import build_cspdarknet_backbone 5 | from .pvt_v2 import build_pvt_v2_backbone 6 | 7 | from .res2nets.wrapper import build_res2net_backbone 8 | 9 | from .darknetx import build_cspdarknetx_backbone 10 | from .regnet import build_regnet_backbone 11 | from .fbnet_v3 import * 12 | from .fbnet_v2 import FBNetV2C4Backbone, build_fbnet 13 | from .resnetvd import build_resnet_vd_backbone 14 | 15 | from .convnext import build_convnext_backbone 16 | from .efficientrep import build_efficientrep_backbone -------------------------------------------------------------------------------- /yolov7/modeling/backbone/cfg.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import CfgNode as CN 2 | 3 | 4 | def add_fbnet_v2_default_configs(_C): 5 | _C.MODEL.FBNET_V2 = CN() 6 | 7 | _C.MODEL.FBNET_V2.ARCH = "default" 8 | _C.MODEL.FBNET_V2.ARCH_DEF = [] 9 | # number of channels input to trunk 10 | _C.MODEL.FBNET_V2.STEM_IN_CHANNELS = 3 11 | _C.MODEL.FBNET_V2.SCALE_FACTOR = 1.0 12 | # the output channels will be divisible by WIDTH_DIVISOR 13 | _C.MODEL.FBNET_V2.WIDTH_DIVISOR = 1 14 | 15 | # normalization configs 16 | # name of norm such as "bn", "sync_bn", "gn" 17 | _C.MODEL.FBNET_V2.NORM = "bn" 18 | # for advanced use case that requries extra arguments, passing a list of 19 | # dict such as [{"num_groups": 8}, {"momentum": 0.1}] (merged in given order). 20 | # Note that string written it in .yaml will be evaluated by yacs, thus this 21 | # node will become normal python object. 
22 | # https://github.com/rbgirshick/yacs/blob/master/yacs/config.py#L410 23 | _C.MODEL.FBNET_V2.NORM_ARGS = [] 24 | 25 | _C.MODEL.VT_FPN = CN() 26 | 27 | _C.MODEL.VT_FPN.IN_FEATURES = ["res2", "res3", "res4", "res5"] 28 | _C.MODEL.VT_FPN.OUT_CHANNELS = 256 29 | _C.MODEL.VT_FPN.LAYERS = 3 30 | _C.MODEL.VT_FPN.TOKEN_LS = [16, 16, 8, 8] 31 | _C.MODEL.VT_FPN.TOKEN_C = 1024 32 | _C.MODEL.VT_FPN.HEADS = 16 33 | _C.MODEL.VT_FPN.MIN_GROUP_PLANES = 64 34 | _C.MODEL.VT_FPN.NORM = "BN" 35 | _C.MODEL.VT_FPN.POS_HWS = [] 36 | _C.MODEL.VT_FPN.POS_N_DOWNSAMPLE = [] 37 | 38 | 39 | 40 | def add_convnext_default_configs(_C): 41 | _C.MODEL.CONVNEXT = CN() 42 | 43 | _C.MODEL.CONVNEXT.OUT_FEATURES = ["dark3", "dark4", "dark5"] 44 | _C.MODEL.CONVNEXT.WEIGHTS = "" 45 | _C.MODEL.CONVNEXT.DEPTH_WISE = False 46 | 47 | -------------------------------------------------------------------------------- /yolov7/modeling/backbone/cspresnet.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import Conv2d, FrozenBatchNorm2d, ShapeSpec 2 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 3 | from detectron2.modeling.backbone import Backbone 4 | 5 | import torch 6 | from timm.models.cspnet import cspresnet50d 7 | 8 | 9 | @BACKBONE_REGISTRY.register() 10 | def build_cspresnet50d_backbone(cfg, input_shape): 11 | """ 12 | Create a EfficientNet instance from config. 13 | 14 | Returns: 15 | ResNet: a :class:`ResNet` instance. 
16 | """ 17 | arch = cfg.MODEL.EFFICIENTNET.NAME 18 | features_indices = cfg.MODEL.EFFICIENTNET.FEATURE_INDICES 19 | _out_features = cfg.MODEL.EFFICIENTNET.OUT_FEATURES 20 | backbone = cspresnet50d(pretrained=True) 21 | backbone._out_features = _out_features 22 | return backbone 23 | 24 | -------------------------------------------------------------------------------- /yolov7/modeling/backbone/dabdetr_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/yolov7/modeling/backbone/dabdetr_backbone.py -------------------------------------------------------------------------------- /yolov7/modeling/backbone/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .wrappers import Conv2d ,SeparableConv2d, MaxPool2d 2 | from .activations import MemoryEfficientSwish, Swish -------------------------------------------------------------------------------- /yolov7/modeling/backbone/layers/activations.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # -------------------------------------------------------- 4 | # Descripttion: https://github.com/sxhxliang/detectron2_backbone 5 | # version: 0.0.1 6 | # Author: Shihua Liang (sxhx.liang@gmail.com) 7 | # FilePath: /detectron2_backbone/detectron2_backbone/layers/activations.py 8 | # Create: 2020-05-04 10:33:14 9 | # LastAuthor: Shihua Liang 10 | # lastTime: 2020-05-04 10:33:52 11 | # -------------------------------------------------------- 12 | 13 | import math 14 | 15 | import torch 16 | from torch import nn 17 | 18 | class SwishImplementation(torch.autograd.Function): 19 | @staticmethod 20 | def forward(ctx, i): 21 | result = i * torch.sigmoid(i) 22 | ctx.save_for_backward(i) 23 | return result 24 | 25 | @staticmethod 26 | def backward(ctx, 
grad_output): 27 | i = ctx.saved_variables[0] 28 | sigmoid_i = torch.sigmoid(i) 29 | return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) 30 | 31 | class MemoryEfficientSwish(nn.Module): 32 | def forward(self, x): 33 | return SwishImplementation.apply(x) 34 | 35 | class Swish(nn.Module): 36 | def forward(self, x): 37 | return x * torch.sigmoid(x) -------------------------------------------------------------------------------- /yolov7/modeling/backbone/layers/utils.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import torch.nn as nn 4 | 5 | from detectron2.layers import (BatchNorm2d, NaiveSyncBatchNorm, 6 | FrozenBatchNorm2d) 7 | from detectron2.utils import env 8 | 9 | 10 | norms = { 11 | "BN": BatchNorm2d, 12 | # Fixed in https://github.com/pytorch/pytorch/pull/36382 13 | "SyncBN": NaiveSyncBatchNorm if env.TORCH_VERSION <= ( 14 | 1, 5) else nn.SyncBatchNorm, 15 | "FrozenBN": FrozenBatchNorm2d, 16 | "GN": lambda channels: nn.GroupNorm(32, channels), 17 | # for debugging: 18 | "nnSyncBN": nn.SyncBatchNorm, 19 | "naiveSyncBN": NaiveSyncBatchNorm, 20 | } 21 | 22 | 23 | def get_norm(norm, out_channels, **kwargs): 24 | """ 25 | Args: 26 | norm (str or callable): either one of BN, SyncBN, FrozenBN, GN; 27 | or a callable that takes a channel number and returns 28 | the normalization layer as a nn.Module. 29 | kwargs: Additional parameters in normalization layers, 30 | such as, eps, momentum 31 | 32 | Returns: 33 | nn.Module or None: the normalization layer 34 | """ 35 | if norm is None: 36 | return None 37 | if isinstance(norm, str): 38 | if len(norm) == 0: 39 | return None 40 | assert norm in norms.keys(), 'normtype must be: {}'.format(norms.keys()) 41 | norm = norms[norm] 42 | return norm(out_channels, **kwargs) 43 | 44 | 45 | def get_activation(activation): 46 | """ 47 | Only support `ReLU` and `LeakyReLU` now. 
48 | 49 | Args: 50 | activation (str or callable): 51 | 52 | Returns: 53 | nn.Module: the activation layer 54 | """ 55 | 56 | act = { 57 | "ReLU": nn.ReLU, 58 | "LeakyReLU": nn.LeakyReLU, 59 | }[activation] 60 | if activation == "LeakyReLU": 61 | act = partial(act, negative_slope=0.1) 62 | return act(inplace=True) 63 | -------------------------------------------------------------------------------- /yolov7/modeling/backbone/res2nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/yolov7/modeling/backbone/res2nets/__init__.py -------------------------------------------------------------------------------- /yolov7/modeling/backbone/res2nets/wrapper.py: -------------------------------------------------------------------------------- 1 | from .res2net_v1b import res2net50_v1b, res2net50_v1b_26w_4s, res2net101_v1b, res2net101_v1b_26w_4s, res2net152_v1b_26w_4s 2 | from .res2next import res2next50 3 | from detectron2.modeling.backbone import build_backbone, BACKBONE_REGISTRY 4 | 5 | 6 | @BACKBONE_REGISTRY.register() 7 | def build_res2net_backbone(cfg, input_shape): 8 | """ 9 | Create a Res2Net instance from config. 10 | Returns: 11 | ResNet: a :class:`ResNet` instance. 
12 | """ 13 | r2type = cfg.MODEL.RESNETS.R2TYPE 14 | out_features = cfg.MODEL.RESNETS.OUT_FEATURES 15 | 16 | if r2type == "res2net50_v1b": 17 | model = res2net50_v1b(pretrained=True, out_features=out_features) 18 | elif r2type == "res2net50_v1b_26w_4s": 19 | model = res2net50_v1b_26w_4s(pretrained=True, out_features=out_features) 20 | elif r2type == "res2net101_v1b": 21 | model = res2net101_v1b(pretrained=True, out_features=out_features) 22 | elif r2type == "res2net101_v1b_26w_4s": 23 | model = res2net101_v1b_26w_4s(pretrained=True, out_features=out_features) 24 | elif r2type == "res2next50": 25 | model = res2next50(pretrained=True, out_features=out_features) 26 | return model -------------------------------------------------------------------------------- /yolov7/modeling/backbone/volo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/yolov7/modeling/backbone/volo.py -------------------------------------------------------------------------------- /yolov7/modeling/head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/yolov7/modeling/head/__init__.py -------------------------------------------------------------------------------- /yolov7/modeling/loss/seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 4 | """ 5 | This file provides the definition of the convolutional heads used to predict masks, as well as the losses 6 | """ 7 | import io 8 | from collections import defaultdict 9 | from typing import List, Optional 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from PIL import Image 14 | from torch import Tensor 15 | 16 | 17 | 18 | def dice_loss(inputs, targets, num_boxes): 19 | """ 20 | Compute the DICE loss, similar to generalized IOU for masks 21 | Args: 22 | inputs: A float tensor of arbitrary shape. 23 | The predictions for each example. 24 | targets: A float tensor with the same shape as inputs. Stores the binary 25 | classification label for each element in inputs 26 | (0 for the negative class and 1 for the positive class). 27 | """ 28 | inputs = inputs.sigmoid() 29 | inputs = inputs.flatten(1) 30 | numerator = 2 * (inputs * targets).sum(1) 31 | denominator = inputs.sum(-1) + targets.sum(-1) 32 | loss = 1 - (numerator + 1) / (denominator + 1) 33 | return loss.sum() / num_boxes 34 | 35 | 36 | def sigmoid_focal_loss( 37 | inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2 38 | ): 39 | """ 40 | Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. 41 | Args: 42 | inputs: A float tensor of arbitrary shape. 43 | The predictions for each example. 44 | targets: A float tensor with the same shape as inputs. Stores the binary 45 | classification label for each element in inputs 46 | (0 for the negative class and 1 for the positive class). 47 | alpha: (optional) Weighting factor in range (0,1) to balance 48 | positive vs negative examples. Default = -1 (no weighting). 49 | gamma: Exponent of the modulating factor (1 - p_t) to 50 | balance easy vs hard examples. 
51 | Returns: 52 | Loss tensor 53 | """ 54 | prob = inputs.sigmoid() 55 | ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") 56 | p_t = prob * targets + (1 - prob) * (1 - targets) 57 | loss = ce_loss * ((1 - p_t) ** gamma) 58 | 59 | if alpha >= 0: 60 | alpha_t = alpha * targets + (1 - alpha) * (1 - targets) 61 | loss = alpha_t * loss 62 | 63 | return loss.mean(1).sum() / num_boxes 64 | -------------------------------------------------------------------------------- /yolov7/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | from .yolo import YOLO 2 | from .yolov7 import YOLOV7 3 | from .yolov7p import YOLOV7P 4 | from .yolomask import YOLOMask 5 | 6 | from .yolof import YOLOF 7 | from .yolox import YOLOX 8 | from .yolov5 import YOLOV5 9 | from .yolov6 import YOLOV6 10 | 11 | from .solov2 import SOLOv2 12 | 13 | from .detr import Detr 14 | from .anchor_detr import AnchorDetr 15 | from .smca_detr import SMCADetr 16 | from .detr_d2go import DetrD2go 17 | from .sparseinst import SparseInst -------------------------------------------------------------------------------- /yolov7/modeling/meta_arch/maskrcnn_seg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/yolov7/modeling/meta_arch/maskrcnn_seg.py -------------------------------------------------------------------------------- /yolov7/modeling/meta_arch/vidtplus.py: -------------------------------------------------------------------------------- 1 | ''' 2 | https://github.com/naver-ai/vidt/tree/vidt-plus 3 | 4 | swin-nano with mAP 45, while about 20FPS 5 | ''' 6 | -------------------------------------------------------------------------------- /yolov7/modeling/transcoders/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/yolov7/modeling/transcoders/__init__.py -------------------------------------------------------------------------------- /yolov7/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | from .build import build_optimizer_mapper 4 | 5 | __all__ = ["build_optimizer_mapper"] 6 | -------------------------------------------------------------------------------- /yolov7/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/yolov7/utils/__init__.py -------------------------------------------------------------------------------- /yolov7/utils/allreduce_norm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
4 | 
5 | import pickle
6 | from collections import OrderedDict
7 | 
8 | import torch
9 | from torch import distributed as dist
10 | from torch import nn
11 | from detectron2.utils.comm import get_world_size, _get_global_gloo_group
12 | 
    # Norm layers whose running statistics are synchronized lazily (on demand
    # via all_reduce_norm) rather than on every forward pass.
13 | ASYNC_NORM = (
14 |     nn.BatchNorm1d,
15 |     nn.BatchNorm2d,
16 |     nn.BatchNorm3d,
17 |     nn.InstanceNorm1d,
18 |     nn.InstanceNorm2d,
19 |     nn.InstanceNorm3d,
20 | )
21 | 
22 | __all__ = [
23 |     "get_async_norm_states",
24 |     "pyobj2tensor",
25 |     "tensor2pyobj",
26 |     "all_reduce",
27 |     "all_reduce_norm",
28 | ]
29 | 
30 | 
31 | def get_async_norm_states(module):
        # Collect the state_dict entries (weight/bias/running stats) of every
        # ASYNC_NORM layer inside `module`, keyed by the layer's fully qualified
        # path joined with the state name (e.g. "backbone.bn1.running_mean").
        # Returns an OrderedDict so the key order is deterministic per rank.
32 |     async_norm_states = OrderedDict()
33 |     for name, child in module.named_modules():
34 |         if isinstance(child, ASYNC_NORM):
35 |             for k, v in child.state_dict().items():
36 |                 async_norm_states[".".join([name, k])] = v
37 |     return async_norm_states
38 | 
39 | 
40 | def pyobj2tensor(pyobj, device="cuda"):
41 |     """serialize picklable python object to tensor"""
42 |     storage = torch.ByteStorage.from_buffer(pickle.dumps(pyobj))
43 |     return torch.ByteTensor(storage).to(device=device)
44 | 
45 | 
46 | def tensor2pyobj(tensor):
47 |     """deserialize tensor to picklable python object"""
48 |     return pickle.loads(tensor.cpu().numpy().tobytes())
49 | 
50 | 
51 | def _get_reduce_op(op_name):
        # NOTE: "mean" deliberately maps to SUM as well — the caller
        # (all_reduce) divides the summed tensor by world_size afterwards.
52 |     return {
53 |         "sum": dist.ReduceOp.SUM,
54 |         "mean": dist.ReduceOp.SUM,
55 |     }[op_name.lower()]
56 | 
57 | 
58 | def all_reduce(py_dict, op="sum", group=None):
59 |     """
60 |     Apply all reduce function for python dict object.
61 |     NOTE: make sure that every py_dict has the same keys and values are in the same shape.
62 | 
63 |     Args:
64 |         py_dict (dict): dict to apply all reduce op.
65 |         op (str): operator, could be "sum" or "mean".
66 |     """
67 |     world_size = get_world_size()
68 |     if world_size == 1:
69 |         return py_dict
            # Collectives below run on the gloo group; a size-1 group also means
            # there is nothing to reduce.
70 |     if group is None:
71 |         group = _get_global_gloo_group()
72 |     if dist.get_world_size(group) == 1:
73 |         return py_dict
74 | 
75 |     # all reduce logic across different devices.
        # Broadcast rank 0's key order so every rank concatenates its tensors in
        # the same order before the fused collective.
76 |     py_key = list(py_dict.keys())
77 |     py_key_tensor = pyobj2tensor(py_key)
78 |     dist.broadcast(py_key_tensor, src=0)
79 |     py_key = tensor2pyobj(py_key_tensor)
80 | 
81 |     tensor_shapes = [py_dict[k].shape for k in py_key]
82 |     tensor_numels = [py_dict[k].numel() for k in py_key]
83 | 
        # One all_reduce over a single flattened buffer instead of one
        # collective per tensor.
84 |     flatten_tensor = torch.cat([py_dict[k].flatten() for k in py_key])
85 |     dist.all_reduce(flatten_tensor, op=_get_reduce_op(op))
86 |     if op == "mean":
87 |         flatten_tensor /= world_size  # _get_reduce_op summed; divide to get the mean
88 | 
        # Split the reduced buffer back into per-key tensors of the original shapes.
89 |     split_tensors = [
90 |         x.reshape(shape)
91 |         for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes)
92 |     ]
93 |     return OrderedDict({k: v for k, v in zip(py_key, split_tensors)})
94 | 
95 | 
96 | def all_reduce_norm(module):
97 |     """
98 |     All reduce norm statistics in different devices.
99 |     """
100 |     states = get_async_norm_states(module)
101 |     states = all_reduce(states, op="mean")  # average the norm stats across ranks
102 |     module.load_state_dict(states, strict=False)  # strict=False: dict holds norm keys only
103 | 
-------------------------------------------------------------------------------- /yolov7/utils/get_default_cfg.py: --------------------------------------------------------------------------------
1 | 
2 | 
3 | def get_default_solver_configs(_C):
        # Add this repo's solver defaults to a detectron2-style config node `_C`
        # (mutated in place and also returned for chaining).
4 |     # Set default optimizer
5 |     _C.SOLVER.OPTIMIZER = "sgd"
6 |     _C.SOLVER.LR_MULTIPLIER_OVERWRITE = []
7 |     _C.SOLVER.WEIGHT_DECAY_EMBED = 0.0
8 | 
9 |     # Default world size in D2 is 0, which means scaling is not applied. For D2Go
10 |     # auto scale is encouraged, setting it to 8
11 |     assert _C.SOLVER.REFERENCE_WORLD_SIZE == 0  # guard: caller must not have set it already
12 |     _C.SOLVER.REFERENCE_WORLD_SIZE = 8
13 |     # Besides scaling default D2 configs, also scale quantization configs
14 |     _C.SOLVER.AUTO_SCALING_METHODS = [
15 |         "default_scale_d2_configs",
16 |         "default_scale_quantization_configs",
17 |     ]
18 |     return _C
-------------------------------------------------------------------------------- /yolov7/utils/wandb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/yolov7_d2/780512971dec4f64a66e23bd304f127a19dd842a/yolov7/utils/wandb/__init__.py -------------------------------------------------------------------------------- /yolov7/version.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Lucas Jin. All rights reserved.
2 | 
3 | __version__ = "0.0.3"
4 | short_version = __version__
5 | 
6 | 
7 | def parse_version_info(version_str):
        # Parse a dotted version string into a tuple, e.g. "0.0.3" -> (0, 0, 3)
        # and "1.0rc1" -> (1, 0, "rc1"). Segments that are neither all digits
        # nor contain "rc" are silently dropped.
8 |     version_info = []
9 |     for x in version_str.split("."):
10 |         if x.isdigit():
11 |             version_info.append(int(x))
12 |         elif x.find("rc") != -1:
13 |             patch_version = x.split("rc")
14 |             version_info.append(int(patch_version[0]))
15 |             version_info.append(f"rc{patch_version[1]}")
16 |     return tuple(version_info)
17 | 
18 | 
19 | version_info = parse_version_info(__version__)
20 | 
--------------------------------------------------------------------------------