├── utils
│   ├── __init__.py
│   ├── aws
│   │   ├── __init__.py
│   │   ├── mime.sh
│   │   ├── resume.py
│   │   └── userdata.sh
│   ├── wandb_logging
│   │   ├── __init__.py
│   │   └── log_dataset.py
│   ├── __pycache__
│   │   ├── loss.cpython-36.pyc
│   │   ├── plots.cpython-36.pyc
│   │   ├── __init__.cpython-36.pyc
│   │   ├── add_nms.cpython-36.pyc
│   │   ├── datasets.cpython-36.pyc
│   │   ├── general.cpython-36.pyc
│   │   ├── metrics.cpython-36.pyc
│   │   ├── autoanchor.cpython-36.pyc
│   │   ├── activations.cpython-36.pyc
│   │   ├── carbox_auto.cpython-36.pyc
│   │   ├── google_utils.cpython-36.pyc
│   │   └── torch_utils.cpython-36.pyc
│   ├── google_app_engine
│   │   ├── additional_requirements.txt
│   │   ├── app.yaml
│   │   └── Dockerfile
│   ├── activations.py
│   ├── google_utils.py
│   ├── add_nms.py
│   ├── autoanchor.py
│   └── metrics.py
├── models
│   ├── __init__.py
│   └── __pycache__
│       ├── yolo.cpython-36.pyc
│       ├── common.cpython-36.pyc
│       ├── __init__.cpython-36.pyc
│       └── experimental.cpython-36.pyc
├── data
│   ├── horses.jpg
│   ├── coco.yaml
│   ├── hyp.scratch.p5.yaml
│   ├── hyp.scratch.p6.yaml
│   ├── hyp.scratch.custom.yaml
│   ├── hyp.scratch.tiny.yaml
│   └── hyp.scratch.mask.yaml
├── figures
│   ├── 000000542426.jpg
│   ├── 000000553267.jpg
│   ├── 000000553735.jpg
│   ├── 000000564532.jpg
│   ├── 000000573973.jpg
│   └── 000000575916.jpg
├── results
│   ├── 640_trt_cv2img_VP_0.jpg
│   ├── 640_trt_cv2img_VP_1.jpg
│   ├── 640_trt_cv2img_VP_2.jpg
│   ├── 640_trt_cv2img_VP_3.jpg
│   ├── 640_trt_cv2img_VP_4.jpg
│   └── 640_trt_cv2img_VP_5.jpg
├── __pycache__
│   ├── carbox.cpython-36.pyc
│   ├── carbox_160.cpython-36.pyc
│   └── carbox_auto.cpython-36.pyc
├── deploy
│   └── triton-inference-server
│       ├── data
│       │   ├── dog.jpg
│       │   └── dog_result.jpg
│       ├── boundingbox.py
│       ├── labels.py
│       ├── processing.py
│       ├── render.py
│       └── README.md
├── requirements.txt
├── cfg
│   ├── baseline
│   │   ├── r50-csp.yaml
│   │   ├── x50-csp.yaml
│   │   ├── yolov3.yaml
│   │   ├── yolov3-spp.yaml
│   │   ├── yolor-csp.yaml
│   │   ├── yolov4-csp.yaml
│   │   ├── yolor-csp-x.yaml
│   │   ├── yolor-e6.yaml
│   │   ├── yolor-d6.yaml
│   │   ├── yolor-p6.yaml
│   │   └── yolor-w6.yaml
│   ├── deploy
│   │   ├── yolov7-tiny-silu.yaml
│   │   ├── yolov7.yaml
│   │   ├── yolov7-tiny.yaml
│   │   ├── yolov7x.yaml
│   │   ├── yolov7-w6.yaml
│   │   ├── yolov7-e6.yaml
│   │   ├── yolov7-d6.yaml
│   │   └── yolov7-e6e.yaml
│   ├── training
│   │   ├── yolov7.yaml
│   │   ├── yolov7-tiny.yaml
│   │   ├── yolov7x.yaml
│   │   ├── yolov7-w6.yaml
│   │   ├── yolov7-e6.yaml
│   │   ├── yolov7-d6.yaml
│   │   └── yolov7-e6e.yaml
│   └── yolov7-mask.yaml
├── hubconf.py
├── README.md
├── export_mask.py
└── segment_video.py
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/utils/aws/__init__.py:
--------------------------------------------------------------------------------
1 | #init
--------------------------------------------------------------------------------
/utils/wandb_logging/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/data/horses.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/data/horses.jpg
--------------------------------------------------------------------------------
/figures/000000542426.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/figures/000000542426.jpg -------------------------------------------------------------------------------- /figures/000000553267.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/figures/000000553267.jpg -------------------------------------------------------------------------------- /figures/000000553735.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/figures/000000553735.jpg -------------------------------------------------------------------------------- /figures/000000564532.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/figures/000000564532.jpg -------------------------------------------------------------------------------- /figures/000000573973.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/figures/000000573973.jpg -------------------------------------------------------------------------------- /figures/000000575916.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/figures/000000575916.jpg -------------------------------------------------------------------------------- /results/640_trt_cv2img_VP_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/results/640_trt_cv2img_VP_0.jpg -------------------------------------------------------------------------------- /results/640_trt_cv2img_VP_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/results/640_trt_cv2img_VP_1.jpg -------------------------------------------------------------------------------- /results/640_trt_cv2img_VP_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/results/640_trt_cv2img_VP_2.jpg -------------------------------------------------------------------------------- /results/640_trt_cv2img_VP_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/results/640_trt_cv2img_VP_3.jpg -------------------------------------------------------------------------------- /results/640_trt_cv2img_VP_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/results/640_trt_cv2img_VP_4.jpg -------------------------------------------------------------------------------- /results/640_trt_cv2img_VP_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/results/640_trt_cv2img_VP_5.jpg -------------------------------------------------------------------------------- /__pycache__/carbox.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/__pycache__/carbox.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/carbox_160.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/__pycache__/carbox_160.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/carbox_auto.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/__pycache__/carbox_auto.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/yolo.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/models/__pycache__/yolo.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/loss.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/loss.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/plots.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/plots.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/common.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/models/__pycache__/common.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/add_nms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/add_nms.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/datasets.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/datasets.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/general.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/general.cpython-36.pyc -------------------------------------------------------------------------------- 
/utils/__pycache__/metrics.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/metrics.cpython-36.pyc -------------------------------------------------------------------------------- /deploy/triton-inference-server/data/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/deploy/triton-inference-server/data/dog.jpg -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/autoanchor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/autoanchor.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/experimental.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/models/__pycache__/experimental.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/activations.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/activations.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/carbox_auto.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/carbox_auto.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/google_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/google_utils.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/torch_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/torch_utils.cpython-36.pyc -------------------------------------------------------------------------------- /utils/google_app_engine/additional_requirements.txt: -------------------------------------------------------------------------------- 1 | # add these requirements in your app on top of the existing ones 2 | pip==18.1 3 | Flask==1.0.2 4 | gunicorn==19.9.0 5 | -------------------------------------------------------------------------------- /deploy/triton-inference-server/data/dog_result.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/deploy/triton-inference-server/data/dog_result.jpg
--------------------------------------------------------------------------------
/utils/google_app_engine/app.yaml:
--------------------------------------------------------------------------------
1 | runtime: custom
2 | env: flex
3 | 
4 | service: yolorapp
5 | 
6 | liveness_check:
7 |   initial_delay_sec: 600
8 | 
9 | manual_scaling:
10 |   instances: 1
11 | resources:
12 |   cpu: 1
13 |   memory_gb: 4
14 |   disk_size_gb: 20
--------------------------------------------------------------------------------
/utils/aws/mime.sh:
--------------------------------------------------------------------------------
1 | # AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/
2 | # This script will run on every instance restart, not only on first start
3 | # --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA ---
4 | 
5 | Content-Type: multipart/mixed; boundary="//"
6 | MIME-Version: 1.0
7 | 
8 | --//
9 | Content-Type: text/cloud-config; charset="us-ascii"
10 | MIME-Version: 1.0
11 | Content-Transfer-Encoding: 7bit
12 | Content-Disposition: attachment; filename="cloud-config.txt"
13 | 
14 | #cloud-config
15 | cloud_final_modules:
16 |   - [scripts-user, always]
17 | 
18 | --//
19 | Content-Type: text/x-shellscript; charset="us-ascii"
20 | MIME-Version: 1.0
21 | Content-Transfer-Encoding: 7bit
22 | Content-Disposition: attachment; filename="userdata.txt"
23 | 
24 | #!/bin/bash
25 | # --- paste contents of userdata.sh here ---
26 | --//
27 | 
--------------------------------------------------------------------------------
/utils/wandb_logging/log_dataset.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | 
3 | import yaml
4 | 
5 | from wandb_utils import WandbLogger
6 | 
7 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
8 | 
9 | 
10 | def create_dataset_artifact(opt):
11 |     with open(opt.data) as f:
12 |         data = yaml.load(f, Loader=yaml.SafeLoader) # data dict
13 |     logger = WandbLogger(opt, '', None, data, job_type='Dataset Creation')
14 | 
15 | 
16 | if __name__ == '__main__':
17 |     parser = argparse.ArgumentParser()
18 |     parser.add_argument('--data', type=str, default='data/coco.yaml', help='data.yaml path')
19 |     parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
20 |     parser.add_argument('--project', type=str, default='YOLOR', help='name of W&B Project')
21 |     opt = parser.parse_args()
22 |     opt.resume = False # Explicitly disallow resume check for dataset upload job
23 | 
24 |     create_dataset_artifact(opt)
25 | 
--------------------------------------------------------------------------------
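The script above is normally driven from the command line, e.g. `python utils/wandb_logging/log_dataset.py --data data/coco.yaml --project YOLOR`. As a minimal sketch only, assuming `wandb` is installed, `utils/wandb_logging` is on `sys.path`, and `WandbLogger` needs no options beyond the CLI flags mirrored here, the same upload can be driven programmatically:

from types import SimpleNamespace
from log_dataset import create_dataset_artifact

opt = SimpleNamespace(
    data='data/coco.yaml',  # dataset description to upload
    single_cls=False,       # keep the original class labels
    project='YOLOR',        # destination W&B project
    resume=False,           # the CLI also forces this off for upload jobs
)
create_dataset_artifact(opt)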
/utils/google_app_engine/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gcr.io/google-appengine/python
2 | 
3 | # Create a virtualenv for dependencies. This isolates these packages from
4 | # system-level packages.
5 | # Use -p python3 or -p python3.7 to select python version. Default is version 2.
6 | RUN virtualenv /env -p python3
7 | 
8 | # Setting these environment variables is the same as running
9 | # source /env/bin/activate.
10 | ENV VIRTUAL_ENV /env
11 | ENV PATH /env/bin:$PATH
12 | 
13 | RUN apt-get update && apt-get install -y python-opencv
14 | 
15 | # Copy the application's requirements.txt and run pip to install all
16 | # dependencies into the virtualenv.
17 | ADD requirements.txt /app/requirements.txt
18 | RUN pip install -r /app/requirements.txt
19 | 
20 | # Add the application source code.
21 | ADD . /app
22 | 
23 | # Run a WSGI server to serve the application. gunicorn must be declared as
24 | # a dependency in requirements.txt.
25 | CMD gunicorn -b :$PORT main:app
26 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Usage: pip install -r requirements.txt
2 | 
3 | # Base ----------------------------------------
4 | matplotlib>=3.2.2
5 | numpy>=1.18.5
6 | opencv-python>=4.1.1
7 | Pillow>=7.1.2
8 | PyYAML>=5.3.1
9 | requests>=2.23.0
10 | scipy>=1.4.1
11 | torch>=1.7.0,!=1.12.0
12 | torchvision>=0.8.1,!=0.13.0
13 | tqdm>=4.41.0
14 | protobuf<4.21.3
15 | 
16 | # Logging -------------------------------------
17 | tensorboard>=2.4.1
18 | # wandb
19 | 
20 | # Plotting ------------------------------------
21 | pandas>=1.1.4
22 | seaborn>=0.11.0
23 | 
24 | # Export --------------------------------------
25 | # coremltools>=4.1 # CoreML export
26 | # onnx>=1.9.0 # ONNX export
27 | # onnx-simplifier>=0.3.6 # ONNX simplifier
28 | # scikit-learn==0.19.2 # CoreML quantization
29 | # tensorflow>=2.4.1 # TFLite export
30 | # tensorflowjs>=3.9.0 # TF.js export
31 | # openvino-dev # OpenVINO export
32 | 
33 | # Extras --------------------------------------
34 | ipython # interactive notebook
35 | psutil # system utilization
36 | thop # FLOPs computation
37 | # albumentations>=1.0.3
38 | # pycocotools>=2.0 # COCO mAP
39 | # roboflow
40 | 
--------------------------------------------------------------------------------
/deploy/triton-inference-server/boundingbox.py:
--------------------------------------------------------------------------------
1 | class BoundingBox:
2 |     def __init__(self, classID, confidence, x1, x2, y1, y2, image_width, image_height):
3 |         self.classID = classID
4 |         self.confidence = confidence
5 |         self.x1 = x1
6 |         self.x2 = x2
7 |         self.y1 = y1
8 |         self.y2 = y2
9 |         self.u1 = x1 / image_width
10 |         self.u2 = x2 / image_width
11 |         self.v1 = y1 / image_height
12 |         self.v2 = y2 / image_height
13 | 
14 |     def box(self):
15 |         return (self.x1, self.y1, self.x2, self.y2)
16 | 
17 |     def width(self):
18 |         return self.x2 - self.x1
19 | 
20 |     def height(self):
21 |         return self.y2 - self.y1
22 | 
23 |     def center_absolute(self):
24 |         return (0.5 * (self.x1 + self.x2), 0.5 * (self.y1 + self.y2))
25 | 
26 |     def center_normalized(self):
27 |         return (0.5 * (self.u1 + self.u2), 0.5 * (self.v1 + self.v2))
28 | 
29 |     def size_absolute(self):
30 |         return (self.x2 - self.x1, self.y2 - self.y1)
31 | 
32 |     def size_normalized(self):
33 |         return (self.u2 - self.u1, self.v2 - self.v1)
34 | 
--------------------------------------------------------------------------------
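A quick usage sketch for BoundingBox above, with made-up values on a 640x480 image, to show the absolute and normalized accessors side by side:

from boundingbox import BoundingBox

det = BoundingBox(classID=16, confidence=0.87, x1=100, x2=300, y1=50, y2=250,
                  image_width=640, image_height=480)
print(det.box())                # (100, 50, 300, 250), pixel corners
print(det.size_absolute())      # (200, 200)
print(det.center_normalized())  # (0.3125, 0.3125), fractions of image size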
/utils/aws/resume.py:
--------------------------------------------------------------------------------
1 | # Resume all interrupted trainings in yolor/ dir including DDP trainings
2 | # Usage: $ python utils/aws/resume.py
3 | 
4 | import os
5 | import sys
6 | from pathlib import Path
7 | 
8 | import torch
9 | import yaml
10 | 
11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories
12 | 
13 | port = 0 # --master_port
14 | path = Path('').resolve()
15 | for last in path.rglob('*/**/last.pt'):
16 |     ckpt = torch.load(last)
17 |     if ckpt['optimizer'] is None:
18 |         continue
19 | 
20 |     # Load opt.yaml
21 |     with open(last.parent.parent / 'opt.yaml') as f:
22 |         opt = yaml.load(f, Loader=yaml.SafeLoader)
23 | 
24 |     # Get device count
25 |     d = opt['device'].split(',') # devices
26 |     nd = len(d) # number of devices
27 |     ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1) # distributed data parallel
28 | 
29 |     if ddp: # multi-GPU
30 |         port += 1
31 |         cmd = f'python -m torch.distributed.launch --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
32 |     else: # single-GPU
33 |         cmd = f'python train.py --resume {last}'
34 | 
35 |     cmd += ' > /dev/null 2>&1 &' # redirect output to /dev/null and run in the background
36 |     print(cmd)
37 |     os.system(cmd)
38 | 
--------------------------------------------------------------------------------
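One caveat in resume.py: `torch.load(last)` restores every tensor to the device it was saved from, which can allocate GPU memory just to inspect the optimizer state. A lighter sketch of the same scan, assuming the same checkpoint layout, maps everything to CPU first:

import torch
from pathlib import Path

for last in Path('').resolve().rglob('*/**/last.pt'):
    ckpt = torch.load(last, map_location='cpu')  # no GPU allocation
    if ckpt.get('optimizer') is None:            # finished runs store no optimizer
        continue
    print(f'resumable checkpoint: {last}')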
/utils/aws/userdata.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html
3 | # This script will run only once on first instance start (for a re-start script see mime.sh)
4 | # /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir
5 | # Use >300 GB SSD
6 | 
7 | cd /home/ubuntu
8 | if [ ! -d yolor ]; then
9 |   echo "Running first-time script." # install dependencies, download COCO, pull Docker
10 |   git clone -b paper https://github.com/WongKinYiu/yolor && sudo chmod -R 777 yolor
11 |   cd yolor
12 |   bash data/scripts/get_coco.sh && echo "Data done." &
13 |   sudo docker pull nvcr.io/nvidia/pytorch:21.08-py3 && echo "Docker done." &
14 |   python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." &
15 |   wait && echo "All tasks done." # finish background tasks
16 | else
17 |   echo "Running re-start script." # resume interrupted runs
18 |   i=0
19 |   list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour'
20 |   while IFS= read -r id; do
21 |     ((i++))
22 |     echo "restarting container $i: $id"
23 |     sudo docker start $id
24 |     # sudo docker exec -it $id python train.py --resume # single-GPU
25 |     sudo docker exec -d $id python utils/aws/resume.py # multi-scenario
26 |   done <<<"$list"
27 | fi
28 | 
--------------------------------------------------------------------------------
/data/coco.yaml:
--------------------------------------------------------------------------------
1 | # COCO 2017 dataset http://cocodataset.org
2 | 
3 | # download command/URL (optional)
4 | download: bash ./scripts/get_coco.sh
5 | 
6 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
7 | train: ./coco/train2017.txt # 118287 images
8 | val: ./coco/val2017.txt # 5000 images
9 | test: ./coco/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
10 | 
11 | # number of classes
12 | nc: 80
13 | 
14 | # class names
15 | names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
16 |          'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
17 |          'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
18 |          'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
19 |          'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
20 |          'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
21 |          'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
22 |          'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
23 |          'hair drier', 'toothbrush' ]
24 | 
--------------------------------------------------------------------------------
/data/hyp.scratch.p5.yaml:
--------------------------------------------------------------------------------
1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2 | lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
3 | momentum: 0.937 # SGD momentum/Adam beta1
4 | weight_decay: 0.0005 # optimizer weight decay 5e-4
5 | warmup_epochs: 3.0 # warmup epochs (fractions ok)
6 | warmup_momentum: 0.8 # warmup initial momentum
7 | warmup_bias_lr: 0.1 # warmup initial bias lr
8 | box: 0.05 # box loss gain
9 | cls: 0.3 # cls loss gain
10 | cls_pw: 1.0 # cls BCELoss positive_weight
11 | obj: 0.7 # obj loss gain (scale with pixels)
12 | obj_pw: 1.0 # obj BCELoss positive_weight
13 | iou_t: 0.20 # IoU training threshold
14 | anchor_t: 4.0 # anchor-multiple threshold
15 | # anchors: 3 # anchors per output layer (0 to ignore)
16 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19 | hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20 | degrees: 0.0 # image rotation (+/- deg)
21 | translate: 0.2 # image translation (+/- fraction)
22 | scale: 0.9 # image scale (+/- gain)
23 | shear: 0.0 # image shear (+/- deg)
24 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25 | flipud: 0.0 # image flip up-down (probability)
26 | fliplr: 0.5 # image flip left-right (probability)
27 | mosaic: 1.0 # image mosaic (probability)
28 | mixup: 0.15 # image mixup
(probability) 29 | copy_paste: 0.0 # image copy paste (probability) 30 | paste_in: 0.15 # image copy paste (probability), use 0 for faster training 31 | loss_ota: 1 # use ComputeLossOTA, use 0 for faster training -------------------------------------------------------------------------------- /data/hyp.scratch.p6.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 0.05 # box loss gain 9 | cls: 0.3 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 0.7 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | iou_t: 0.20 # IoU training threshold 14 | anchor_t: 4.0 # anchor-multiple threshold 15 | # anchors: 3 # anchors per output layer (0 to ignore) 16 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 17 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 18 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 19 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 20 | degrees: 0.0 # image rotation (+/- deg) 21 | translate: 0.2 # image translation (+/- fraction) 22 | scale: 0.9 # image scale (+/- gain) 23 | shear: 0.0 # image shear (+/- deg) 24 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 25 | flipud: 0.0 # image flip up-down (probability) 26 | fliplr: 0.5 # image flip left-right (probability) 27 | mosaic: 1.0 # image mosaic (probability) 28 | mixup: 0.15 # image mixup (probability) 29 | copy_paste: 0.0 # image copy paste (probability) 30 | paste_in: 0.15 # image copy paste (probability), use 0 for faster training 31 | loss_ota: 1 # use ComputeLossOTA, use 0 for faster training -------------------------------------------------------------------------------- /data/hyp.scratch.custom.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 0.05 # box loss gain 9 | cls: 0.3 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 0.7 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | iou_t: 0.20 # IoU training threshold 14 | anchor_t: 4.0 # anchor-multiple threshold 15 | # anchors: 3 # anchors per output layer (0 to ignore) 16 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 17 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 18 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 19 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 20 | degrees: 0.0 # image rotation (+/- deg) 21 | translate: 0.2 # image translation (+/- fraction) 22 | scale: 0.5 # image scale (+/- gain) 23 | shear: 0.0 # image shear (+/- deg) 24 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 25 | flipud: 0.0 # image flip up-down (probability) 26 | fliplr: 0.5 # image flip left-right (probability) 
27 | mosaic: 1.0 # image mosaic (probability) 28 | mixup: 0.0 # image mixup (probability) 29 | copy_paste: 0.0 # image copy paste (probability) 30 | paste_in: 0.0 # image copy paste (probability), use 0 for faster training 31 | loss_ota: 1 # use ComputeLossOTA, use 0 for faster training -------------------------------------------------------------------------------- /cfg/baseline/r50-csp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # CSP-ResNet backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Stem, [128]], # 0-P1/2 16 | [-1, 3, ResCSPC, [128]], 17 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 18 | [-1, 4, ResCSPC, [256]], 19 | [-1, 1, Conv, [512, 3, 2]], # 4-P3/8 20 | [-1, 6, ResCSPC, [512]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 6-P3/8 22 | [-1, 3, ResCSPC, [1024]], # 7 23 | ] 24 | 25 | # CSP-Res-PAN head 26 | head: 27 | [[-1, 1, SPPCSPC, [512]], # 8 28 | [-1, 1, Conv, [256, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [5, 1, Conv, [256, 1, 1]], # route backbone P4 31 | [[-1, -2], 1, Concat, [1]], 32 | [-1, 2, ResCSPB, [256]], # 13 33 | [-1, 1, Conv, [128, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [3, 1, Conv, [128, 1, 1]], # route backbone P3 36 | [[-1, -2], 1, Concat, [1]], 37 | [-1, 2, ResCSPB, [128]], # 18 38 | [-1, 1, Conv, [256, 3, 1]], 39 | [-2, 1, Conv, [256, 3, 2]], 40 | [[-1, 13], 1, Concat, [1]], # cat 41 | [-1, 2, ResCSPB, [256]], # 22 42 | [-1, 1, Conv, [512, 3, 1]], 43 | [-2, 1, Conv, [512, 3, 2]], 44 | [[-1, 8], 1, Concat, [1]], # cat 45 | [-1, 2, ResCSPB, [512]], # 26 46 | [-1, 1, Conv, [1024, 3, 1]], 47 | 48 | [[19,23,27], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5) 49 | ] 50 | -------------------------------------------------------------------------------- /data/hyp.scratch.tiny.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 0.05 # box loss gain 9 | cls: 0.5 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 1.0 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | iou_t: 0.20 # IoU training threshold 14 | anchor_t: 4.0 # anchor-multiple threshold 15 | # anchors: 3 # anchors per output layer (0 to ignore) 16 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 17 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 18 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 19 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 20 | degrees: 0.0 # image rotation (+/- deg) 21 | translate: 0.1 # image translation (+/- fraction) 22 | scale: 0.5 # image scale (+/- gain) 23 | shear: 0.0 # image shear (+/- deg) 24 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 25 | flipud: 0.0 # image flip up-down (probability) 26 | fliplr: 0.5 # image flip left-right (probability) 27 | 
mosaic: 1.0 # image mosaic (probability)
28 | mixup: 0.05 # image mixup (probability)
29 | copy_paste: 0.0 # image copy paste (probability)
30 | paste_in: 0.05 # image copy paste (probability), use 0 for faster training
31 | loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
32 | 
--------------------------------------------------------------------------------
/cfg/baseline/x50-csp.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 | 
6 | # anchors
7 | anchors:
8 |   - [12,16, 19,36, 40,28] # P3/8
9 |   - [36,75, 76,55, 72,146] # P4/16
10 |   - [142,110, 192,243, 459,401] # P5/32
11 | 
12 | # CSP-ResNeXt backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Stem, [128]], # 0-P1/2
16 |    [-1, 3, ResXCSPC, [128]],
17 |    [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
18 |    [-1, 4, ResXCSPC, [256]],
19 |    [-1, 1, Conv, [512, 3, 2]], # 4-P3/8
20 |    [-1, 6, ResXCSPC, [512]],
21 |    [-1, 1, Conv, [1024, 3, 2]], # 6-P3/8
22 |    [-1, 3, ResXCSPC, [1024]], # 7
23 |   ]
24 | 
25 | # CSP-ResX-PAN head
26 | head:
27 |   [[-1, 1, SPPCSPC, [512]], # 8
28 |    [-1, 1, Conv, [256, 1, 1]],
29 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
30 |    [5, 1, Conv, [256, 1, 1]], # route backbone P4
31 |    [[-1, -2], 1, Concat, [1]],
32 |    [-1, 2, ResXCSPB, [256]], # 13
33 |    [-1, 1, Conv, [128, 1, 1]],
34 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35 |    [3, 1, Conv, [128, 1, 1]], # route backbone P3
36 |    [[-1, -2], 1, Concat, [1]],
37 |    [-1, 2, ResXCSPB, [128]], # 18
38 |    [-1, 1, Conv, [256, 3, 1]],
39 |    [-2, 1, Conv, [256, 3, 2]],
40 |    [[-1, 13], 1, Concat, [1]], # cat
41 |    [-1, 2, ResXCSPB, [256]], # 22
42 |    [-1, 1, Conv, [512, 3, 1]],
43 |    [-2, 1, Conv, [512, 3, 2]],
44 |    [[-1, 8], 1, Concat, [1]], # cat
45 |    [-1, 2, ResXCSPB, [512]], # 26
46 |    [-1, 1, Conv, [1024, 3, 1]],
47 | 
48 |    [[19,23,27], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
49 |   ]
50 | 
--------------------------------------------------------------------------------
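The data/hyp.scratch.*.yaml files above (and hyp.scratch.mask.yaml below) are flat key/value hyperparameter maps. A minimal sketch of consuming one, assuming PyYAML as pinned in requirements.txt; the derived value follows the lrf comment in the file:

import yaml

with open('data/hyp.scratch.p5.yaml') as f:
    hyp = yaml.load(f, Loader=yaml.SafeLoader)

final_lr = hyp['lr0'] * hyp['lrf']  # OneCycleLR endpoint: 0.01 * 0.1 = 0.001
print(final_lr, hyp['mosaic'], hyp['loss_ota'])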
/data/hyp.scratch.mask.yaml:
--------------------------------------------------------------------------------
1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2 | lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
3 | momentum: 0.937 # SGD momentum/Adam beta1
4 | weight_decay: 0.0005 # optimizer weight decay 5e-4
5 | warmup_epochs: 3.0 # warmup epochs (fractions ok)
6 | warmup_momentum: 0.8 # warmup initial momentum
7 | warmup_bias_lr: 0.1 # warmup initial bias lr
8 | box: 0.05 # box loss gain
9 | cls: 0.3 # cls loss gain
10 | cls_pw: 1.0 # cls BCELoss positive_weight
11 | obj: 0.7 # obj loss gain (scale with pixels)
12 | obj_pw: 1.0 # obj BCELoss positive_weight
13 | mask: 0.05 # mask loss gain
14 | mask_pw: 1.0 # mask BCELoss positive_weight
15 | pointrend: 0.05 # pointrend loss gain
16 | iou_t: 0.20 # IoU training threshold
17 | anchor_t: 4.0 # anchor-multiple threshold
18 | # anchors: 3 # anchors per output layer (0 to ignore)
19 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
20 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
21 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
22 | hsv_v: 0.4 # image HSV-Value augmentation (fraction)
23 | degrees: 0.0 # image rotation (+/- deg)
24 | translate: 0.1 # image translation (+/- fraction)
25 | scale: 0.5 # image scale (+/- gain)
26 | shear: 0.0 # image shear (+/- deg)
27 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
28 | flipud: 0.0 # image flip up-down (probability)
29 | fliplr: 0.5 # image flip left-right (probability)
30 | mosaic: 1.0 # image mosaic (probability)
31 | mixup: 0. # image mixup (probability)
32 | copy_paste: 0. # image copy paste (probability)
33 | paste_in: 0. # image copy paste (probability)
34 | attn_resolution: 14
35 | num_base: 5
36 | mask_resolution: 56
37 | 
38 | 
--------------------------------------------------------------------------------
/cfg/baseline/yolov3.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 | 
6 | # anchors
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # darknet53 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Conv, [32, 3, 1]], # 0
16 |    [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17 |    [-1, 1, Bottleneck, [64]],
18 |    [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19 |    [-1, 2, Bottleneck, [128]],
20 |    [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21 |    [-1, 8, Bottleneck, [256]],
22 |    [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23 |    [-1, 8, Bottleneck, [512]],
24 |    [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 |    [-1, 4, Bottleneck, [1024]], # 10
26 |   ]
27 | 
28 | # YOLOv3 head
29 | head:
30 |   [[-1, 1, Bottleneck, [1024, False]],
31 |    [-1, 1, Conv, [512, [1, 1]]],
32 |    [-1, 1, Conv, [1024, 3, 1]],
33 |    [-1, 1, Conv, [512, 1, 1]],
34 |    [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35 | 
36 |    [-2, 1, Conv, [256, 1, 1]],
37 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 |    [[-1, 8], 1, Concat, [1]], # cat backbone P4
39 |    [-1, 1, Bottleneck, [512, False]],
40 |    [-1, 1, Bottleneck, [512, False]],
41 |    [-1, 1, Conv, [256, 1, 1]],
42 |    [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43 | 
44 |    [-2, 1, Conv, [128, 1, 1]],
45 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46 |    [[-1, 6], 1, Concat, [1]], # cat backbone P3
47 |    [-1, 1, Bottleneck, [256, False]],
48 |    [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49 | 
50 |    [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51 |   ]
52 | 
--------------------------------------------------------------------------------
/cfg/baseline/yolov3-spp.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 | 
6 | # anchors
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # darknet53 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Conv, [32, 3, 1]], # 0
16 |    [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17 |    [-1, 1, Bottleneck, [64]],
18 |    [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19 |    [-1, 2, Bottleneck, [128]],
20 |    [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21 |    [-1, 8, Bottleneck, [256]],
22 |    [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23 |    [-1, 8, Bottleneck, [512]],
24 |    [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 |    [-1, 4, Bottleneck, [1024]], # 10
26 |   ]
27 | 
28 | # YOLOv3-SPP head
29 | head:
30 |   [[-1, 1, Bottleneck, [1024, False]],
31 |    [-1, 1, SPP, [512, [5, 9, 13]]],
32 |    [-1, 1, Conv, [1024, 3, 1]],
33 |    [-1, 1, Conv, [512, 1, 1]],
34 |    [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35 | 
36 |    [-2, 1, Conv, [256, 1, 1]],
37 |    [-1, 1, nn.Upsample, [None, 2,
'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /deploy/triton-inference-server/labels.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class COCOLabels(Enum): 4 | PERSON = 0 5 | BICYCLE = 1 6 | CAR = 2 7 | MOTORBIKE = 3 8 | AEROPLANE = 4 9 | BUS = 5 10 | TRAIN = 6 11 | TRUCK = 7 12 | BOAT = 8 13 | TRAFFIC_LIGHT = 9 14 | FIRE_HYDRANT = 10 15 | STOP_SIGN = 11 16 | PARKING_METER = 12 17 | BENCH = 13 18 | BIRD = 14 19 | CAT = 15 20 | DOG = 16 21 | HORSE = 17 22 | SHEEP = 18 23 | COW = 19 24 | ELEPHANT = 20 25 | BEAR = 21 26 | ZEBRA = 22 27 | GIRAFFE = 23 28 | BACKPACK = 24 29 | UMBRELLA = 25 30 | HANDBAG = 26 31 | TIE = 27 32 | SUITCASE = 28 33 | FRISBEE = 29 34 | SKIS = 30 35 | SNOWBOARD = 31 36 | SPORTS_BALL = 32 37 | KITE = 33 38 | BASEBALL_BAT = 34 39 | BASEBALL_GLOVE = 35 40 | SKATEBOARD = 36 41 | SURFBOARD = 37 42 | TENNIS_RACKET = 38 43 | BOTTLE = 39 44 | WINE_GLASS = 40 45 | CUP = 41 46 | FORK = 42 47 | KNIFE = 43 48 | SPOON = 44 49 | BOWL = 45 50 | BANANA = 46 51 | APPLE = 47 52 | SANDWICH = 48 53 | ORANGE = 49 54 | BROCCOLI = 50 55 | CARROT = 51 56 | HOT_DOG = 52 57 | PIZZA = 53 58 | DONUT = 54 59 | CAKE = 55 60 | CHAIR = 56 61 | SOFA = 57 62 | POTTEDPLANT = 58 63 | BED = 59 64 | DININGTABLE = 60 65 | TOILET = 61 66 | TVMONITOR = 62 67 | LAPTOP = 63 68 | MOUSE = 64 69 | REMOTE = 65 70 | KEYBOARD = 66 71 | CELL_PHONE = 67 72 | MICROWAVE = 68 73 | OVEN = 69 74 | TOASTER = 70 75 | SINK = 71 76 | REFRIGERATOR = 72 77 | BOOK = 73 78 | CLOCK = 74 79 | VASE = 75 80 | SCISSORS = 76 81 | TEDDY_BEAR = 77 82 | HAIR_DRIER = 78 83 | TOOTHBRUSH = 79 84 | -------------------------------------------------------------------------------- /cfg/baseline/yolor-csp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # CSP-Darknet backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, BottleneckCSPC, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, BottleneckCSPC, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, BottleneckCSPC, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, BottleneckCSPC, [1024]], # 10 26 | ] 27 | 28 | # CSP-Dark-PAN head 29 | head: 30 | [[-1, 1, SPPCSPC, [512]], # 11 31 | [-1, 1, Conv, [256, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [8, 1, Conv, [256, 1, 1]], # route backbone P4 34 | [[-1, -2], 1, Concat, [1]], 35 | [-1, 2, BottleneckCSPB, [256]], # 16 36 | [-1, 1, Conv, [128, 1, 1]], 
37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [6, 1, Conv, [128, 1, 1]], # route backbone P3 39 | [[-1, -2], 1, Concat, [1]], 40 | [-1, 2, BottleneckCSPB, [128]], # 21 41 | [-1, 1, Conv, [256, 3, 1]], 42 | [-2, 1, Conv, [256, 3, 2]], 43 | [[-1, 16], 1, Concat, [1]], # cat 44 | [-1, 2, BottleneckCSPB, [256]], # 25 45 | [-1, 1, Conv, [512, 3, 1]], 46 | [-2, 1, Conv, [512, 3, 2]], 47 | [[-1, 11], 1, Concat, [1]], # cat 48 | [-1, 2, BottleneckCSPB, [512]], # 29 49 | [-1, 1, Conv, [1024, 3, 1]], 50 | 51 | [[22,26,30], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5) 52 | ] 53 | -------------------------------------------------------------------------------- /cfg/baseline/yolov4-csp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # CSP-Darknet backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, BottleneckCSPC, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, BottleneckCSPC, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, BottleneckCSPC, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, BottleneckCSPC, [1024]], # 10 26 | ] 27 | 28 | # CSP-Dark-PAN head 29 | head: 30 | [[-1, 1, SPPCSPC, [512]], # 11 31 | [-1, 1, Conv, [256, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [8, 1, Conv, [256, 1, 1]], # route backbone P4 34 | [[-1, -2], 1, Concat, [1]], 35 | [-1, 2, BottleneckCSPB, [256]], # 16 36 | [-1, 1, Conv, [128, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [6, 1, Conv, [128, 1, 1]], # route backbone P3 39 | [[-1, -2], 1, Concat, [1]], 40 | [-1, 2, BottleneckCSPB, [128]], # 21 41 | [-1, 1, Conv, [256, 3, 1]], 42 | [-2, 1, Conv, [256, 3, 2]], 43 | [[-1, 16], 1, Concat, [1]], # cat 44 | [-1, 2, BottleneckCSPB, [256]], # 25 45 | [-1, 1, Conv, [512, 3, 1]], 46 | [-2, 1, Conv, [512, 3, 2]], 47 | [[-1, 11], 1, Concat, [1]], # cat 48 | [-1, 2, BottleneckCSPB, [512]], # 29 49 | [-1, 1, Conv, [1024, 3, 1]], 50 | 51 | [[22,26,30], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 52 | ] 53 | -------------------------------------------------------------------------------- /cfg/baseline/yolor-csp-x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | width_multiple: 1.25 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # CSP-Darknet backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, BottleneckCSPC, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, BottleneckCSPC, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, BottleneckCSPC, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, BottleneckCSPC, [1024]], # 10 26 | ] 27 | 28 | # CSP-Dark-PAN head 29 | 
head: 30 | [[-1, 1, SPPCSPC, [512]], # 11 31 | [-1, 1, Conv, [256, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [8, 1, Conv, [256, 1, 1]], # route backbone P4 34 | [[-1, -2], 1, Concat, [1]], 35 | [-1, 2, BottleneckCSPB, [256]], # 16 36 | [-1, 1, Conv, [128, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [6, 1, Conv, [128, 1, 1]], # route backbone P3 39 | [[-1, -2], 1, Concat, [1]], 40 | [-1, 2, BottleneckCSPB, [128]], # 21 41 | [-1, 1, Conv, [256, 3, 1]], 42 | [-2, 1, Conv, [256, 3, 2]], 43 | [[-1, 16], 1, Concat, [1]], # cat 44 | [-1, 2, BottleneckCSPB, [256]], # 25 45 | [-1, 1, Conv, [512, 3, 1]], 46 | [-2, 1, Conv, [512, 3, 2]], 47 | [[-1, 11], 1, Concat, [1]], # cat 48 | [-1, 2, BottleneckCSPB, [512]], # 29 49 | [-1, 1, Conv, [1024, 3, 1]], 50 | 51 | [[22,26,30], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5) 52 | ] 53 | -------------------------------------------------------------------------------- /cfg/baseline/yolor-e6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # expand model depth 4 | width_multiple: 1.25 # expand layer channels 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # CSP-Darknet backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [64, 3, 1]], # 1-P1/2 18 | [-1, 1, DownC, [128]], # 2-P2/4 19 | [-1, 3, BottleneckCSPA, [128]], 20 | [-1, 1, DownC, [256]], # 4-P3/8 21 | [-1, 7, BottleneckCSPA, [256]], 22 | [-1, 1, DownC, [512]], # 6-P4/16 23 | [-1, 7, BottleneckCSPA, [512]], 24 | [-1, 1, DownC, [768]], # 8-P5/32 25 | [-1, 3, BottleneckCSPA, [768]], 26 | [-1, 1, DownC, [1024]], # 10-P6/64 27 | [-1, 3, BottleneckCSPA, [1024]], # 11 28 | ] 29 | 30 | # CSP-Dark-PAN head 31 | head: 32 | [[-1, 1, SPPCSPC, [512]], # 12 33 | [-1, 1, Conv, [384, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [-6, 1, Conv, [384, 1, 1]], # route backbone P5 36 | [[-1, -2], 1, Concat, [1]], 37 | [-1, 3, BottleneckCSPB, [384]], # 17 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 40 | [-13, 1, Conv, [256, 1, 1]], # route backbone P4 41 | [[-1, -2], 1, Concat, [1]], 42 | [-1, 3, BottleneckCSPB, [256]], # 22 43 | [-1, 1, Conv, [128, 1, 1]], 44 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 45 | [-20, 1, Conv, [128, 1, 1]], # route backbone P3 46 | [[-1, -2], 1, Concat, [1]], 47 | [-1, 3, BottleneckCSPB, [128]], # 27 48 | [-1, 1, Conv, [256, 3, 1]], 49 | [-2, 1, DownC, [256]], 50 | [[-1, 22], 1, Concat, [1]], # cat 51 | [-1, 3, BottleneckCSPB, [256]], # 31 52 | [-1, 1, Conv, [512, 3, 1]], 53 | [-2, 1, DownC, [384]], 54 | [[-1, 17], 1, Concat, [1]], # cat 55 | [-1, 3, BottleneckCSPB, [384]], # 35 56 | [-1, 1, Conv, [768, 3, 1]], 57 | [-2, 1, DownC, [512]], 58 | [[-1, 12], 1, Concat, [1]], # cat 59 | [-1, 3, BottleneckCSPB, [512]], # 39 60 | [-1, 1, Conv, [1024, 3, 1]], 61 | 62 | [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6) 63 | ] -------------------------------------------------------------------------------- /cfg/baseline/yolor-d6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # expand model depth 4 | width_multiple: 1.25 # expand layer channels 5 | 6 | # anchors 7 
| anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # CSP-Darknet backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [64, 3, 1]], # 1-P1/2 18 | [-1, 1, DownC, [128]], # 2-P2/4 19 | [-1, 3, BottleneckCSPA, [128]], 20 | [-1, 1, DownC, [256]], # 4-P3/8 21 | [-1, 15, BottleneckCSPA, [256]], 22 | [-1, 1, DownC, [512]], # 6-P4/16 23 | [-1, 15, BottleneckCSPA, [512]], 24 | [-1, 1, DownC, [768]], # 8-P5/32 25 | [-1, 7, BottleneckCSPA, [768]], 26 | [-1, 1, DownC, [1024]], # 10-P6/64 27 | [-1, 7, BottleneckCSPA, [1024]], # 11 28 | ] 29 | 30 | # CSP-Dark-PAN head 31 | head: 32 | [[-1, 1, SPPCSPC, [512]], # 12 33 | [-1, 1, Conv, [384, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [-6, 1, Conv, [384, 1, 1]], # route backbone P5 36 | [[-1, -2], 1, Concat, [1]], 37 | [-1, 3, BottleneckCSPB, [384]], # 17 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 40 | [-13, 1, Conv, [256, 1, 1]], # route backbone P4 41 | [[-1, -2], 1, Concat, [1]], 42 | [-1, 3, BottleneckCSPB, [256]], # 22 43 | [-1, 1, Conv, [128, 1, 1]], 44 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 45 | [-20, 1, Conv, [128, 1, 1]], # route backbone P3 46 | [[-1, -2], 1, Concat, [1]], 47 | [-1, 3, BottleneckCSPB, [128]], # 27 48 | [-1, 1, Conv, [256, 3, 1]], 49 | [-2, 1, DownC, [256]], 50 | [[-1, 22], 1, Concat, [1]], # cat 51 | [-1, 3, BottleneckCSPB, [256]], # 31 52 | [-1, 1, Conv, [512, 3, 1]], 53 | [-2, 1, DownC, [384]], 54 | [[-1, 17], 1, Concat, [1]], # cat 55 | [-1, 3, BottleneckCSPB, [384]], # 35 56 | [-1, 1, Conv, [768, 3, 1]], 57 | [-2, 1, DownC, [512]], 58 | [[-1, 12], 1, Concat, [1]], # cat 59 | [-1, 3, BottleneckCSPB, [512]], # 39 60 | [-1, 1, Conv, [1024, 3, 1]], 61 | 62 | [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6) 63 | ] -------------------------------------------------------------------------------- /cfg/baseline/yolor-p6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # expand model depth 4 | width_multiple: 1.0 # expand layer channels 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # CSP-Darknet backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [64, 3, 1]], # 1-P1/2 18 | [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 19 | [-1, 3, BottleneckCSPA, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 4-P3/8 21 | [-1, 7, BottleneckCSPA, [256]], 22 | [-1, 1, Conv, [384, 3, 2]], # 6-P4/16 23 | [-1, 7, BottleneckCSPA, [384]], 24 | [-1, 1, Conv, [512, 3, 2]], # 8-P5/32 25 | [-1, 3, BottleneckCSPA, [512]], 26 | [-1, 1, Conv, [640, 3, 2]], # 10-P6/64 27 | [-1, 3, BottleneckCSPA, [640]], # 11 28 | ] 29 | 30 | # CSP-Dark-PAN head 31 | head: 32 | [[-1, 1, SPPCSPC, [320]], # 12 33 | [-1, 1, Conv, [256, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [-6, 1, Conv, [256, 1, 1]], # route backbone P5 36 | [[-1, -2], 1, Concat, [1]], 37 | [-1, 3, BottleneckCSPB, [256]], # 17 38 | [-1, 1, Conv, [192, 1, 1]], 39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 40 | [-13, 1, Conv, [192, 1, 1]], # route backbone P4 41 | [[-1, -2], 1, Concat, [1]], 42 | [-1, 3, 
BottleneckCSPB, [192]], # 22
43 |    [-1, 1, Conv, [128, 1, 1]],
44 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45 |    [-20, 1, Conv, [128, 1, 1]], # route backbone P3
46 |    [[-1, -2], 1, Concat, [1]],
47 |    [-1, 3, BottleneckCSPB, [128]], # 27
48 |    [-1, 1, Conv, [256, 3, 1]],
49 |    [-2, 1, Conv, [192, 3, 2]],
50 |    [[-1, 22], 1, Concat, [1]], # cat
51 |    [-1, 3, BottleneckCSPB, [192]], # 31
52 |    [-1, 1, Conv, [384, 3, 1]],
53 |    [-2, 1, Conv, [256, 3, 2]],
54 |    [[-1, 17], 1, Concat, [1]], # cat
55 |    [-1, 3, BottleneckCSPB, [256]], # 35
56 |    [-1, 1, Conv, [512, 3, 1]],
57 |    [-2, 1, Conv, [320, 3, 2]],
58 |    [[-1, 12], 1, Concat, [1]], # cat
59 |    [-1, 3, BottleneckCSPB, [320]], # 39
60 |    [-1, 1, Conv, [640, 3, 1]],
61 | 
62 |    [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
63 |   ]
--------------------------------------------------------------------------------
/cfg/baseline/yolor-w6.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # expand model depth
4 | width_multiple: 1.0 # expand layer channels
5 | 
6 | # anchors
7 | anchors:
8 |   - [ 19,27, 44,40, 38,94 ] # P3/8
9 |   - [ 96,68, 86,152, 180,137 ] # P4/16
10 |   - [ 140,301, 303,264, 238,542 ] # P5/32
11 |   - [ 436,615, 739,380, 925,792 ] # P6/64
12 | 
13 | # CSP-Darknet backbone
14 | backbone:
15 |   # [from, number, module, args]
16 |   [[-1, 1, ReOrg, []], # 0
17 |    [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18 |    [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
19 |    [-1, 3, BottleneckCSPA, [128]],
20 |    [-1, 1, Conv, [256, 3, 2]], # 4-P3/8
21 |    [-1, 7, BottleneckCSPA, [256]],
22 |    [-1, 1, Conv, [512, 3, 2]], # 6-P4/16
23 |    [-1, 7, BottleneckCSPA, [512]],
24 |    [-1, 1, Conv, [768, 3, 2]], # 8-P5/32
25 |    [-1, 3, BottleneckCSPA, [768]],
26 |    [-1, 1, Conv, [1024, 3, 2]], # 10-P6/64
27 |    [-1, 3, BottleneckCSPA, [1024]], # 11
28 |   ]
29 | 
30 | # CSP-Dark-PAN head
31 | head:
32 |   [[-1, 1, SPPCSPC, [512]], # 12
33 |    [-1, 1, Conv, [384, 1, 1]],
34 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35 |    [-6, 1, Conv, [384, 1, 1]], # route backbone P5
36 |    [[-1, -2], 1, Concat, [1]],
37 |    [-1, 3, BottleneckCSPB, [384]], # 17
38 |    [-1, 1, Conv, [256, 1, 1]],
39 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40 |    [-13, 1, Conv, [256, 1, 1]], # route backbone P4
41 |    [[-1, -2], 1, Concat, [1]],
42 |    [-1, 3, BottleneckCSPB, [256]], # 22
43 |    [-1, 1, Conv, [128, 1, 1]],
44 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45 |    [-20, 1, Conv, [128, 1, 1]], # route backbone P3
46 |    [[-1, -2], 1, Concat, [1]],
47 |    [-1, 3, BottleneckCSPB, [128]], # 27
48 |    [-1, 1, Conv, [256, 3, 1]],
49 |    [-2, 1, Conv, [256, 3, 2]],
50 |    [[-1, 22], 1, Concat, [1]], # cat
51 |    [-1, 3, BottleneckCSPB, [256]], # 31
52 |    [-1, 1, Conv, [512, 3, 1]],
53 |    [-2, 1, Conv, [384, 3, 2]],
54 |    [[-1, 17], 1, Concat, [1]], # cat
55 |    [-1, 3, BottleneckCSPB, [384]], # 35
56 |    [-1, 1, Conv, [768, 3, 1]],
57 |    [-2, 1, Conv, [512, 3, 2]],
58 |    [[-1, 12], 1, Concat, [1]], # cat
59 |    [-1, 3, BottleneckCSPB, [512]], # 39
60 |    [-1, 1, Conv, [1024, 3, 1]],
61 | 
62 |    [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
63 |   ]
--------------------------------------------------------------------------------
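In all of the cfg/*.yaml files above, each backbone/head row reads [from, number, module, args]: 'from' indexes the layer(s) feeding this one (-1 = previous layer), 'number' is a repeat count scaled by depth_multiple, 'module' names a block class, and 'args' carries channel/kernel/stride arguments. This repository ships models/ only as compiled .pyc files, so the following is an illustrative reconstruction of the repeat-scaling idea only, not the project's actual parser:

def expand_row(row, depth_multiple):
    frm, number, module, args = row
    # depth scaling as the depth_multiple comments suggest: only repeated blocks grow
    repeats = max(round(number * depth_multiple), 1) if number > 1 else number
    return frm, repeats, module, args

# a backbone row from yolor-csp-x.yaml (depth_multiple: 1.33):
print(expand_row([-1, 8, 'BottleneckCSPC', [256]], 1.33))  # (-1, 11, 'BottleneckCSPC', [256])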
img_w, _ = img.shape 9 | new_h, new_w = input_shape[0], input_shape[1] 10 | offset_h, offset_w = 0, 0 11 | if (new_w / img_w) <= (new_h / img_h): 12 | new_h = int(img_h * new_w / img_w) 13 | offset_h = (input_shape[0] - new_h) // 2 14 | else: 15 | new_w = int(img_w * new_h / img_h) 16 | offset_w = (input_shape[1] - new_w) // 2 17 | resized = cv2.resize(img, (new_w, new_h)) 18 | img = np.full((input_shape[0], input_shape[1], 3), 127, dtype=np.uint8) 19 | img[offset_h:(offset_h + new_h), offset_w:(offset_w + new_w), :] = resized 20 | else: 21 | img = cv2.resize(img, (input_shape[1], input_shape[0])) 22 | 23 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 24 | img = img.transpose((2, 0, 1)).astype(np.float32) 25 | img /= 255.0 26 | return img 27 | 28 | def postprocess(num_dets, det_boxes, det_scores, det_classes, img_w, img_h, input_shape, letter_box=True): 29 | boxes = det_boxes[0, :num_dets[0][0]] / np.array([input_shape[0], input_shape[1], input_shape[0], input_shape[1]], dtype=np.float32) 30 | scores = det_scores[0, :num_dets[0][0]] 31 | classes = det_classes[0, :num_dets[0][0]].astype(int)  # np.int was removed in NumPy 1.24; builtin int works everywhere 32 | 33 | old_h, old_w = img_h, img_w 34 | offset_h, offset_w = 0, 0 35 | if letter_box: 36 | if (img_w / input_shape[1]) >= (img_h / input_shape[0]): 37 | old_h = int(input_shape[0] * img_w / input_shape[1]) 38 | offset_h = (old_h - img_h) // 2 39 | else: 40 | old_w = int(input_shape[1] * img_h / input_shape[0]) 41 | offset_w = (old_w - img_w) // 2 42 | 43 | boxes = boxes * np.array([old_w, old_h, old_w, old_h], dtype=np.float32) 44 | if letter_box: 45 | boxes -= np.array([offset_w, offset_h, offset_w, offset_h], dtype=np.float32) 46 | boxes = boxes.astype(int) 47 | 48 | detected_objects = [] 49 | for box, score, label in zip(boxes, scores, classes): 50 | detected_objects.append(BoundingBox(label, score, box[0], box[2], box[1], box[3], img_w, img_h)) 51 | return detected_objects 52 | -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1606.08415.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6.
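# Sanity check (illustrative sketch, assuming PyTorch >= 1.6 where F.hardswish
# exists): the hardtanh form above computes exactly the built-in op, which is
# why it is a safe, export-friendly stand-in:
#   x = torch.linspace(-6., 6., steps=121)
#   assert torch.allclose(x * F.hardtanh(x + 3, 0., 6.) / 6., F.hardswish(x))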
# for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7-tiny-silu.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv7-tiny backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 2]], # 0-P1/2 16 | 17 | [-1, 1, Conv, [64, 3, 2]], # 1-P2/4 18 | 19 | [-1, 1, Conv, [32, 1, 1]], 20 | [-2, 1, Conv, [32, 1, 1]], 21 | [-1, 1, Conv, [32, 3, 1]], 22 | [-1, 1, Conv, [32, 3, 1]], 23 | [[-1, -2, -3, -4], 1, Concat, [1]], 24 | [-1, 1, Conv, [64, 1, 1]], # 7 25 | 26 | [-1, 1, MP, []], # 8-P3/8 27 | [-1, 1, Conv, [64, 1, 1]], 28 | [-2, 1, Conv, [64, 1, 1]], 29 | [-1, 1, Conv, [64, 3, 1]], 30 | [-1, 1, Conv, [64, 3, 1]], 31 | [[-1, -2, -3, -4], 1, Concat, [1]], 32 | [-1, 1, Conv, [128, 1, 1]], # 14 33 | 34 | [-1, 1, MP, []], # 15-P4/16 35 | [-1, 1, Conv, [128, 1, 1]], 36 | [-2, 1, Conv, [128, 1, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [[-1, -2, -3, -4], 1, Concat, [1]], 40 | [-1, 1, Conv, [256, 1, 1]], # 21 41 | 42 | [-1, 1, MP, []], # 22-P5/32 43 | [-1, 1, Conv, [256, 1, 1]], 44 | [-2, 1, Conv, [256, 1, 1]], 45 | [-1, 1, Conv, [256, 3, 1]], 46 | [-1, 1, Conv, [256, 3, 1]], 47 | [[-1, -2, -3, -4], 1, Concat, [1]], 48 | [-1, 1, Conv, [512, 1, 1]], # 28 49 | ] 50 | 51 | # YOLOv7-tiny head 52 | head: 53 | [[-1, 1, Conv, [256, 1, 1]], 54 | [-2, 1, Conv, [256, 1, 1]], 55 | [-1, 1, SP, [5]], 56 | [-2, 1, SP, [9]], 57 | [-3, 1, SP, [13]], 58 | [[-1, -2, -3, -4], 1, Concat, [1]], 59 | [-1, 1, Conv, [256, 1, 1]], 60 | [[-1, -7], 1, Concat, [1]], 61 | [-1, 1, Conv, 
[256, 1, 1]], # 37 62 | 63 | [-1, 1, Conv, [128, 1, 1]], 64 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 65 | [21, 1, Conv, [128, 1, 1]], # route backbone P4 66 | [[-1, -2], 1, Concat, [1]], 67 | 68 | [-1, 1, Conv, [64, 1, 1]], 69 | [-2, 1, Conv, [64, 1, 1]], 70 | [-1, 1, Conv, [64, 3, 1]], 71 | [-1, 1, Conv, [64, 3, 1]], 72 | [[-1, -2, -3, -4], 1, Concat, [1]], 73 | [-1, 1, Conv, [128, 1, 1]], # 47 74 | 75 | [-1, 1, Conv, [64, 1, 1]], 76 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 77 | [14, 1, Conv, [64, 1, 1]], # route backbone P3 78 | [[-1, -2], 1, Concat, [1]], 79 | 80 | [-1, 1, Conv, [32, 1, 1]], 81 | [-2, 1, Conv, [32, 1, 1]], 82 | [-1, 1, Conv, [32, 3, 1]], 83 | [-1, 1, Conv, [32, 3, 1]], 84 | [[-1, -2, -3, -4], 1, Concat, [1]], 85 | [-1, 1, Conv, [64, 1, 1]], # 57 86 | 87 | [-1, 1, Conv, [128, 3, 2]], 88 | [[-1, 47], 1, Concat, [1]], 89 | 90 | [-1, 1, Conv, [64, 1, 1]], 91 | [-2, 1, Conv, [64, 1, 1]], 92 | [-1, 1, Conv, [64, 3, 1]], 93 | [-1, 1, Conv, [64, 3, 1]], 94 | [[-1, -2, -3, -4], 1, Concat, [1]], 95 | [-1, 1, Conv, [128, 1, 1]], # 65 96 | 97 | [-1, 1, Conv, [256, 3, 2]], 98 | [[-1, 37], 1, Concat, [1]], 99 | 100 | [-1, 1, Conv, [128, 1, 1]], 101 | [-2, 1, Conv, [128, 1, 1]], 102 | [-1, 1, Conv, [128, 3, 1]], 103 | [-1, 1, Conv, [128, 3, 1]], 104 | [[-1, -2, -3, -4], 1, Concat, [1]], 105 | [-1, 1, Conv, [256, 1, 1]], # 73 106 | 107 | [57, 1, Conv, [128, 3, 1]], 108 | [65, 1, Conv, [256, 3, 1]], 109 | [73, 1, Conv, [512, 3, 1]], 110 | 111 | [[74,75,76], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 112 | ] 113 | -------------------------------------------------------------------------------- /deploy/triton-inference-server/render.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import cv2 4 | 5 | from math import sqrt 6 | 7 | _LINE_THICKNESS_SCALING = 500.0 8 | 9 | np.random.seed(0) 10 | RAND_COLORS = np.random.randint(50, 255, (64, 3), "int") # used for class visu 11 | RAND_COLORS[0] = [220, 220, 220] 12 | 13 | def render_box(img, box, color=(200, 200, 200)): 14 | """ 15 | Render a box. Calculates scaling and thickness automatically. 16 | :param img: image to render into 17 | :param box: (x1, y1, x2, y2) - box coordinates 18 | :param color: (b, g, r) - box color 19 | :return: updated image 20 | """ 21 | x1, y1, x2, y2 = box 22 | thickness = int( 23 | round( 24 | (img.shape[0] * img.shape[1]) 25 | / (_LINE_THICKNESS_SCALING * _LINE_THICKNESS_SCALING) 26 | ) 27 | ) 28 | thickness = max(1, thickness) 29 | img = cv2.rectangle( 30 | img, 31 | (int(x1), int(y1)), 32 | (int(x2), int(y2)), 33 | color, 34 | thickness=thickness 35 | ) 36 | return img 37 | 38 | def render_filled_box(img, box, color=(200, 200, 200)): 39 | """ 40 | Render a filled box. 41 | :param img: image to render into 42 | :param box: (x1, y1, x2, y2) - box coordinates 43 | :param color: (b, g, r) - box color 44 | :return: updated image 45 | """ 46 | x1, y1, x2, y2 = box 47 | img = cv2.rectangle( 48 | img, 49 | (int(x1), int(y1)), 50 | (int(x2), int(y2)), 51 | color, 52 | thickness=cv2.FILLED 53 | ) 54 | return img 55 | 56 | _TEXT_THICKNESS_SCALING = 700.0 57 | _TEXT_SCALING = 520.0 58 | 59 | 60 | def get_text_size(img, text, normalised_scaling=1.0): 61 | """ 62 | Get calculated text size (as box width and height) 63 | :param img: image reference, used to determine appropriate text scaling 64 | :param text: text to display 65 | :param normalised_scaling: additional normalised scaling. Default 1.0.
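    Example (illustrative; 'frame' stands for any BGR image and the label string is invented):
        w, h = get_text_size(frame, 'car 0.87')
    returns roughly the pixel box that a matching render_text() call will occupy.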
66 | :return: (width, height) - width and height of text box 67 | """ 68 | thickness = int( 69 | round( 70 | (img.shape[0] * img.shape[1]) 71 | / (_TEXT_THICKNESS_SCALING * _TEXT_THICKNESS_SCALING) 72 | ) 73 | * normalised_scaling 74 | ) 75 | thickness = max(1, thickness) 76 | scaling = img.shape[0] / _TEXT_SCALING * normalised_scaling 77 | return cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, scaling, thickness)[0] 78 | 79 | 80 | def render_text(img, text, pos, color=(200, 200, 200), normalised_scaling=1.0): 81 | """ 82 | Render a text into the image. Calculates scaling and thickness automatically. 83 | :param img: image to render into 84 | :param text: text to display 85 | :param pos: (x, y) - upper left coordinates of render position 86 | :param color: (b, g, r) - text color 87 | :param normalised_scaling: additional normalised scaling. Default 1.0. 88 | :return: updated image 89 | """ 90 | x, y = pos 91 | thickness = int( 92 | round( 93 | (img.shape[0] * img.shape[1]) 94 | / (_TEXT_THICKNESS_SCALING * _TEXT_THICKNESS_SCALING) 95 | ) 96 | * normalised_scaling 97 | ) 98 | thickness = max(1, thickness) 99 | scaling = img.shape[0] / _TEXT_SCALING * normalised_scaling 100 | size = get_text_size(img, text, normalised_scaling) 101 | cv2.putText( 102 | img, 103 | text, 104 | (int(x), int(y + size[1])), 105 | cv2.FONT_HERSHEY_SIMPLEX, 106 | scaling, 107 | color, 108 | thickness=thickness, 109 | ) 110 | return img 111 | -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | """PyTorch Hub models 2 | 3 | Usage: 4 | import torch 5 | model = torch.hub.load('repo', 'model') 6 | """ 7 | 8 | from pathlib import Path 9 | 10 | import torch 11 | 12 | from models.yolo import Model 13 | from utils.general import check_requirements, set_logging 14 | from utils.google_utils import attempt_download 15 | from utils.torch_utils import select_device 16 | 17 | dependencies = ['torch', 'yaml'] 18 | check_requirements(Path(__file__).parent / 'requirements.txt', exclude=('pycocotools', 'thop')) 19 | set_logging() 20 | 21 | 22 | def create(name, pretrained, channels, classes, autoshape): 23 | """Creates a specified model 24 | 25 | Arguments: 26 | name (str): name of model, i.e. 
'yolov7' 27 | pretrained (bool): load pretrained weights into the model 28 | channels (int): number of input channels 29 | classes (int): number of model classes 30 | 31 | Returns: 32 | pytorch model 33 | """ 34 | try: 35 | cfg = list((Path(__file__).parent / 'cfg').rglob(f'{name}.yaml'))[0] # model.yaml path 36 | model = Model(cfg, channels, classes) 37 | if pretrained: 38 | fname = f'{name}.pt' # checkpoint filename 39 | attempt_download(fname) # download if not found locally 40 | ckpt = torch.load(fname, map_location=torch.device('cpu')) # load 41 | msd = model.state_dict() # model state_dict 42 | csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 43 | csd = {k: v for k, v in csd.items() if msd[k].shape == v.shape} # filter 44 | model.load_state_dict(csd, strict=False) # load 45 | if len(ckpt['model'].names) == classes: 46 | model.names = ckpt['model'].names # set class names attribute 47 | if autoshape: 48 | model = model.autoshape() # for file/URI/PIL/cv2/np inputs and NMS 49 | device = select_device('0' if torch.cuda.is_available() else 'cpu') # default to GPU if available 50 | return model.to(device) 51 | 52 | except Exception as e: 53 | s = 'Cache may be out of date, try force_reload=True.' 54 | raise Exception(s) from e 55 | 56 | 57 | def custom(path_or_model='path/to/model.pt', autoshape=True): 58 | """custom model 59 | 60 | Arguments (3 options): 61 | path_or_model (str): 'path/to/model.pt' 62 | path_or_model (dict): torch.load('path/to/model.pt') 63 | path_or_model (nn.Module): torch.load('path/to/model.pt')['model'] 64 | 65 | Returns: 66 | pytorch model 67 | """ 68 | model = torch.load(path_or_model, map_location=torch.device('cpu')) if isinstance(path_or_model, str) else path_or_model # load checkpoint 69 | if isinstance(model, dict): 70 | model = model['ema' if model.get('ema') else 'model'] # load model 71 | 72 | hub_model = Model(model.yaml).to(next(model.parameters()).device) # create 73 | hub_model.load_state_dict(model.float().state_dict()) # load state_dict 74 | hub_model.names = model.names # class names 75 | if autoshape: 76 | hub_model = hub_model.autoshape() # for file/URI/PIL/cv2/np inputs and NMS 77 | device = select_device('0' if torch.cuda.is_available() else 'cpu') # default to GPU if available 78 | return hub_model.to(device) 79 | 80 | 81 | def yolov7(pretrained=True, channels=3, classes=80, autoshape=True): 82 | return create('yolov7', pretrained, channels, classes, autoshape) 83 | 84 | 85 | if __name__ == '__main__': 86 | model = custom(path_or_model='yolov7.pt') # custom example 87 | # model = create(name='yolov7', pretrained=True, channels=3, classes=80, autoshape=True) # pretrained example 88 | 89 | # Verify inference 90 | import numpy as np 91 | from PIL import Image 92 | 93 | imgs = [np.zeros((640, 480, 3))] 94 | 95 | results = model(imgs) # batched inference 96 | results.print() 97 | results.save() 98 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # yolov7 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [64, 3, 2]], # 
1-P1/2 18 | [-1, 1, Conv, [64, 3, 1]], 19 | 20 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 21 | [-1, 1, Conv, [64, 1, 1]], 22 | [-2, 1, Conv, [64, 1, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [[-1, -3, -5, -6], 1, Concat, [1]], 28 | [-1, 1, Conv, [256, 1, 1]], # 11 29 | 30 | [-1, 1, MP, []], 31 | [-1, 1, Conv, [128, 1, 1]], 32 | [-3, 1, Conv, [128, 1, 1]], 33 | [-1, 1, Conv, [128, 3, 2]], 34 | [[-1, -3], 1, Concat, [1]], # 16-P3/8 35 | [-1, 1, Conv, [128, 1, 1]], 36 | [-2, 1, Conv, [128, 1, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [[-1, -3, -5, -6], 1, Concat, [1]], 42 | [-1, 1, Conv, [512, 1, 1]], # 24 43 | 44 | [-1, 1, MP, []], 45 | [-1, 1, Conv, [256, 1, 1]], 46 | [-3, 1, Conv, [256, 1, 1]], 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, -3], 1, Concat, [1]], # 29-P4/16 49 | [-1, 1, Conv, [256, 1, 1]], 50 | [-2, 1, Conv, [256, 1, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [-1, 1, Conv, [256, 3, 1]], 53 | [-1, 1, Conv, [256, 3, 1]], 54 | [-1, 1, Conv, [256, 3, 1]], 55 | [[-1, -3, -5, -6], 1, Concat, [1]], 56 | [-1, 1, Conv, [1024, 1, 1]], # 37 57 | 58 | [-1, 1, MP, []], 59 | [-1, 1, Conv, [512, 1, 1]], 60 | [-3, 1, Conv, [512, 1, 1]], 61 | [-1, 1, Conv, [512, 3, 2]], 62 | [[-1, -3], 1, Concat, [1]], # 42-P5/32 63 | [-1, 1, Conv, [256, 1, 1]], 64 | [-2, 1, Conv, [256, 1, 1]], 65 | [-1, 1, Conv, [256, 3, 1]], 66 | [-1, 1, Conv, [256, 3, 1]], 67 | [-1, 1, Conv, [256, 3, 1]], 68 | [-1, 1, Conv, [256, 3, 1]], 69 | [[-1, -3, -5, -6], 1, Concat, [1]], 70 | [-1, 1, Conv, [1024, 1, 1]], # 50 71 | ] 72 | 73 | # yolov7 head 74 | head: 75 | [[-1, 1, SPPCSPC, [512]], # 51 76 | 77 | [-1, 1, Conv, [256, 1, 1]], 78 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 79 | [37, 1, Conv, [256, 1, 1]], # route backbone P4 80 | [[-1, -2], 1, Concat, [1]], 81 | 82 | [-1, 1, Conv, [256, 1, 1]], 83 | [-2, 1, Conv, [256, 1, 1]], 84 | [-1, 1, Conv, [128, 3, 1]], 85 | [-1, 1, Conv, [128, 3, 1]], 86 | [-1, 1, Conv, [128, 3, 1]], 87 | [-1, 1, Conv, [128, 3, 1]], 88 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 89 | [-1, 1, Conv, [256, 1, 1]], # 63 90 | 91 | [-1, 1, Conv, [128, 1, 1]], 92 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 93 | [24, 1, Conv, [128, 1, 1]], # route backbone P3 94 | [[-1, -2], 1, Concat, [1]], 95 | 96 | [-1, 1, Conv, [128, 1, 1]], 97 | [-2, 1, Conv, [128, 1, 1]], 98 | [-1, 1, Conv, [64, 3, 1]], 99 | [-1, 1, Conv, [64, 3, 1]], 100 | [-1, 1, Conv, [64, 3, 1]], 101 | [-1, 1, Conv, [64, 3, 1]], 102 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 103 | [-1, 1, Conv, [128, 1, 1]], # 75 104 | 105 | [-1, 1, MP, []], 106 | [-1, 1, Conv, [128, 1, 1]], 107 | [-3, 1, Conv, [128, 1, 1]], 108 | [-1, 1, Conv, [128, 3, 2]], 109 | [[-1, -3, 63], 1, Concat, [1]], 110 | 111 | [-1, 1, Conv, [256, 1, 1]], 112 | [-2, 1, Conv, [256, 1, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [-1, 1, Conv, [128, 3, 1]], 115 | [-1, 1, Conv, [128, 3, 1]], 116 | [-1, 1, Conv, [128, 3, 1]], 117 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 118 | [-1, 1, Conv, [256, 1, 1]], # 88 119 | 120 | [-1, 1, MP, []], 121 | [-1, 1, Conv, [256, 1, 1]], 122 | [-3, 1, Conv, [256, 1, 1]], 123 | [-1, 1, Conv, [256, 3, 2]], 124 | [[-1, -3, 51], 1, Concat, [1]], 125 | 126 | [-1, 1, Conv, [512, 1, 1]], 127 | [-2, 1, Conv, [512, 1, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [-1, 1, Conv, [256, 3, 1]], 130 | [-1, 1, Conv, [256, 3, 1]], 131 | [-1, 1, Conv, [256, 3, 1]], 132 
| [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 133 | [-1, 1, Conv, [512, 1, 1]], # 101 134 | 135 | [75, 1, RepConv, [256, 3, 1]], 136 | [88, 1, RepConv, [512, 3, 1]], 137 | [101, 1, RepConv, [1024, 3, 1]], 138 | 139 | [[102,103,104], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 140 | ] 141 | -------------------------------------------------------------------------------- /cfg/training/yolov7.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # yolov7 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 18 | [-1, 1, Conv, [64, 3, 1]], 19 | 20 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 21 | [-1, 1, Conv, [64, 1, 1]], 22 | [-2, 1, Conv, [64, 1, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [[-1, -3, -5, -6], 1, Concat, [1]], 28 | [-1, 1, Conv, [256, 1, 1]], # 11 29 | 30 | [-1, 1, MP, []], 31 | [-1, 1, Conv, [128, 1, 1]], 32 | [-3, 1, Conv, [128, 1, 1]], 33 | [-1, 1, Conv, [128, 3, 2]], 34 | [[-1, -3], 1, Concat, [1]], # 16-P3/8 35 | [-1, 1, Conv, [128, 1, 1]], 36 | [-2, 1, Conv, [128, 1, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [[-1, -3, -5, -6], 1, Concat, [1]], 42 | [-1, 1, Conv, [512, 1, 1]], # 24 43 | 44 | [-1, 1, MP, []], 45 | [-1, 1, Conv, [256, 1, 1]], 46 | [-3, 1, Conv, [256, 1, 1]], 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, -3], 1, Concat, [1]], # 29-P4/16 49 | [-1, 1, Conv, [256, 1, 1]], 50 | [-2, 1, Conv, [256, 1, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [-1, 1, Conv, [256, 3, 1]], 53 | [-1, 1, Conv, [256, 3, 1]], 54 | [-1, 1, Conv, [256, 3, 1]], 55 | [[-1, -3, -5, -6], 1, Concat, [1]], 56 | [-1, 1, Conv, [1024, 1, 1]], # 37 57 | 58 | [-1, 1, MP, []], 59 | [-1, 1, Conv, [512, 1, 1]], 60 | [-3, 1, Conv, [512, 1, 1]], 61 | [-1, 1, Conv, [512, 3, 2]], 62 | [[-1, -3], 1, Concat, [1]], # 42-P5/32 63 | [-1, 1, Conv, [256, 1, 1]], 64 | [-2, 1, Conv, [256, 1, 1]], 65 | [-1, 1, Conv, [256, 3, 1]], 66 | [-1, 1, Conv, [256, 3, 1]], 67 | [-1, 1, Conv, [256, 3, 1]], 68 | [-1, 1, Conv, [256, 3, 1]], 69 | [[-1, -3, -5, -6], 1, Concat, [1]], 70 | [-1, 1, Conv, [1024, 1, 1]], # 50 71 | ] 72 | 73 | # yolov7 head 74 | head: 75 | [[-1, 1, SPPCSPC, [512]], # 51 76 | 77 | [-1, 1, Conv, [256, 1, 1]], 78 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 79 | [37, 1, Conv, [256, 1, 1]], # route backbone P4 80 | [[-1, -2], 1, Concat, [1]], 81 | 82 | [-1, 1, Conv, [256, 1, 1]], 83 | [-2, 1, Conv, [256, 1, 1]], 84 | [-1, 1, Conv, [128, 3, 1]], 85 | [-1, 1, Conv, [128, 3, 1]], 86 | [-1, 1, Conv, [128, 3, 1]], 87 | [-1, 1, Conv, [128, 3, 1]], 88 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 89 | [-1, 1, Conv, [256, 1, 1]], # 63 90 | 91 | [-1, 1, Conv, [128, 1, 1]], 92 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 93 | [24, 1, Conv, [128, 1, 1]], # route backbone P3 94 | [[-1, -2], 1, Concat, [1]], 95 | 96 | [-1, 1, Conv, [128, 1, 1]], 97 | [-2, 1, Conv, [128, 1, 1]], 98 | [-1, 1, Conv, [64, 3, 1]], 99 | [-1, 1, Conv, [64, 3, 1]], 100 | [-1, 1, Conv, [64, 3, 1]], 101 | [-1, 1, Conv, [64, 3, 1]], 102 | [[-1, 
-2, -3, -4, -5, -6], 1, Concat, [1]], 103 | [-1, 1, Conv, [128, 1, 1]], # 75 104 | 105 | [-1, 1, MP, []], 106 | [-1, 1, Conv, [128, 1, 1]], 107 | [-3, 1, Conv, [128, 1, 1]], 108 | [-1, 1, Conv, [128, 3, 2]], 109 | [[-1, -3, 63], 1, Concat, [1]], 110 | 111 | [-1, 1, Conv, [256, 1, 1]], 112 | [-2, 1, Conv, [256, 1, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [-1, 1, Conv, [128, 3, 1]], 115 | [-1, 1, Conv, [128, 3, 1]], 116 | [-1, 1, Conv, [128, 3, 1]], 117 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 118 | [-1, 1, Conv, [256, 1, 1]], # 88 119 | 120 | [-1, 1, MP, []], 121 | [-1, 1, Conv, [256, 1, 1]], 122 | [-3, 1, Conv, [256, 1, 1]], 123 | [-1, 1, Conv, [256, 3, 2]], 124 | [[-1, -3, 51], 1, Concat, [1]], 125 | 126 | [-1, 1, Conv, [512, 1, 1]], 127 | [-2, 1, Conv, [512, 1, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [-1, 1, Conv, [256, 3, 1]], 130 | [-1, 1, Conv, [256, 3, 1]], 131 | [-1, 1, Conv, [256, 3, 1]], 132 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 133 | [-1, 1, Conv, [512, 1, 1]], # 101 134 | 135 | [75, 1, RepConv, [256, 3, 1]], 136 | [88, 1, RepConv, [512, 3, 1]], 137 | [101, 1, RepConv, [1024, 3, 1]], 138 | 139 | [[102,103,104], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5) 140 | ] 141 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7-tiny.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # yolov7-tiny backbone 13 | backbone: 14 | # [from, number, module, args] c2, k=1, s=1, p=None, g=1, act=True 15 | [[-1, 1, Conv, [32, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 0-P1/2 16 | 17 | [-1, 1, Conv, [64, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 1-P2/4 18 | 19 | [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 20 | [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 21 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 22 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 23 | [[-1, -2, -3, -4], 1, Concat, [1]], 24 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 7 25 | 26 | [-1, 1, MP, []], # 8-P3/8 27 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 28 | [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 29 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 30 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 31 | [[-1, -2, -3, -4], 1, Concat, [1]], 32 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 14 33 | 34 | [-1, 1, MP, []], # 15-P4/16 35 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 36 | [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 37 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 38 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 39 | [[-1, -2, -3, -4], 1, Concat, [1]], 40 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 21 41 | 42 | [-1, 1, MP, []], # 22-P5/32 43 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 44 | [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 45 | [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 46 | [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 47 | [[-1, -2, -3, -4], 1, Concat, [1]], 48 | [-1, 1, Conv, [512, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 28 49 | ] 50 | 51 | # yolov7-tiny head 52 | head: 53 | [[-1, 1, Conv, [256, 
1, 1, None, 1, nn.LeakyReLU(0.1)]], 54 | [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 55 | [-1, 1, SP, [5]], 56 | [-2, 1, SP, [9]], 57 | [-3, 1, SP, [13]], 58 | [[-1, -2, -3, -4], 1, Concat, [1]], 59 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 60 | [[-1, -7], 1, Concat, [1]], 61 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 37 62 | 63 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 64 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 65 | [21, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P4 66 | [[-1, -2], 1, Concat, [1]], 67 | 68 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 69 | [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 70 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 71 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 72 | [[-1, -2, -3, -4], 1, Concat, [1]], 73 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 47 74 | 75 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 76 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 77 | [14, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P3 78 | [[-1, -2], 1, Concat, [1]], 79 | 80 | [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 81 | [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 82 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 83 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 84 | [[-1, -2, -3, -4], 1, Concat, [1]], 85 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 57 86 | 87 | [-1, 1, Conv, [128, 3, 2, None, 1, nn.LeakyReLU(0.1)]], 88 | [[-1, 47], 1, Concat, [1]], 89 | 90 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 91 | [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 92 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 93 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 94 | [[-1, -2, -3, -4], 1, Concat, [1]], 95 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 65 96 | 97 | [-1, 1, Conv, [256, 3, 2, None, 1, nn.LeakyReLU(0.1)]], 98 | [[-1, 37], 1, Concat, [1]], 99 | 100 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 101 | [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 102 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 103 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 104 | [[-1, -2, -3, -4], 1, Concat, [1]], 105 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 73 106 | 107 | [57, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 108 | [65, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 109 | [73, 1, Conv, [512, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 110 | 111 | [[74,75,76], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 112 | ] 113 | -------------------------------------------------------------------------------- /cfg/training/yolov7-tiny.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # yolov7-tiny backbone 13 | backbone: 14 | # [from, number, module, args] c2, k=1, s=1, p=None, g=1, act=True 15 | [[-1, 1, Conv, [32, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 0-P1/2 16 | 17 | [-1, 1, Conv, [64, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 1-P2/4 18 | 19 | [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 20 | [-2, 1, Conv, [32, 
1, 1, None, 1, nn.LeakyReLU(0.1)]], 21 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 22 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 23 | [[-1, -2, -3, -4], 1, Concat, [1]], 24 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 7 25 | 26 | [-1, 1, MP, []], # 8-P3/8 27 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 28 | [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 29 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 30 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 31 | [[-1, -2, -3, -4], 1, Concat, [1]], 32 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 14 33 | 34 | [-1, 1, MP, []], # 15-P4/16 35 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 36 | [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 37 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 38 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 39 | [[-1, -2, -3, -4], 1, Concat, [1]], 40 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 21 41 | 42 | [-1, 1, MP, []], # 22-P5/32 43 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 44 | [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 45 | [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 46 | [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 47 | [[-1, -2, -3, -4], 1, Concat, [1]], 48 | [-1, 1, Conv, [512, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 28 49 | ] 50 | 51 | # yolov7-tiny head 52 | head: 53 | [[-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 54 | [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 55 | [-1, 1, SP, [5]], 56 | [-2, 1, SP, [9]], 57 | [-3, 1, SP, [13]], 58 | [[-1, -2, -3, -4], 1, Concat, [1]], 59 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 60 | [[-1, -7], 1, Concat, [1]], 61 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 37 62 | 63 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 64 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 65 | [21, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P4 66 | [[-1, -2], 1, Concat, [1]], 67 | 68 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 69 | [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 70 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 71 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 72 | [[-1, -2, -3, -4], 1, Concat, [1]], 73 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 47 74 | 75 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 76 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 77 | [14, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P3 78 | [[-1, -2], 1, Concat, [1]], 79 | 80 | [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 81 | [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 82 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 83 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 84 | [[-1, -2, -3, -4], 1, Concat, [1]], 85 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 57 86 | 87 | [-1, 1, Conv, [128, 3, 2, None, 1, nn.LeakyReLU(0.1)]], 88 | [[-1, 47], 1, Concat, [1]], 89 | 90 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 91 | [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 92 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 93 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 94 | [[-1, -2, -3, -4], 1, Concat, [1]], 95 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 65 96 | 97 | [-1, 1, Conv, [256, 3, 2, None, 1, nn.LeakyReLU(0.1)]], 98 | [[-1, 37], 
1, Concat, [1]], 99 | 100 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 101 | [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 102 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 103 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 104 | [[-1, -2, -3, -4], 1, Concat, [1]], 105 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 73 106 | 107 | [57, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 108 | [65, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 109 | [73, 1, Conv, [512, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 110 | 111 | [[74,75,76], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5) 112 | ] 113 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # yolov7x backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [40, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [80, 3, 2]], # 1-P1/2 18 | [-1, 1, Conv, [80, 3, 1]], 19 | 20 | [-1, 1, Conv, [160, 3, 2]], # 3-P2/4 21 | [-1, 1, Conv, [64, 1, 1]], 22 | [-2, 1, Conv, [64, 1, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [-1, 1, Conv, [64, 3, 1]], 29 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 30 | [-1, 1, Conv, [320, 1, 1]], # 13 31 | 32 | [-1, 1, MP, []], 33 | [-1, 1, Conv, [160, 1, 1]], 34 | [-3, 1, Conv, [160, 1, 1]], 35 | [-1, 1, Conv, [160, 3, 2]], 36 | [[-1, -3], 1, Concat, [1]], # 18-P3/8 37 | [-1, 1, Conv, [128, 1, 1]], 38 | [-2, 1, Conv, [128, 1, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [-1, 1, Conv, [128, 3, 1]], 42 | [-1, 1, Conv, [128, 3, 1]], 43 | [-1, 1, Conv, [128, 3, 1]], 44 | [-1, 1, Conv, [128, 3, 1]], 45 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 46 | [-1, 1, Conv, [640, 1, 1]], # 28 47 | 48 | [-1, 1, MP, []], 49 | [-1, 1, Conv, [320, 1, 1]], 50 | [-3, 1, Conv, [320, 1, 1]], 51 | [-1, 1, Conv, [320, 3, 2]], 52 | [[-1, -3], 1, Concat, [1]], # 33-P4/16 53 | [-1, 1, Conv, [256, 1, 1]], 54 | [-2, 1, Conv, [256, 1, 1]], 55 | [-1, 1, Conv, [256, 3, 1]], 56 | [-1, 1, Conv, [256, 3, 1]], 57 | [-1, 1, Conv, [256, 3, 1]], 58 | [-1, 1, Conv, [256, 3, 1]], 59 | [-1, 1, Conv, [256, 3, 1]], 60 | [-1, 1, Conv, [256, 3, 1]], 61 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 62 | [-1, 1, Conv, [1280, 1, 1]], # 43 63 | 64 | [-1, 1, MP, []], 65 | [-1, 1, Conv, [640, 1, 1]], 66 | [-3, 1, Conv, [640, 1, 1]], 67 | [-1, 1, Conv, [640, 3, 2]], 68 | [[-1, -3], 1, Concat, [1]], # 48-P5/32 69 | [-1, 1, Conv, [256, 1, 1]], 70 | [-2, 1, Conv, [256, 1, 1]], 71 | [-1, 1, Conv, [256, 3, 1]], 72 | [-1, 1, Conv, [256, 3, 1]], 73 | [-1, 1, Conv, [256, 3, 1]], 74 | [-1, 1, Conv, [256, 3, 1]], 75 | [-1, 1, Conv, [256, 3, 1]], 76 | [-1, 1, Conv, [256, 3, 1]], 77 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 78 | [-1, 1, Conv, [1280, 1, 1]], # 58 79 | ] 80 | 81 | # yolov7x head 82 | head: 83 | [[-1, 1, SPPCSPC, [640]], # 59 84 | 85 | [-1, 1, Conv, [320, 1, 1]], 86 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 87 | [43, 1, Conv, [320, 1, 1]], # route backbone P4 88 | [[-1, -2], 1, Concat, [1]], 89 | 90 | [-1, 1, Conv, [256, 1, 
1]], 91 | [-2, 1, Conv, [256, 1, 1]], 92 | [-1, 1, Conv, [256, 3, 1]], 93 | [-1, 1, Conv, [256, 3, 1]], 94 | [-1, 1, Conv, [256, 3, 1]], 95 | [-1, 1, Conv, [256, 3, 1]], 96 | [-1, 1, Conv, [256, 3, 1]], 97 | [-1, 1, Conv, [256, 3, 1]], 98 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 99 | [-1, 1, Conv, [320, 1, 1]], # 73 100 | 101 | [-1, 1, Conv, [160, 1, 1]], 102 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 103 | [28, 1, Conv, [160, 1, 1]], # route backbone P3 104 | [[-1, -2], 1, Concat, [1]], 105 | 106 | [-1, 1, Conv, [128, 1, 1]], 107 | [-2, 1, Conv, [128, 1, 1]], 108 | [-1, 1, Conv, [128, 3, 1]], 109 | [-1, 1, Conv, [128, 3, 1]], 110 | [-1, 1, Conv, [128, 3, 1]], 111 | [-1, 1, Conv, [128, 3, 1]], 112 | [-1, 1, Conv, [128, 3, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 115 | [-1, 1, Conv, [160, 1, 1]], # 87 116 | 117 | [-1, 1, MP, []], 118 | [-1, 1, Conv, [160, 1, 1]], 119 | [-3, 1, Conv, [160, 1, 1]], 120 | [-1, 1, Conv, [160, 3, 2]], 121 | [[-1, -3, 73], 1, Concat, [1]], 122 | 123 | [-1, 1, Conv, [256, 1, 1]], 124 | [-2, 1, Conv, [256, 1, 1]], 125 | [-1, 1, Conv, [256, 3, 1]], 126 | [-1, 1, Conv, [256, 3, 1]], 127 | [-1, 1, Conv, [256, 3, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [-1, 1, Conv, [256, 3, 1]], 130 | [-1, 1, Conv, [256, 3, 1]], 131 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 132 | [-1, 1, Conv, [320, 1, 1]], # 102 133 | 134 | [-1, 1, MP, []], 135 | [-1, 1, Conv, [320, 1, 1]], 136 | [-3, 1, Conv, [320, 1, 1]], 137 | [-1, 1, Conv, [320, 3, 2]], 138 | [[-1, -3, 59], 1, Concat, [1]], 139 | 140 | [-1, 1, Conv, [512, 1, 1]], 141 | [-2, 1, Conv, [512, 1, 1]], 142 | [-1, 1, Conv, [512, 3, 1]], 143 | [-1, 1, Conv, [512, 3, 1]], 144 | [-1, 1, Conv, [512, 3, 1]], 145 | [-1, 1, Conv, [512, 3, 1]], 146 | [-1, 1, Conv, [512, 3, 1]], 147 | [-1, 1, Conv, [512, 3, 1]], 148 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 149 | [-1, 1, Conv, [640, 1, 1]], # 117 150 | 151 | [87, 1, Conv, [320, 3, 1]], 152 | [102, 1, Conv, [640, 3, 1]], 153 | [117, 1, Conv, [1280, 3, 1]], 154 | 155 | [[118,119,120], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 156 | ] 157 | -------------------------------------------------------------------------------- /cfg/training/yolov7x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # yolov7 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [40, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [80, 3, 2]], # 1-P1/2 18 | [-1, 1, Conv, [80, 3, 1]], 19 | 20 | [-1, 1, Conv, [160, 3, 2]], # 3-P2/4 21 | [-1, 1, Conv, [64, 1, 1]], 22 | [-2, 1, Conv, [64, 1, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [-1, 1, Conv, [64, 3, 1]], 29 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 30 | [-1, 1, Conv, [320, 1, 1]], # 13 31 | 32 | [-1, 1, MP, []], 33 | [-1, 1, Conv, [160, 1, 1]], 34 | [-3, 1, Conv, [160, 1, 1]], 35 | [-1, 1, Conv, [160, 3, 2]], 36 | [[-1, -3], 1, Concat, [1]], # 18-P3/8 37 | [-1, 1, Conv, [128, 1, 1]], 38 | [-2, 1, Conv, [128, 1, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [-1, 1, Conv, [128, 3, 1]], 42 | [-1, 1, Conv, [128, 3, 1]], 
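# How to read these rows (a comment gloss, not extra layers): each entry is
# [from, number, module, args]. from = -1 takes the previous layer's output,
# -2 the one before that, and a list such as [-1, -3, -5, -7, -8] routes
# several earlier outputs into one module; number repeats the module; Conv
# args are [out_channels, kernel, stride]. The Concat a few rows below thus
# joins five 128-channel branches into 5 * 128 = 640 channels, matching the
# 1x1 Conv [640, 1, 1] that follows.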
43 | [-1, 1, Conv, [128, 3, 1]], 44 | [-1, 1, Conv, [128, 3, 1]], 45 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 46 | [-1, 1, Conv, [640, 1, 1]], # 28 47 | 48 | [-1, 1, MP, []], 49 | [-1, 1, Conv, [320, 1, 1]], 50 | [-3, 1, Conv, [320, 1, 1]], 51 | [-1, 1, Conv, [320, 3, 2]], 52 | [[-1, -3], 1, Concat, [1]], # 33-P4/16 53 | [-1, 1, Conv, [256, 1, 1]], 54 | [-2, 1, Conv, [256, 1, 1]], 55 | [-1, 1, Conv, [256, 3, 1]], 56 | [-1, 1, Conv, [256, 3, 1]], 57 | [-1, 1, Conv, [256, 3, 1]], 58 | [-1, 1, Conv, [256, 3, 1]], 59 | [-1, 1, Conv, [256, 3, 1]], 60 | [-1, 1, Conv, [256, 3, 1]], 61 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 62 | [-1, 1, Conv, [1280, 1, 1]], # 43 63 | 64 | [-1, 1, MP, []], 65 | [-1, 1, Conv, [640, 1, 1]], 66 | [-3, 1, Conv, [640, 1, 1]], 67 | [-1, 1, Conv, [640, 3, 2]], 68 | [[-1, -3], 1, Concat, [1]], # 48-P5/32 69 | [-1, 1, Conv, [256, 1, 1]], 70 | [-2, 1, Conv, [256, 1, 1]], 71 | [-1, 1, Conv, [256, 3, 1]], 72 | [-1, 1, Conv, [256, 3, 1]], 73 | [-1, 1, Conv, [256, 3, 1]], 74 | [-1, 1, Conv, [256, 3, 1]], 75 | [-1, 1, Conv, [256, 3, 1]], 76 | [-1, 1, Conv, [256, 3, 1]], 77 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 78 | [-1, 1, Conv, [1280, 1, 1]], # 58 79 | ] 80 | 81 | # yolov7 head 82 | head: 83 | [[-1, 1, SPPCSPC, [640]], # 59 84 | 85 | [-1, 1, Conv, [320, 1, 1]], 86 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 87 | [43, 1, Conv, [320, 1, 1]], # route backbone P4 88 | [[-1, -2], 1, Concat, [1]], 89 | 90 | [-1, 1, Conv, [256, 1, 1]], 91 | [-2, 1, Conv, [256, 1, 1]], 92 | [-1, 1, Conv, [256, 3, 1]], 93 | [-1, 1, Conv, [256, 3, 1]], 94 | [-1, 1, Conv, [256, 3, 1]], 95 | [-1, 1, Conv, [256, 3, 1]], 96 | [-1, 1, Conv, [256, 3, 1]], 97 | [-1, 1, Conv, [256, 3, 1]], 98 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 99 | [-1, 1, Conv, [320, 1, 1]], # 73 100 | 101 | [-1, 1, Conv, [160, 1, 1]], 102 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 103 | [28, 1, Conv, [160, 1, 1]], # route backbone P3 104 | [[-1, -2], 1, Concat, [1]], 105 | 106 | [-1, 1, Conv, [128, 1, 1]], 107 | [-2, 1, Conv, [128, 1, 1]], 108 | [-1, 1, Conv, [128, 3, 1]], 109 | [-1, 1, Conv, [128, 3, 1]], 110 | [-1, 1, Conv, [128, 3, 1]], 111 | [-1, 1, Conv, [128, 3, 1]], 112 | [-1, 1, Conv, [128, 3, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 115 | [-1, 1, Conv, [160, 1, 1]], # 87 116 | 117 | [-1, 1, MP, []], 118 | [-1, 1, Conv, [160, 1, 1]], 119 | [-3, 1, Conv, [160, 1, 1]], 120 | [-1, 1, Conv, [160, 3, 2]], 121 | [[-1, -3, 73], 1, Concat, [1]], 122 | 123 | [-1, 1, Conv, [256, 1, 1]], 124 | [-2, 1, Conv, [256, 1, 1]], 125 | [-1, 1, Conv, [256, 3, 1]], 126 | [-1, 1, Conv, [256, 3, 1]], 127 | [-1, 1, Conv, [256, 3, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [-1, 1, Conv, [256, 3, 1]], 130 | [-1, 1, Conv, [256, 3, 1]], 131 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 132 | [-1, 1, Conv, [320, 1, 1]], # 102 133 | 134 | [-1, 1, MP, []], 135 | [-1, 1, Conv, [320, 1, 1]], 136 | [-3, 1, Conv, [320, 1, 1]], 137 | [-1, 1, Conv, [320, 3, 2]], 138 | [[-1, -3, 59], 1, Concat, [1]], 139 | 140 | [-1, 1, Conv, [512, 1, 1]], 141 | [-2, 1, Conv, [512, 1, 1]], 142 | [-1, 1, Conv, [512, 3, 1]], 143 | [-1, 1, Conv, [512, 3, 1]], 144 | [-1, 1, Conv, [512, 3, 1]], 145 | [-1, 1, Conv, [512, 3, 1]], 146 | [-1, 1, Conv, [512, 3, 1]], 147 | [-1, 1, Conv, [512, 3, 1]], 148 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 149 | [-1, 1, Conv, [640, 1, 1]], # 117 150 | 151 | [87, 1, Conv, [320, 3, 1]], 152 | [102, 1, Conv, [640, 3, 1]], 153 | [117, 1, Conv, [1280, 3, 1]], 154 | 155 | [[118,119,120], 1, IDetect, [nc, 
anchors]], # Detect(P3, P4, P5) 156 | ] 157 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7-w6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7-w6 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [64, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [[-1, -3, -5, -6], 1, Concat, [1]], 27 | [-1, 1, Conv, [128, 1, 1]], # 10 28 | 29 | [-1, 1, Conv, [256, 3, 2]], # 11-P3/8 30 | [-1, 1, Conv, [128, 1, 1]], 31 | [-2, 1, Conv, [128, 1, 1]], 32 | [-1, 1, Conv, [128, 3, 1]], 33 | [-1, 1, Conv, [128, 3, 1]], 34 | [-1, 1, Conv, [128, 3, 1]], 35 | [-1, 1, Conv, [128, 3, 1]], 36 | [[-1, -3, -5, -6], 1, Concat, [1]], 37 | [-1, 1, Conv, [256, 1, 1]], # 19 38 | 39 | [-1, 1, Conv, [512, 3, 2]], # 20-P4/16 40 | [-1, 1, Conv, [256, 1, 1]], 41 | [-2, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [256, 3, 1]], 43 | [-1, 1, Conv, [256, 3, 1]], 44 | [-1, 1, Conv, [256, 3, 1]], 45 | [-1, 1, Conv, [256, 3, 1]], 46 | [[-1, -3, -5, -6], 1, Concat, [1]], 47 | [-1, 1, Conv, [512, 1, 1]], # 28 48 | 49 | [-1, 1, Conv, [768, 3, 2]], # 29-P5/32 50 | [-1, 1, Conv, [384, 1, 1]], 51 | [-2, 1, Conv, [384, 1, 1]], 52 | [-1, 1, Conv, [384, 3, 1]], 53 | [-1, 1, Conv, [384, 3, 1]], 54 | [-1, 1, Conv, [384, 3, 1]], 55 | [-1, 1, Conv, [384, 3, 1]], 56 | [[-1, -3, -5, -6], 1, Concat, [1]], 57 | [-1, 1, Conv, [768, 1, 1]], # 37 58 | 59 | [-1, 1, Conv, [1024, 3, 2]], # 38-P6/64 60 | [-1, 1, Conv, [512, 1, 1]], 61 | [-2, 1, Conv, [512, 1, 1]], 62 | [-1, 1, Conv, [512, 3, 1]], 63 | [-1, 1, Conv, [512, 3, 1]], 64 | [-1, 1, Conv, [512, 3, 1]], 65 | [-1, 1, Conv, [512, 3, 1]], 66 | [[-1, -3, -5, -6], 1, Concat, [1]], 67 | [-1, 1, Conv, [1024, 1, 1]], # 46 68 | ] 69 | 70 | # yolov7-w6 head 71 | head: 72 | [[-1, 1, SPPCSPC, [512]], # 47 73 | 74 | [-1, 1, Conv, [384, 1, 1]], 75 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 76 | [37, 1, Conv, [384, 1, 1]], # route backbone P5 77 | [[-1, -2], 1, Concat, [1]], 78 | 79 | [-1, 1, Conv, [384, 1, 1]], 80 | [-2, 1, Conv, [384, 1, 1]], 81 | [-1, 1, Conv, [192, 3, 1]], 82 | [-1, 1, Conv, [192, 3, 1]], 83 | [-1, 1, Conv, [192, 3, 1]], 84 | [-1, 1, Conv, [192, 3, 1]], 85 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 86 | [-1, 1, Conv, [384, 1, 1]], # 59 87 | 88 | [-1, 1, Conv, [256, 1, 1]], 89 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 90 | [28, 1, Conv, [256, 1, 1]], # route backbone P4 91 | [[-1, -2], 1, Concat, [1]], 92 | 93 | [-1, 1, Conv, [256, 1, 1]], 94 | [-2, 1, Conv, [256, 1, 1]], 95 | [-1, 1, Conv, [128, 3, 1]], 96 | [-1, 1, Conv, [128, 3, 1]], 97 | [-1, 1, Conv, [128, 3, 1]], 98 | [-1, 1, Conv, [128, 3, 1]], 99 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 100 | [-1, 1, Conv, [256, 1, 1]], # 71 101 | 102 | [-1, 1, Conv, [128, 1, 1]], 103 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 104 | [19, 1, Conv, [128, 1, 1]], # route backbone P3 105 | [[-1, -2], 1, Concat, [1]], 106 | 107 | 
[-1, 1, Conv, [128, 1, 1]], 108 | [-2, 1, Conv, [128, 1, 1]], 109 | [-1, 1, Conv, [64, 3, 1]], 110 | [-1, 1, Conv, [64, 3, 1]], 111 | [-1, 1, Conv, [64, 3, 1]], 112 | [-1, 1, Conv, [64, 3, 1]], 113 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 114 | [-1, 1, Conv, [128, 1, 1]], # 83 115 | 116 | [-1, 1, Conv, [256, 3, 2]], 117 | [[-1, 71], 1, Concat, [1]], # cat 118 | 119 | [-1, 1, Conv, [256, 1, 1]], 120 | [-2, 1, Conv, [256, 1, 1]], 121 | [-1, 1, Conv, [128, 3, 1]], 122 | [-1, 1, Conv, [128, 3, 1]], 123 | [-1, 1, Conv, [128, 3, 1]], 124 | [-1, 1, Conv, [128, 3, 1]], 125 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 126 | [-1, 1, Conv, [256, 1, 1]], # 93 127 | 128 | [-1, 1, Conv, [384, 3, 2]], 129 | [[-1, 59], 1, Concat, [1]], # cat 130 | 131 | [-1, 1, Conv, [384, 1, 1]], 132 | [-2, 1, Conv, [384, 1, 1]], 133 | [-1, 1, Conv, [192, 3, 1]], 134 | [-1, 1, Conv, [192, 3, 1]], 135 | [-1, 1, Conv, [192, 3, 1]], 136 | [-1, 1, Conv, [192, 3, 1]], 137 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 138 | [-1, 1, Conv, [384, 1, 1]], # 103 139 | 140 | [-1, 1, Conv, [512, 3, 2]], 141 | [[-1, 47], 1, Concat, [1]], # cat 142 | 143 | [-1, 1, Conv, [512, 1, 1]], 144 | [-2, 1, Conv, [512, 1, 1]], 145 | [-1, 1, Conv, [256, 3, 1]], 146 | [-1, 1, Conv, [256, 3, 1]], 147 | [-1, 1, Conv, [256, 3, 1]], 148 | [-1, 1, Conv, [256, 3, 1]], 149 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 150 | [-1, 1, Conv, [512, 1, 1]], # 113 151 | 152 | [83, 1, Conv, [256, 3, 1]], 153 | [93, 1, Conv, [512, 3, 1]], 154 | [103, 1, Conv, [768, 3, 1]], 155 | [113, 1, Conv, [1024, 3, 1]], 156 | 157 | [[114,115,116,117], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 158 | ] 159 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | import time 7 | from pathlib import Path 8 | 9 | import requests 10 | import torch 11 | 12 | 13 | def gsutil_getsize(url=''): 14 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 15 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 16 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 17 | 18 | 19 | def attempt_download(file, repo='WongKinYiu/yolov7'): 20 | # Attempt file download if does not exist 21 | file = Path(str(file).strip().replace("'", '').lower()) 22 | 23 | if not file.exists(): 24 | try: 25 | response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api 26 | assets = [x['name'] for x in response['assets']] # release assets 27 | tag = response['tag_name'] # i.e. 
'v1.0' 28 | except: # fallback plan 29 | assets = ['yolov7.pt', 'yolov7-tiny.pt', 'yolov7x.pt', 'yolov7-d6.pt', 'yolov7-e6.pt', 30 | 'yolov7-e6e.pt', 'yolov7-w6.pt'] 31 | tag = subprocess.check_output('git tag', shell=True).decode().split()[-1] 32 | 33 | name = file.name 34 | if name in assets: 35 | msg = f'{file} missing, try downloading from https://github.com/{repo}/releases/' 36 | redundant = False # second download option 37 | try: # GitHub 38 | url = f'https://github.com/{repo}/releases/download/{tag}/{name}' 39 | print(f'Downloading {url} to {file}...') 40 | torch.hub.download_url_to_file(url, file) 41 | assert file.exists() and file.stat().st_size > 1E6 # check 42 | except Exception as e: # GCP 43 | print(f'Download error: {e}') 44 | assert redundant, 'No secondary mirror' 45 | url = f'https://storage.googleapis.com/{repo}/ckpt/{name}' 46 | print(f'Downloading {url} to {file}...') 47 | os.system(f'curl -L {url} -o {file}') # torch.hub.download_url_to_file(url, weights) 48 | finally: 49 | if not file.exists() or file.stat().st_size < 1E6: # check 50 | file.unlink(missing_ok=True) # remove partial downloads 51 | print(f'ERROR: Download failure: {msg}') 52 | print('') 53 | return 54 | 55 | 56 | def gdrive_download(id='', file='tmp.zip'): 57 | # Downloads a file from Google Drive. from yolov7.utils.google_utils import *; gdrive_download() 58 | t = time.time() 59 | file = Path(file) 60 | cookie = Path('cookie') # gdrive cookie 61 | print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='') 62 | file.unlink(missing_ok=True) # remove existing file 63 | cookie.unlink(missing_ok=True) # remove existing cookie 64 | 65 | # Attempt file download 66 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 67 | os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}') 68 | if os.path.exists('cookie'): # large file 69 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' 70 | else: # small file 71 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' 72 | r = os.system(s) # execute, capture return 73 | cookie.unlink(missing_ok=True) # remove existing cookie 74 | 75 | # Error check 76 | if r != 0: 77 | file.unlink(missing_ok=True) # remove partial 78 | print('Download error ') # raise Exception('Download error') 79 | return r 80 | 81 | # Unzip if archive 82 | if file.suffix == '.zip': 83 | print('unzipping... 
', end='') 84 | os.system(f'unzip -q {file}') # unzip 85 | file.unlink() # remove zip to free space 86 | 87 | print(f'Done ({time.time() - t:.1f}s)') 88 | return r 89 | 90 | 91 | def get_token(cookie="./cookie"): 92 | with open(cookie) as f: 93 | for line in f: 94 | if "download" in line: 95 | return line.split()[-1] 96 | return "" 97 | 98 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 99 | # # Uploads a file to a bucket 100 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 101 | # 102 | # storage_client = storage.Client() 103 | # bucket = storage_client.get_bucket(bucket_name) 104 | # blob = bucket.blob(destination_blob_name) 105 | # 106 | # blob.upload_from_filename(source_file_name) 107 | # 108 | # print('File {} uploaded to {}.'.format( 109 | # source_file_name, 110 | # destination_blob_name)) 111 | # 112 | # 113 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 114 | # # Uploads a blob from a bucket 115 | # storage_client = storage.Client() 116 | # bucket = storage_client.get_bucket(bucket_name) 117 | # blob = bucket.blob(source_blob_name) 118 | # 119 | # blob.download_to_filename(destination_file_name) 120 | # 121 | # print('Blob {} downloaded to {}.'.format( 122 | # source_blob_name, 123 | # destination_file_name)) 124 | -------------------------------------------------------------------------------- /cfg/training/yolov7-w6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [64, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [[-1, -3, -5, -6], 1, Concat, [1]], 27 | [-1, 1, Conv, [128, 1, 1]], # 10 28 | 29 | [-1, 1, Conv, [256, 3, 2]], # 11-P3/8 30 | [-1, 1, Conv, [128, 1, 1]], 31 | [-2, 1, Conv, [128, 1, 1]], 32 | [-1, 1, Conv, [128, 3, 1]], 33 | [-1, 1, Conv, [128, 3, 1]], 34 | [-1, 1, Conv, [128, 3, 1]], 35 | [-1, 1, Conv, [128, 3, 1]], 36 | [[-1, -3, -5, -6], 1, Concat, [1]], 37 | [-1, 1, Conv, [256, 1, 1]], # 19 38 | 39 | [-1, 1, Conv, [512, 3, 2]], # 20-P4/16 40 | [-1, 1, Conv, [256, 1, 1]], 41 | [-2, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [256, 3, 1]], 43 | [-1, 1, Conv, [256, 3, 1]], 44 | [-1, 1, Conv, [256, 3, 1]], 45 | [-1, 1, Conv, [256, 3, 1]], 46 | [[-1, -3, -5, -6], 1, Concat, [1]], 47 | [-1, 1, Conv, [512, 1, 1]], # 28 48 | 49 | [-1, 1, Conv, [768, 3, 2]], # 29-P5/32 50 | [-1, 1, Conv, [384, 1, 1]], 51 | [-2, 1, Conv, [384, 1, 1]], 52 | [-1, 1, Conv, [384, 3, 1]], 53 | [-1, 1, Conv, [384, 3, 1]], 54 | [-1, 1, Conv, [384, 3, 1]], 55 | [-1, 1, Conv, [384, 3, 1]], 56 | [[-1, -3, -5, -6], 1, Concat, [1]], 57 | [-1, 1, Conv, [768, 1, 1]], # 37 58 | 59 | [-1, 1, Conv, [1024, 3, 2]], # 38-P6/64 60 | [-1, 1, Conv, [512, 1, 1]], 61 | [-2, 1, Conv, [512, 1, 1]], 62 | [-1, 1, Conv, [512, 3, 1]], 63 | [-1, 1, Conv, [512, 3, 1]], 64 | [-1, 1, Conv, [512, 3, 1]], 65 | [-1, 1, 
Conv, [512, 3, 1]], 66 | [[-1, -3, -5, -6], 1, Concat, [1]], 67 | [-1, 1, Conv, [1024, 1, 1]], # 46 68 | ] 69 | 70 | # yolov7 head 71 | head: 72 | [[-1, 1, SPPCSPC, [512]], # 47 73 | 74 | [-1, 1, Conv, [384, 1, 1]], 75 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 76 | [37, 1, Conv, [384, 1, 1]], # route backbone P5 77 | [[-1, -2], 1, Concat, [1]], 78 | 79 | [-1, 1, Conv, [384, 1, 1]], 80 | [-2, 1, Conv, [384, 1, 1]], 81 | [-1, 1, Conv, [192, 3, 1]], 82 | [-1, 1, Conv, [192, 3, 1]], 83 | [-1, 1, Conv, [192, 3, 1]], 84 | [-1, 1, Conv, [192, 3, 1]], 85 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 86 | [-1, 1, Conv, [384, 1, 1]], # 59 87 | 88 | [-1, 1, Conv, [256, 1, 1]], 89 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 90 | [28, 1, Conv, [256, 1, 1]], # route backbone P4 91 | [[-1, -2], 1, Concat, [1]], 92 | 93 | [-1, 1, Conv, [256, 1, 1]], 94 | [-2, 1, Conv, [256, 1, 1]], 95 | [-1, 1, Conv, [128, 3, 1]], 96 | [-1, 1, Conv, [128, 3, 1]], 97 | [-1, 1, Conv, [128, 3, 1]], 98 | [-1, 1, Conv, [128, 3, 1]], 99 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 100 | [-1, 1, Conv, [256, 1, 1]], # 71 101 | 102 | [-1, 1, Conv, [128, 1, 1]], 103 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 104 | [19, 1, Conv, [128, 1, 1]], # route backbone P3 105 | [[-1, -2], 1, Concat, [1]], 106 | 107 | [-1, 1, Conv, [128, 1, 1]], 108 | [-2, 1, Conv, [128, 1, 1]], 109 | [-1, 1, Conv, [64, 3, 1]], 110 | [-1, 1, Conv, [64, 3, 1]], 111 | [-1, 1, Conv, [64, 3, 1]], 112 | [-1, 1, Conv, [64, 3, 1]], 113 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 114 | [-1, 1, Conv, [128, 1, 1]], # 83 115 | 116 | [-1, 1, Conv, [256, 3, 2]], 117 | [[-1, 71], 1, Concat, [1]], # cat 118 | 119 | [-1, 1, Conv, [256, 1, 1]], 120 | [-2, 1, Conv, [256, 1, 1]], 121 | [-1, 1, Conv, [128, 3, 1]], 122 | [-1, 1, Conv, [128, 3, 1]], 123 | [-1, 1, Conv, [128, 3, 1]], 124 | [-1, 1, Conv, [128, 3, 1]], 125 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 126 | [-1, 1, Conv, [256, 1, 1]], # 93 127 | 128 | [-1, 1, Conv, [384, 3, 2]], 129 | [[-1, 59], 1, Concat, [1]], # cat 130 | 131 | [-1, 1, Conv, [384, 1, 1]], 132 | [-2, 1, Conv, [384, 1, 1]], 133 | [-1, 1, Conv, [192, 3, 1]], 134 | [-1, 1, Conv, [192, 3, 1]], 135 | [-1, 1, Conv, [192, 3, 1]], 136 | [-1, 1, Conv, [192, 3, 1]], 137 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 138 | [-1, 1, Conv, [384, 1, 1]], # 103 139 | 140 | [-1, 1, Conv, [512, 3, 2]], 141 | [[-1, 47], 1, Concat, [1]], # cat 142 | 143 | [-1, 1, Conv, [512, 1, 1]], 144 | [-2, 1, Conv, [512, 1, 1]], 145 | [-1, 1, Conv, [256, 3, 1]], 146 | [-1, 1, Conv, [256, 3, 1]], 147 | [-1, 1, Conv, [256, 3, 1]], 148 | [-1, 1, Conv, [256, 3, 1]], 149 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 150 | [-1, 1, Conv, [512, 1, 1]], # 113 151 | 152 | [83, 1, Conv, [256, 3, 1]], 153 | [93, 1, Conv, [512, 3, 1]], 154 | [103, 1, Conv, [768, 3, 1]], 155 | [113, 1, Conv, [1024, 3, 1]], 156 | 157 | [83, 1, Conv, [320, 3, 1]], 158 | [71, 1, Conv, [640, 3, 1]], 159 | [59, 1, Conv, [960, 3, 1]], 160 | [47, 1, Conv, [1280, 3, 1]], 161 | 162 | [[114,115,116,117,118,119,120,121], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6) 163 | ] 164 | -------------------------------------------------------------------------------- /cfg/yolov7-mask.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | pooler_scale: 0.25 6 | 7 | # anchors 8 | anchors: 9 | - [12,16, 19,36, 40,28] # P3/8 10 | - 
[36,75, 76,55, 72,146] # P4/16 11 | - [142,110, 192,243, 459,401] # P5/32 12 | 13 | # yolov7 backbone 14 | backbone: 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 18 | [-1, 1, Conv, [64, 3, 1]], 19 | 20 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 21 | [-1, 1, Conv, [64, 1, 1]], 22 | [-2, 1, Conv, [64, 1, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [[-1, -3, -5, -6], 1, Concat, [1]], 28 | [-1, 1, Conv, [256, 1, 1]], # 11 29 | 30 | [-1, 1, MP, []], 31 | [-1, 1, Conv, [128, 1, 1]], 32 | [-3, 1, Conv, [128, 1, 1]], 33 | [-1, 1, Conv, [128, 3, 2]], 34 | [[-1, -3], 1, Concat, [1]], # 16-P3/8 35 | [-1, 1, Conv, [128, 1, 1]], 36 | [-2, 1, Conv, [128, 1, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [[-1, -3, -5, -6], 1, Concat, [1]], 42 | [-1, 1, Conv, [512, 1, 1]], # 24 43 | 44 | [-1, 1, MP, []], 45 | [-1, 1, Conv, [256, 1, 1]], 46 | [-3, 1, Conv, [256, 1, 1]], 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, -3], 1, Concat, [1]], # 29-P4/16 49 | [-1, 1, Conv, [256, 1, 1]], 50 | [-2, 1, Conv, [256, 1, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [-1, 1, Conv, [256, 3, 1]], 53 | [-1, 1, Conv, [256, 3, 1]], 54 | [-1, 1, Conv, [256, 3, 1]], 55 | [[-1, -3, -5, -6], 1, Concat, [1]], 56 | [-1, 1, Conv, [1024, 1, 1]], # 37 57 | 58 | [-1, 1, MP, []], 59 | [-1, 1, Conv, [512, 1, 1]], 60 | [-3, 1, Conv, [512, 1, 1]], 61 | [-1, 1, Conv, [512, 3, 2]], 62 | [[-1, -3], 1, Concat, [1]], # 42-P5/32 63 | [-1, 1, Conv, [256, 1, 1]], 64 | [-2, 1, Conv, [256, 1, 1]], 65 | [-1, 1, Conv, [256, 3, 1]], 66 | [-1, 1, Conv, [256, 3, 1]], 67 | [-1, 1, Conv, [256, 3, 1]], 68 | [-1, 1, Conv, [256, 3, 1]], 69 | [[-1, -3, -5, -6], 1, Concat, [1]], 70 | [-1, 1, Conv, [1024, 1, 1]], # 50 71 | ] 72 | 73 | # yolov7 head 74 | head: 75 | [[-1, 1, SPPCSPC, [512]], # 51 76 | 77 | [-1, 1, Conv, [256, 1, 1]], 78 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 79 | [37, 1, Conv, [256, 1, 1]], # route backbone P4 80 | [[-1, -2], 1, Concat, [1]], 81 | 82 | [-1, 1, Conv, [256, 1, 1]], 83 | [-2, 1, Conv, [256, 1, 1]], 84 | [-1, 1, Conv, [128, 3, 1]], 85 | [-1, 1, Conv, [128, 3, 1]], 86 | [-1, 1, Conv, [128, 3, 1]], 87 | [-1, 1, Conv, [128, 3, 1]], 88 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 89 | [-1, 1, Conv, [256, 1, 1]], # 63 90 | 91 | [-1, 1, Conv, [128, 1, 1]], 92 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 93 | [24, 1, Conv, [128, 1, 1]], # route backbone P3 94 | [[-1, -2], 1, Concat, [1]], 95 | 96 | [-1, 1, Conv, [128, 1, 1]], 97 | [-2, 1, Conv, [128, 1, 1]], 98 | [-1, 1, Conv, [64, 3, 1]], 99 | [-1, 1, Conv, [64, 3, 1]], 100 | [-1, 1, Conv, [64, 3, 1]], 101 | [-1, 1, Conv, [64, 3, 1]], 102 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 103 | [-1, 1, Conv, [128, 1, 1]], # 75 104 | 105 | [-1, 1, MP, []], 106 | [-1, 1, Conv, [128, 1, 1]], 107 | [-3, 1, Conv, [128, 1, 1]], 108 | [-1, 1, Conv, [128, 3, 2]], 109 | [[-1, -3, 63], 1, Concat, [1]], 110 | 111 | [-1, 1, Conv, [256, 1, 1]], 112 | [-2, 1, Conv, [256, 1, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [-1, 1, Conv, [128, 3, 1]], 115 | [-1, 1, Conv, [128, 3, 1]], 116 | [-1, 1, Conv, [128, 3, 1]], 117 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 118 | [-1, 1, Conv, [256, 1, 1]], # 88 119 | 120 | [-1, 1, MP, []], 121 | [-1, 1, Conv, [256, 1, 1]], 122 | [-3, 1, Conv, [256, 1, 1]], 123 | [-1, 1, Conv, [256, 3, 2]], 124 | [[-1, -3, 51], 1, Concat, [1]], 125 | 126 | [-1, 1, 
Conv, [512, 1, 1]], 127 | [-2, 1, Conv, [512, 1, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [-1, 1, Conv, [256, 3, 1]], 130 | [-1, 1, Conv, [256, 3, 1]], 131 | [-1, 1, Conv, [256, 3, 1]], 132 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 133 | [-1, 1, Conv, [512, 1, 1]], # 101 134 | 135 | [75, 1, Conv, [256, 3, 1] ], 136 | [88, 1, Conv, [512, 3, 1] ], 137 | [101, 1, Conv, [1024, 3, 1]], 138 | 139 | [[102, 103, 104], 1, Merge, [[256, 512, 1024]]], # 105 140 | 141 | [75, 1, Conv, [64, 1, 1] ], 142 | [-1, 1, nn.Upsample, [None, 2, 'nearest'] ], 143 | [11, 1, Conv, [64, 1, 1] ], # route backbone P2 144 | [[-1, -2], 1, Concat, [1] ], 145 | [-1, 2, BottleneckCSPB, [64] ], 146 | [-1, 1, Conv, [64, 1, 1] ], # 111 147 | 148 | [[75, 88, 101], 1, Refine, [128, 3, 1]], 149 | [-1, 1, Conv, [128, 3, 1]], 150 | [-1, 1, nn.Upsample, [None, 2, 'bilinear']], 151 | [-1, 1, Conv, [128, 3, 1]], 152 | [111, 1, Conv, [128, 3, 1]], 153 | [[-1, -2], 1, Shortcut, [1]], 154 | [-1, 1, Conv, [128, 3, 1]], 155 | [-1, 1, nn.Conv2d, [4, 1]], # 119 156 | 157 | [[75, 63, 51], 1, Refine, [128, 3, 1]], 158 | [-1, 1, Conv, [128, 3, 1]], 159 | [-1, 1, nn.Upsample, [None, 2, 'bilinear']], 160 | [-1, 1, Conv, [128, 3, 1]], 161 | [111, 1, Conv, [128, 3, 1]], 162 | [[-1, -2], 1, Shortcut, [1]], 163 | [-1, 1, Conv, [128, 3, 1]], 164 | [-1, 1, nn.Conv2d, [1, 1]], # 127 165 | 166 | [[105, 119, 127], 1, MT, [nc, anchors, 980]], # Detect(P3, P4, P5) 167 | ] 168 | 169 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7-e6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7-e6 backbone 14 | backbone: 15 | # [from, number, module, args], 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [80, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, DownC, [160]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 29 | [-1, 1, Conv, [160, 1, 1]], # 12 30 | 31 | [-1, 1, DownC, [320]], # 13-P3/8 32 | [-1, 1, Conv, [128, 1, 1]], 33 | [-2, 1, Conv, [128, 1, 1]], 34 | [-1, 1, Conv, [128, 3, 1]], 35 | [-1, 1, Conv, [128, 3, 1]], 36 | [-1, 1, Conv, [128, 3, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 41 | [-1, 1, Conv, [320, 1, 1]], # 23 42 | 43 | [-1, 1, DownC, [640]], # 24-P4/16 44 | [-1, 1, Conv, [256, 1, 1]], 45 | [-2, 1, Conv, [256, 1, 1]], 46 | [-1, 1, Conv, [256, 3, 1]], 47 | [-1, 1, Conv, [256, 3, 1]], 48 | [-1, 1, Conv, [256, 3, 1]], 49 | [-1, 1, Conv, [256, 3, 1]], 50 | [-1, 1, Conv, [256, 3, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 53 | [-1, 1, Conv, [640, 1, 1]], # 34 54 | 55 | [-1, 1, DownC, [960]], # 35-P5/32 56 | [-1, 1, Conv, [384, 1, 1]], 57 | [-2, 1, Conv, [384, 1, 1]], 58 | [-1, 1, Conv, [384, 3, 1]], 59 | [-1, 1, Conv, [384, 3, 1]], 60 | [-1, 1, Conv, [384, 3, 1]], 61 | [-1, 1, Conv, [384, 3, 1]], 62 | [-1, 1, 
Conv, [384, 3, 1]], 63 | [-1, 1, Conv, [384, 3, 1]], 64 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 65 | [-1, 1, Conv, [960, 1, 1]], # 45 66 | 67 | [-1, 1, DownC, [1280]], # 46-P6/64 68 | [-1, 1, Conv, [512, 1, 1]], 69 | [-2, 1, Conv, [512, 1, 1]], 70 | [-1, 1, Conv, [512, 3, 1]], 71 | [-1, 1, Conv, [512, 3, 1]], 72 | [-1, 1, Conv, [512, 3, 1]], 73 | [-1, 1, Conv, [512, 3, 1]], 74 | [-1, 1, Conv, [512, 3, 1]], 75 | [-1, 1, Conv, [512, 3, 1]], 76 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 77 | [-1, 1, Conv, [1280, 1, 1]], # 56 78 | ] 79 | 80 | # yolov7-e6 head 81 | head: 82 | [[-1, 1, SPPCSPC, [640]], # 57 83 | 84 | [-1, 1, Conv, [480, 1, 1]], 85 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 86 | [45, 1, Conv, [480, 1, 1]], # route backbone P5 87 | [[-1, -2], 1, Concat, [1]], 88 | 89 | [-1, 1, Conv, [384, 1, 1]], 90 | [-2, 1, Conv, [384, 1, 1]], 91 | [-1, 1, Conv, [192, 3, 1]], 92 | [-1, 1, Conv, [192, 3, 1]], 93 | [-1, 1, Conv, [192, 3, 1]], 94 | [-1, 1, Conv, [192, 3, 1]], 95 | [-1, 1, Conv, [192, 3, 1]], 96 | [-1, 1, Conv, [192, 3, 1]], 97 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 98 | [-1, 1, Conv, [480, 1, 1]], # 71 99 | 100 | [-1, 1, Conv, [320, 1, 1]], 101 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 102 | [34, 1, Conv, [320, 1, 1]], # route backbone P4 103 | [[-1, -2], 1, Concat, [1]], 104 | 105 | [-1, 1, Conv, [256, 1, 1]], 106 | [-2, 1, Conv, [256, 1, 1]], 107 | [-1, 1, Conv, [128, 3, 1]], 108 | [-1, 1, Conv, [128, 3, 1]], 109 | [-1, 1, Conv, [128, 3, 1]], 110 | [-1, 1, Conv, [128, 3, 1]], 111 | [-1, 1, Conv, [128, 3, 1]], 112 | [-1, 1, Conv, [128, 3, 1]], 113 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 114 | [-1, 1, Conv, [320, 1, 1]], # 85 115 | 116 | [-1, 1, Conv, [160, 1, 1]], 117 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 118 | [23, 1, Conv, [160, 1, 1]], # route backbone P3 119 | [[-1, -2], 1, Concat, [1]], 120 | 121 | [-1, 1, Conv, [128, 1, 1]], 122 | [-2, 1, Conv, [128, 1, 1]], 123 | [-1, 1, Conv, [64, 3, 1]], 124 | [-1, 1, Conv, [64, 3, 1]], 125 | [-1, 1, Conv, [64, 3, 1]], 126 | [-1, 1, Conv, [64, 3, 1]], 127 | [-1, 1, Conv, [64, 3, 1]], 128 | [-1, 1, Conv, [64, 3, 1]], 129 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 130 | [-1, 1, Conv, [160, 1, 1]], # 99 131 | 132 | [-1, 1, DownC, [320]], 133 | [[-1, 85], 1, Concat, [1]], 134 | 135 | [-1, 1, Conv, [256, 1, 1]], 136 | [-2, 1, Conv, [256, 1, 1]], 137 | [-1, 1, Conv, [128, 3, 1]], 138 | [-1, 1, Conv, [128, 3, 1]], 139 | [-1, 1, Conv, [128, 3, 1]], 140 | [-1, 1, Conv, [128, 3, 1]], 141 | [-1, 1, Conv, [128, 3, 1]], 142 | [-1, 1, Conv, [128, 3, 1]], 143 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 144 | [-1, 1, Conv, [320, 1, 1]], # 111 145 | 146 | [-1, 1, DownC, [480]], 147 | [[-1, 71], 1, Concat, [1]], 148 | 149 | [-1, 1, Conv, [384, 1, 1]], 150 | [-2, 1, Conv, [384, 1, 1]], 151 | [-1, 1, Conv, [192, 3, 1]], 152 | [-1, 1, Conv, [192, 3, 1]], 153 | [-1, 1, Conv, [192, 3, 1]], 154 | [-1, 1, Conv, [192, 3, 1]], 155 | [-1, 1, Conv, [192, 3, 1]], 156 | [-1, 1, Conv, [192, 3, 1]], 157 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 158 | [-1, 1, Conv, [480, 1, 1]], # 123 159 | 160 | [-1, 1, DownC, [640]], 161 | [[-1, 57], 1, Concat, [1]], 162 | 163 | [-1, 1, Conv, [512, 1, 1]], 164 | [-2, 1, Conv, [512, 1, 1]], 165 | [-1, 1, Conv, [256, 3, 1]], 166 | [-1, 1, Conv, [256, 3, 1]], 167 | [-1, 1, Conv, [256, 3, 1]], 168 | [-1, 1, Conv, [256, 3, 1]], 169 | [-1, 1, Conv, [256, 3, 1]], 170 | [-1, 1, Conv, [256, 3, 1]], 171 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 172 | [-1, 1, 
Conv, [640, 1, 1]], # 135 173 | 174 | [99, 1, Conv, [320, 3, 1]], 175 | [111, 1, Conv, [640, 3, 1]], 176 | [123, 1, Conv, [960, 3, 1]], 177 | [135, 1, Conv, [1280, 3, 1]], 178 | 179 | [[136,137,138,139], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 180 | ] 181 | -------------------------------------------------------------------------------- /utils/add_nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import onnx 3 | from onnx import shape_inference 4 | try: 5 | import onnx_graphsurgeon as gs 6 | except Exception as e: 7 | print('Failed to import onnx_graphsurgeon: %s' % e) 8 | 9 | import logging 10 | 11 | LOGGER = logging.getLogger(__name__) 12 | 13 | class RegisterNMS(object): 14 | def __init__( 15 | self, 16 | onnx_model_path: str, 17 | precision: str = "fp32", 18 | ): 19 | 20 | self.graph = gs.import_onnx(onnx.load(onnx_model_path)) 21 | assert self.graph 22 | LOGGER.info("ONNX graph created successfully") 23 | # Fold constants via ONNX-GS that PyTorch2ONNX may have missed 24 | self.graph.fold_constants() 25 | self.precision = precision 26 | self.batch_size = 1 27 | def infer(self): 28 | """ 29 | Sanitize the graph by cleaning any unconnected nodes, do a topological re-sort, 30 | and fold constant input values. When possible, run shape inference on the 31 | ONNX graph to determine tensor shapes. 32 | """ 33 | for _ in range(3): 34 | count_before = len(self.graph.nodes) 35 | 36 | self.graph.cleanup().toposort() 37 | try: 38 | for node in self.graph.nodes: 39 | for o in node.outputs: 40 | o.shape = None 41 | model = gs.export_onnx(self.graph) 42 | model = shape_inference.infer_shapes(model) 43 | self.graph = gs.import_onnx(model) 44 | except Exception as e: 45 | LOGGER.info(f"Shape inference could not be performed at this time:\n{e}") 46 | try: 47 | self.graph.fold_constants(fold_shapes=True) 48 | except TypeError as e: 49 | LOGGER.error( 50 | "This version of ONNX GraphSurgeon does not support folding shapes, " 51 | f"please upgrade your onnx_graphsurgeon module. Error:\n{e}" 52 | ) 53 | raise 54 | 55 | count_after = len(self.graph.nodes) 56 | if count_before == count_after: 57 | # No new folding occurred in this iteration, so we can stop for now. 58 | break 59 | 60 | def save(self, output_path): 61 | """ 62 | Save the ONNX model to the given location. 63 | Args: 64 | output_path: Path pointing to the location where to write 65 | out the updated ONNX model. 66 | """ 67 | self.graph.cleanup().toposort() 68 | model = gs.export_onnx(self.graph) 69 | onnx.save(model, output_path) 70 | LOGGER.info(f"Saved ONNX model to {output_path}") 71 | 72 | def register_nms( 73 | self, 74 | *, 75 | score_thresh: float = 0.25, 76 | nms_thresh: float = 0.45, 77 | detections_per_img: int = 100, 78 | ): 79 | """ 80 | Register the ``EfficientNMS_TRT`` plugin node. 81 | NMS expects these shapes for its input tensors: 82 | - box_net: [batch_size, number_boxes, 4] 83 | - class_net: [batch_size, number_boxes, number_labels] 84 | Args: 85 | score_thresh (float): The scalar threshold for score (low scoring boxes are removed). 86 | nms_thresh (float): The scalar threshold for IOU (new boxes that have high IOU 87 | overlap with previously selected boxes are removed). 88 | detections_per_img (int): Number of best detections to keep after NMS. 
89 | """ 90 | 91 | self.infer() 92 | # Find the concat node at the end of the network 93 | op_inputs = self.graph.outputs 94 | op = "EfficientNMS_TRT" 95 | attrs = { 96 | "plugin_version": "1", 97 | "background_class": -1, # no background class 98 | "max_output_boxes": detections_per_img, 99 | "score_threshold": score_thresh, 100 | "iou_threshold": nms_thresh, 101 | "score_activation": False, 102 | "box_coding": 0, 103 | } 104 | 105 | if self.precision == "fp32": 106 | dtype_output = np.float32 107 | elif self.precision == "fp16": 108 | dtype_output = np.float16 109 | else: 110 | raise NotImplementedError(f"Currently not supports precision: {self.precision}") 111 | 112 | # NMS Outputs 113 | output_num_detections = gs.Variable( 114 | name="num_dets", 115 | dtype=np.int32, 116 | shape=[self.batch_size, 1], 117 | ) # A scalar indicating the number of valid detections per batch image. 118 | output_boxes = gs.Variable( 119 | name="det_boxes", 120 | dtype=dtype_output, 121 | shape=[self.batch_size, detections_per_img, 4], 122 | ) 123 | output_scores = gs.Variable( 124 | name="det_scores", 125 | dtype=dtype_output, 126 | shape=[self.batch_size, detections_per_img], 127 | ) 128 | output_labels = gs.Variable( 129 | name="det_classes", 130 | dtype=np.int32, 131 | shape=[self.batch_size, detections_per_img], 132 | ) 133 | 134 | op_outputs = [output_num_detections, output_boxes, output_scores, output_labels] 135 | 136 | # Create the NMS Plugin node with the selected inputs. The outputs of the node will also 137 | # become the final outputs of the graph. 138 | self.graph.layer(op=op, name="batched_nms", inputs=op_inputs, outputs=op_outputs, attrs=attrs) 139 | LOGGER.info(f"Created NMS plugin '{op}' with attributes: {attrs}") 140 | 141 | self.graph.outputs = op_outputs 142 | 143 | self.infer() 144 | 145 | def save(self, output_path): 146 | """ 147 | Save the ONNX model to the given location. 148 | Args: 149 | output_path: Path pointing to the location where to write 150 | out the updated ONNX model. 
151 | """ 152 | self.graph.cleanup().toposort() 153 | model = gs.export_onnx(self.graph) 154 | onnx.save(model, output_path) 155 | LOGGER.info(f"Saved ONNX model to {output_path}") 156 | -------------------------------------------------------------------------------- /cfg/training/yolov7-e6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7 backbone 14 | backbone: 15 | # [from, number, module, args], 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [80, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, DownC, [160]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 29 | [-1, 1, Conv, [160, 1, 1]], # 12 30 | 31 | [-1, 1, DownC, [320]], # 13-P3/8 32 | [-1, 1, Conv, [128, 1, 1]], 33 | [-2, 1, Conv, [128, 1, 1]], 34 | [-1, 1, Conv, [128, 3, 1]], 35 | [-1, 1, Conv, [128, 3, 1]], 36 | [-1, 1, Conv, [128, 3, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 41 | [-1, 1, Conv, [320, 1, 1]], # 23 42 | 43 | [-1, 1, DownC, [640]], # 24-P4/16 44 | [-1, 1, Conv, [256, 1, 1]], 45 | [-2, 1, Conv, [256, 1, 1]], 46 | [-1, 1, Conv, [256, 3, 1]], 47 | [-1, 1, Conv, [256, 3, 1]], 48 | [-1, 1, Conv, [256, 3, 1]], 49 | [-1, 1, Conv, [256, 3, 1]], 50 | [-1, 1, Conv, [256, 3, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 53 | [-1, 1, Conv, [640, 1, 1]], # 34 54 | 55 | [-1, 1, DownC, [960]], # 35-P5/32 56 | [-1, 1, Conv, [384, 1, 1]], 57 | [-2, 1, Conv, [384, 1, 1]], 58 | [-1, 1, Conv, [384, 3, 1]], 59 | [-1, 1, Conv, [384, 3, 1]], 60 | [-1, 1, Conv, [384, 3, 1]], 61 | [-1, 1, Conv, [384, 3, 1]], 62 | [-1, 1, Conv, [384, 3, 1]], 63 | [-1, 1, Conv, [384, 3, 1]], 64 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 65 | [-1, 1, Conv, [960, 1, 1]], # 45 66 | 67 | [-1, 1, DownC, [1280]], # 46-P6/64 68 | [-1, 1, Conv, [512, 1, 1]], 69 | [-2, 1, Conv, [512, 1, 1]], 70 | [-1, 1, Conv, [512, 3, 1]], 71 | [-1, 1, Conv, [512, 3, 1]], 72 | [-1, 1, Conv, [512, 3, 1]], 73 | [-1, 1, Conv, [512, 3, 1]], 74 | [-1, 1, Conv, [512, 3, 1]], 75 | [-1, 1, Conv, [512, 3, 1]], 76 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 77 | [-1, 1, Conv, [1280, 1, 1]], # 56 78 | ] 79 | 80 | # yolov7 head 81 | head: 82 | [[-1, 1, SPPCSPC, [640]], # 57 83 | 84 | [-1, 1, Conv, [480, 1, 1]], 85 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 86 | [45, 1, Conv, [480, 1, 1]], # route backbone P5 87 | [[-1, -2], 1, Concat, [1]], 88 | 89 | [-1, 1, Conv, [384, 1, 1]], 90 | [-2, 1, Conv, [384, 1, 1]], 91 | [-1, 1, Conv, [192, 3, 1]], 92 | [-1, 1, Conv, [192, 3, 1]], 93 | [-1, 1, Conv, [192, 3, 1]], 94 | [-1, 1, Conv, [192, 3, 1]], 95 | [-1, 1, Conv, [192, 3, 1]], 96 | [-1, 1, Conv, [192, 3, 1]], 97 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 98 | [-1, 1, Conv, [480, 1, 1]], # 71 99 | 100 | [-1, 1, Conv, [320, 1, 1]], 101 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 102 | [34, 1, Conv, [320, 1, 
1]], # route backbone P4 103 | [[-1, -2], 1, Concat, [1]], 104 | 105 | [-1, 1, Conv, [256, 1, 1]], 106 | [-2, 1, Conv, [256, 1, 1]], 107 | [-1, 1, Conv, [128, 3, 1]], 108 | [-1, 1, Conv, [128, 3, 1]], 109 | [-1, 1, Conv, [128, 3, 1]], 110 | [-1, 1, Conv, [128, 3, 1]], 111 | [-1, 1, Conv, [128, 3, 1]], 112 | [-1, 1, Conv, [128, 3, 1]], 113 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 114 | [-1, 1, Conv, [320, 1, 1]], # 85 115 | 116 | [-1, 1, Conv, [160, 1, 1]], 117 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 118 | [23, 1, Conv, [160, 1, 1]], # route backbone P3 119 | [[-1, -2], 1, Concat, [1]], 120 | 121 | [-1, 1, Conv, [128, 1, 1]], 122 | [-2, 1, Conv, [128, 1, 1]], 123 | [-1, 1, Conv, [64, 3, 1]], 124 | [-1, 1, Conv, [64, 3, 1]], 125 | [-1, 1, Conv, [64, 3, 1]], 126 | [-1, 1, Conv, [64, 3, 1]], 127 | [-1, 1, Conv, [64, 3, 1]], 128 | [-1, 1, Conv, [64, 3, 1]], 129 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 130 | [-1, 1, Conv, [160, 1, 1]], # 99 131 | 132 | [-1, 1, DownC, [320]], 133 | [[-1, 85], 1, Concat, [1]], 134 | 135 | [-1, 1, Conv, [256, 1, 1]], 136 | [-2, 1, Conv, [256, 1, 1]], 137 | [-1, 1, Conv, [128, 3, 1]], 138 | [-1, 1, Conv, [128, 3, 1]], 139 | [-1, 1, Conv, [128, 3, 1]], 140 | [-1, 1, Conv, [128, 3, 1]], 141 | [-1, 1, Conv, [128, 3, 1]], 142 | [-1, 1, Conv, [128, 3, 1]], 143 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 144 | [-1, 1, Conv, [320, 1, 1]], # 111 145 | 146 | [-1, 1, DownC, [480]], 147 | [[-1, 71], 1, Concat, [1]], 148 | 149 | [-1, 1, Conv, [384, 1, 1]], 150 | [-2, 1, Conv, [384, 1, 1]], 151 | [-1, 1, Conv, [192, 3, 1]], 152 | [-1, 1, Conv, [192, 3, 1]], 153 | [-1, 1, Conv, [192, 3, 1]], 154 | [-1, 1, Conv, [192, 3, 1]], 155 | [-1, 1, Conv, [192, 3, 1]], 156 | [-1, 1, Conv, [192, 3, 1]], 157 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 158 | [-1, 1, Conv, [480, 1, 1]], # 123 159 | 160 | [-1, 1, DownC, [640]], 161 | [[-1, 57], 1, Concat, [1]], 162 | 163 | [-1, 1, Conv, [512, 1, 1]], 164 | [-2, 1, Conv, [512, 1, 1]], 165 | [-1, 1, Conv, [256, 3, 1]], 166 | [-1, 1, Conv, [256, 3, 1]], 167 | [-1, 1, Conv, [256, 3, 1]], 168 | [-1, 1, Conv, [256, 3, 1]], 169 | [-1, 1, Conv, [256, 3, 1]], 170 | [-1, 1, Conv, [256, 3, 1]], 171 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 172 | [-1, 1, Conv, [640, 1, 1]], # 135 173 | 174 | [99, 1, Conv, [320, 3, 1]], 175 | [111, 1, Conv, [640, 3, 1]], 176 | [123, 1, Conv, [960, 3, 1]], 177 | [135, 1, Conv, [1280, 3, 1]], 178 | 179 | [99, 1, Conv, [320, 3, 1]], 180 | [85, 1, Conv, [640, 3, 1]], 181 | [71, 1, Conv, [960, 3, 1]], 182 | [57, 1, Conv, [1280, 3, 1]], 183 | 184 | [[136,137,138,139,140,141,142,143], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6) 185 | ] 186 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7-d6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7-d6 backbone 14 | backbone: 15 | # [from, number, module, args], 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [96, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, DownC, [192]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | 
[-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [-1, 1, Conv, [64, 3, 1]], 29 | [-1, 1, Conv, [64, 3, 1]], 30 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 31 | [-1, 1, Conv, [192, 1, 1]], # 14 32 | 33 | [-1, 1, DownC, [384]], # 15-P3/8 34 | [-1, 1, Conv, [128, 1, 1]], 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, Conv, [128, 3, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [-1, 1, Conv, [128, 3, 1]], 42 | [-1, 1, Conv, [128, 3, 1]], 43 | [-1, 1, Conv, [128, 3, 1]], 44 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 45 | [-1, 1, Conv, [384, 1, 1]], # 27 46 | 47 | [-1, 1, DownC, [768]], # 28-P4/16 48 | [-1, 1, Conv, [256, 1, 1]], 49 | [-2, 1, Conv, [256, 1, 1]], 50 | [-1, 1, Conv, [256, 3, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [-1, 1, Conv, [256, 3, 1]], 53 | [-1, 1, Conv, [256, 3, 1]], 54 | [-1, 1, Conv, [256, 3, 1]], 55 | [-1, 1, Conv, [256, 3, 1]], 56 | [-1, 1, Conv, [256, 3, 1]], 57 | [-1, 1, Conv, [256, 3, 1]], 58 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 59 | [-1, 1, Conv, [768, 1, 1]], # 40 60 | 61 | [-1, 1, DownC, [1152]], # 41-P5/32 62 | [-1, 1, Conv, [384, 1, 1]], 63 | [-2, 1, Conv, [384, 1, 1]], 64 | [-1, 1, Conv, [384, 3, 1]], 65 | [-1, 1, Conv, [384, 3, 1]], 66 | [-1, 1, Conv, [384, 3, 1]], 67 | [-1, 1, Conv, [384, 3, 1]], 68 | [-1, 1, Conv, [384, 3, 1]], 69 | [-1, 1, Conv, [384, 3, 1]], 70 | [-1, 1, Conv, [384, 3, 1]], 71 | [-1, 1, Conv, [384, 3, 1]], 72 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 73 | [-1, 1, Conv, [1152, 1, 1]], # 53 74 | 75 | [-1, 1, DownC, [1536]], # 54-P6/64 76 | [-1, 1, Conv, [512, 1, 1]], 77 | [-2, 1, Conv, [512, 1, 1]], 78 | [-1, 1, Conv, [512, 3, 1]], 79 | [-1, 1, Conv, [512, 3, 1]], 80 | [-1, 1, Conv, [512, 3, 1]], 81 | [-1, 1, Conv, [512, 3, 1]], 82 | [-1, 1, Conv, [512, 3, 1]], 83 | [-1, 1, Conv, [512, 3, 1]], 84 | [-1, 1, Conv, [512, 3, 1]], 85 | [-1, 1, Conv, [512, 3, 1]], 86 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 87 | [-1, 1, Conv, [1536, 1, 1]], # 66 88 | ] 89 | 90 | # yolov7-d6 head 91 | head: 92 | [[-1, 1, SPPCSPC, [768]], # 67 93 | 94 | [-1, 1, Conv, [576, 1, 1]], 95 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 96 | [53, 1, Conv, [576, 1, 1]], # route backbone P5 97 | [[-1, -2], 1, Concat, [1]], 98 | 99 | [-1, 1, Conv, [384, 1, 1]], 100 | [-2, 1, Conv, [384, 1, 1]], 101 | [-1, 1, Conv, [192, 3, 1]], 102 | [-1, 1, Conv, [192, 3, 1]], 103 | [-1, 1, Conv, [192, 3, 1]], 104 | [-1, 1, Conv, [192, 3, 1]], 105 | [-1, 1, Conv, [192, 3, 1]], 106 | [-1, 1, Conv, [192, 3, 1]], 107 | [-1, 1, Conv, [192, 3, 1]], 108 | [-1, 1, Conv, [192, 3, 1]], 109 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 110 | [-1, 1, Conv, [576, 1, 1]], # 83 111 | 112 | [-1, 1, Conv, [384, 1, 1]], 113 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 114 | [40, 1, Conv, [384, 1, 1]], # route backbone P4 115 | [[-1, -2], 1, Concat, [1]], 116 | 117 | [-1, 1, Conv, [256, 1, 1]], 118 | [-2, 1, Conv, [256, 1, 1]], 119 | [-1, 1, Conv, [128, 3, 1]], 120 | [-1, 1, Conv, [128, 3, 1]], 121 | [-1, 1, Conv, [128, 3, 1]], 122 | [-1, 1, Conv, [128, 3, 1]], 123 | [-1, 1, Conv, [128, 3, 1]], 124 | [-1, 1, Conv, [128, 3, 1]], 125 | [-1, 1, Conv, [128, 3, 1]], 126 | [-1, 1, Conv, [128, 3, 1]], 127 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 128 | [-1, 1, Conv, [384, 1, 1]], # 99 129 | 130 | [-1, 1, Conv, [192, 1, 1]], 131 | [-1, 1, 
nn.Upsample, [None, 2, 'nearest']], 132 | [27, 1, Conv, [192, 1, 1]], # route backbone P3 133 | [[-1, -2], 1, Concat, [1]], 134 | 135 | [-1, 1, Conv, [128, 1, 1]], 136 | [-2, 1, Conv, [128, 1, 1]], 137 | [-1, 1, Conv, [64, 3, 1]], 138 | [-1, 1, Conv, [64, 3, 1]], 139 | [-1, 1, Conv, [64, 3, 1]], 140 | [-1, 1, Conv, [64, 3, 1]], 141 | [-1, 1, Conv, [64, 3, 1]], 142 | [-1, 1, Conv, [64, 3, 1]], 143 | [-1, 1, Conv, [64, 3, 1]], 144 | [-1, 1, Conv, [64, 3, 1]], 145 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 146 | [-1, 1, Conv, [192, 1, 1]], # 115 147 | 148 | [-1, 1, DownC, [384]], 149 | [[-1, 99], 1, Concat, [1]], 150 | 151 | [-1, 1, Conv, [256, 1, 1]], 152 | [-2, 1, Conv, [256, 1, 1]], 153 | [-1, 1, Conv, [128, 3, 1]], 154 | [-1, 1, Conv, [128, 3, 1]], 155 | [-1, 1, Conv, [128, 3, 1]], 156 | [-1, 1, Conv, [128, 3, 1]], 157 | [-1, 1, Conv, [128, 3, 1]], 158 | [-1, 1, Conv, [128, 3, 1]], 159 | [-1, 1, Conv, [128, 3, 1]], 160 | [-1, 1, Conv, [128, 3, 1]], 161 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 162 | [-1, 1, Conv, [384, 1, 1]], # 129 163 | 164 | [-1, 1, DownC, [576]], 165 | [[-1, 83], 1, Concat, [1]], 166 | 167 | [-1, 1, Conv, [384, 1, 1]], 168 | [-2, 1, Conv, [384, 1, 1]], 169 | [-1, 1, Conv, [192, 3, 1]], 170 | [-1, 1, Conv, [192, 3, 1]], 171 | [-1, 1, Conv, [192, 3, 1]], 172 | [-1, 1, Conv, [192, 3, 1]], 173 | [-1, 1, Conv, [192, 3, 1]], 174 | [-1, 1, Conv, [192, 3, 1]], 175 | [-1, 1, Conv, [192, 3, 1]], 176 | [-1, 1, Conv, [192, 3, 1]], 177 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 178 | [-1, 1, Conv, [576, 1, 1]], # 143 179 | 180 | [-1, 1, DownC, [768]], 181 | [[-1, 67], 1, Concat, [1]], 182 | 183 | [-1, 1, Conv, [512, 1, 1]], 184 | [-2, 1, Conv, [512, 1, 1]], 185 | [-1, 1, Conv, [256, 3, 1]], 186 | [-1, 1, Conv, [256, 3, 1]], 187 | [-1, 1, Conv, [256, 3, 1]], 188 | [-1, 1, Conv, [256, 3, 1]], 189 | [-1, 1, Conv, [256, 3, 1]], 190 | [-1, 1, Conv, [256, 3, 1]], 191 | [-1, 1, Conv, [256, 3, 1]], 192 | [-1, 1, Conv, [256, 3, 1]], 193 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 194 | [-1, 1, Conv, [768, 1, 1]], # 157 195 | 196 | [115, 1, Conv, [384, 3, 1]], 197 | [129, 1, Conv, [768, 3, 1]], 198 | [143, 1, Conv, [1152, 3, 1]], 199 | [157, 1, Conv, [1536, 3, 1]], 200 | 201 | [[158,159,160,161], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 202 | ] 203 | -------------------------------------------------------------------------------- /cfg/training/yolov7-d6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7 backbone 14 | backbone: 15 | # [from, number, module, args], 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [96, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, DownC, [192]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [-1, 1, Conv, [64, 3, 1]], 29 | [-1, 1, Conv, [64, 3, 1]], 30 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 31 | [-1, 1, Conv, [192, 1, 1]], # 14 32 | 33 | [-1, 1, DownC, [384]], # 
15-P3/8 34 | [-1, 1, Conv, [128, 1, 1]], 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, Conv, [128, 3, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [-1, 1, Conv, [128, 3, 1]], 42 | [-1, 1, Conv, [128, 3, 1]], 43 | [-1, 1, Conv, [128, 3, 1]], 44 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 45 | [-1, 1, Conv, [384, 1, 1]], # 27 46 | 47 | [-1, 1, DownC, [768]], # 28-P4/16 48 | [-1, 1, Conv, [256, 1, 1]], 49 | [-2, 1, Conv, [256, 1, 1]], 50 | [-1, 1, Conv, [256, 3, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [-1, 1, Conv, [256, 3, 1]], 53 | [-1, 1, Conv, [256, 3, 1]], 54 | [-1, 1, Conv, [256, 3, 1]], 55 | [-1, 1, Conv, [256, 3, 1]], 56 | [-1, 1, Conv, [256, 3, 1]], 57 | [-1, 1, Conv, [256, 3, 1]], 58 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 59 | [-1, 1, Conv, [768, 1, 1]], # 40 60 | 61 | [-1, 1, DownC, [1152]], # 41-P5/32 62 | [-1, 1, Conv, [384, 1, 1]], 63 | [-2, 1, Conv, [384, 1, 1]], 64 | [-1, 1, Conv, [384, 3, 1]], 65 | [-1, 1, Conv, [384, 3, 1]], 66 | [-1, 1, Conv, [384, 3, 1]], 67 | [-1, 1, Conv, [384, 3, 1]], 68 | [-1, 1, Conv, [384, 3, 1]], 69 | [-1, 1, Conv, [384, 3, 1]], 70 | [-1, 1, Conv, [384, 3, 1]], 71 | [-1, 1, Conv, [384, 3, 1]], 72 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 73 | [-1, 1, Conv, [1152, 1, 1]], # 53 74 | 75 | [-1, 1, DownC, [1536]], # 54-P6/64 76 | [-1, 1, Conv, [512, 1, 1]], 77 | [-2, 1, Conv, [512, 1, 1]], 78 | [-1, 1, Conv, [512, 3, 1]], 79 | [-1, 1, Conv, [512, 3, 1]], 80 | [-1, 1, Conv, [512, 3, 1]], 81 | [-1, 1, Conv, [512, 3, 1]], 82 | [-1, 1, Conv, [512, 3, 1]], 83 | [-1, 1, Conv, [512, 3, 1]], 84 | [-1, 1, Conv, [512, 3, 1]], 85 | [-1, 1, Conv, [512, 3, 1]], 86 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 87 | [-1, 1, Conv, [1536, 1, 1]], # 66 88 | ] 89 | 90 | # yolov7 head 91 | head: 92 | [[-1, 1, SPPCSPC, [768]], # 67 93 | 94 | [-1, 1, Conv, [576, 1, 1]], 95 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 96 | [53, 1, Conv, [576, 1, 1]], # route backbone P5 97 | [[-1, -2], 1, Concat, [1]], 98 | 99 | [-1, 1, Conv, [384, 1, 1]], 100 | [-2, 1, Conv, [384, 1, 1]], 101 | [-1, 1, Conv, [192, 3, 1]], 102 | [-1, 1, Conv, [192, 3, 1]], 103 | [-1, 1, Conv, [192, 3, 1]], 104 | [-1, 1, Conv, [192, 3, 1]], 105 | [-1, 1, Conv, [192, 3, 1]], 106 | [-1, 1, Conv, [192, 3, 1]], 107 | [-1, 1, Conv, [192, 3, 1]], 108 | [-1, 1, Conv, [192, 3, 1]], 109 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 110 | [-1, 1, Conv, [576, 1, 1]], # 83 111 | 112 | [-1, 1, Conv, [384, 1, 1]], 113 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 114 | [40, 1, Conv, [384, 1, 1]], # route backbone P4 115 | [[-1, -2], 1, Concat, [1]], 116 | 117 | [-1, 1, Conv, [256, 1, 1]], 118 | [-2, 1, Conv, [256, 1, 1]], 119 | [-1, 1, Conv, [128, 3, 1]], 120 | [-1, 1, Conv, [128, 3, 1]], 121 | [-1, 1, Conv, [128, 3, 1]], 122 | [-1, 1, Conv, [128, 3, 1]], 123 | [-1, 1, Conv, [128, 3, 1]], 124 | [-1, 1, Conv, [128, 3, 1]], 125 | [-1, 1, Conv, [128, 3, 1]], 126 | [-1, 1, Conv, [128, 3, 1]], 127 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 128 | [-1, 1, Conv, [384, 1, 1]], # 99 129 | 130 | [-1, 1, Conv, [192, 1, 1]], 131 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 132 | [27, 1, Conv, [192, 1, 1]], # route backbone P3 133 | [[-1, -2], 1, Concat, [1]], 134 | 135 | [-1, 1, Conv, [128, 1, 1]], 136 | [-2, 1, Conv, [128, 1, 1]], 137 | [-1, 1, Conv, [64, 3, 1]], 138 | [-1, 1, Conv, [64, 3, 1]], 139 | [-1, 1, Conv, [64, 3, 1]], 140 | [-1, 1, Conv, [64, 3, 1]], 141 | [-1, 1, Conv, 
[64, 3, 1]], 142 | [-1, 1, Conv, [64, 3, 1]], 143 | [-1, 1, Conv, [64, 3, 1]], 144 | [-1, 1, Conv, [64, 3, 1]], 145 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 146 | [-1, 1, Conv, [192, 1, 1]], # 115 147 | 148 | [-1, 1, DownC, [384]], 149 | [[-1, 99], 1, Concat, [1]], 150 | 151 | [-1, 1, Conv, [256, 1, 1]], 152 | [-2, 1, Conv, [256, 1, 1]], 153 | [-1, 1, Conv, [128, 3, 1]], 154 | [-1, 1, Conv, [128, 3, 1]], 155 | [-1, 1, Conv, [128, 3, 1]], 156 | [-1, 1, Conv, [128, 3, 1]], 157 | [-1, 1, Conv, [128, 3, 1]], 158 | [-1, 1, Conv, [128, 3, 1]], 159 | [-1, 1, Conv, [128, 3, 1]], 160 | [-1, 1, Conv, [128, 3, 1]], 161 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 162 | [-1, 1, Conv, [384, 1, 1]], # 129 163 | 164 | [-1, 1, DownC, [576]], 165 | [[-1, 83], 1, Concat, [1]], 166 | 167 | [-1, 1, Conv, [384, 1, 1]], 168 | [-2, 1, Conv, [384, 1, 1]], 169 | [-1, 1, Conv, [192, 3, 1]], 170 | [-1, 1, Conv, [192, 3, 1]], 171 | [-1, 1, Conv, [192, 3, 1]], 172 | [-1, 1, Conv, [192, 3, 1]], 173 | [-1, 1, Conv, [192, 3, 1]], 174 | [-1, 1, Conv, [192, 3, 1]], 175 | [-1, 1, Conv, [192, 3, 1]], 176 | [-1, 1, Conv, [192, 3, 1]], 177 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 178 | [-1, 1, Conv, [576, 1, 1]], # 143 179 | 180 | [-1, 1, DownC, [768]], 181 | [[-1, 67], 1, Concat, [1]], 182 | 183 | [-1, 1, Conv, [512, 1, 1]], 184 | [-2, 1, Conv, [512, 1, 1]], 185 | [-1, 1, Conv, [256, 3, 1]], 186 | [-1, 1, Conv, [256, 3, 1]], 187 | [-1, 1, Conv, [256, 3, 1]], 188 | [-1, 1, Conv, [256, 3, 1]], 189 | [-1, 1, Conv, [256, 3, 1]], 190 | [-1, 1, Conv, [256, 3, 1]], 191 | [-1, 1, Conv, [256, 3, 1]], 192 | [-1, 1, Conv, [256, 3, 1]], 193 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 194 | [-1, 1, Conv, [768, 1, 1]], # 157 195 | 196 | [115, 1, Conv, [384, 3, 1]], 197 | [129, 1, Conv, [768, 3, 1]], 198 | [143, 1, Conv, [1152, 3, 1]], 199 | [157, 1, Conv, [1536, 3, 1]], 200 | 201 | [115, 1, Conv, [384, 3, 1]], 202 | [99, 1, Conv, [768, 3, 1]], 203 | [83, 1, Conv, [1152, 3, 1]], 204 | [67, 1, Conv, [1536, 3, 1]], 205 | 206 | [[158,159,160,161,162,163,164,165], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6) 207 | ] 208 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Yolov7 Segmentation model with TensorRT 2 | **This repository implements the real-time instance segmentation algorithm Yolov7 with TensorRT.** 3 | 4 | ## Some remarks 5 | - The initial repository on which I built mine is __**WongKinYiu/yolov7**__ (https://github.com/WongKinYiu/yolov7); for additional information about the installation of Yolov7, refer to the original repository. 6 | - This project is built upon the excellent **detectron2** framework; you should install detectron2 first, and please check the official installation guide for more details (https://github.com/facebookresearch/detectron2.git). 7 | - For commands other than TensorRT and ONNX inference (e.g. detect.py), please refer to the initial repository. 8 | - If you face any problem during parsing, don't hesitate to open an issue. If there aren't any, don't hesitate to drop a :star: 9 | - Be aware that, in order to parse the model to ONNX and TensorRT, some original files have been slightly modified; don't forget to check the modifications if you come from the initial repository. 10 | 11 | 12 | ## Installation and dependencies 13 |
14 | 15 | 16 | - pip3 install -r requirements.txt 17 | - Install PyTorch (1.10.0) and TorchVision (0.11.1) 18 | ```bash 19 | pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 20 | 21 | # If another torch version is needed, pin it explicitly, e.g. torch==1.11.0+cu102 22 | ``` 23 | 24 | - Install CUDA (10.2) and cuDNN (8.0.0): https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&Distribution=WSL-Ubuntu&target_version=2.0&target_type=deb_local 25 | 26 | - For WSL-Ubuntu: 27 | ```bash 28 | sudo wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-wsl-ubuntu.pin 29 | sudo mv cuda-wsl-ubuntu.pin /etc/apt/preferences.d/cuda-repository-pin-600 30 | sudo wget https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda-repo-wsl-ubuntu-11-7-local_11.7.1-1_amd64.deb 32 | sudo dpkg -i cuda-repo-wsl-ubuntu-11-7-local_11.7.1-1_amd64.deb 33 | sudo cp /var/cuda-repo-wsl-ubuntu-11-7-local/cuda-96193861-keyring.gpg /usr/share/keyrings/ 34 | sudo apt-get update 35 | sudo apt-get -y install cuda 36 | ``` 37 | 38 | - Install TensorRT (8.0.1.6); if you are using an NVIDIA edge device, TensorRT should already be installed 39 | ```bash 40 | python3 -m pip install --upgrade setuptools pip 41 | python3 -m pip install nvidia-pyindex 42 | python3 -m pip install --upgrade nvidia-tensorrt 43 | 44 | # Verify the installation with: assert tensorrt.Builder(tensorrt.Logger()) 45 | ``` 46 | - Install ONNX and ONNXRuntime 47 | ```bash 48 | pip install onnxruntime-gpu 49 | pip install onnxruntime 50 | pip install numpy protobuf==4.21.5 51 | pip install onnx 52 | ``` 53 | - Install all the other packages needed to run the original SparseInst algorithm (this should already be covered if you have installed Detectron2); a quick sanity check is shown below 54 | 55 |
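Before moving on, a short sanity check confirms that the whole stack is importable. This is a minimal sketch, assuming the package versions listed above; the final `assert` is the TensorRT check suggested in the install notes:

```python
import torch
import onnx
import onnxruntime
import tensorrt

print("torch", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("onnx", onnx.__version__, "| onnxruntime", onnxruntime.__version__)
print("tensorrt", tensorrt.__version__)
assert tensorrt.Builder(tensorrt.Logger())  # fails if TensorRT cannot create a builder
```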
56 | 57 | 58 | ## Models and Results for the TensorRT and ONNX inference scripts: 59 | 60 | The inference speed for TensorRT is shown in the table below. Yolov7 running with TensorRT achieves more or less 3 times faster inference than Yolov7 running with PyTorch. Lowering the input size of the image can lead to decent real-time speed. 61 | The TensorRT and ONNX models are built upon the PyTorch weights Yolov7-mask.pt: 62 | 63 | ``` 64 | wget -c https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-mask.pt 65 | ``` 66 | 67 | *Note: All the computations have been done on an Nvidia Jetson TX2 (JetPack 4.6).* 68 | 69 |
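To read the frame rates below as per-frame latency, the conversion is simply latency_ms = 1000 / FPS; a quick sketch using the values from the table that follows:

```python
# Values taken from the benchmark table below (Jetson TX2, JetPack 4.6).
for input_size, fps in [(320, 14.0), (640, 5.0)]:
    print(f"{input_size}px input: {1000 / fps:.0f} ms per frame")
# 320px input: 71 ms per frame
# 640px input: 200 ms per frame
```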
70 | 71 | | Model | Input Size | Inference Speed | 72 | | :---: | :---: | :---: | 73 | | Yolov7 Segmentation TensorRT | 320 | 14.00 FPS | 74 | | Yolov7 Segmentation TensorRT | 640 | 5.00 FPS | 75 | 76 |
77 | 78 | Instance Segmentation with | Yolov7 TensorRT 79 | :-------------------------:|:-------------------------: 80 | ![](results/640_trt_cv2img_VP_0.jpg) | ![](results/640_trt_cv2img_VP_1.jpg) 81 | ![](results/640_trt_cv2img_VP_2.jpg) | ![](results/640_trt_cv2img_VP_3.jpg) 82 | ![](results/640_trt_cv2img_VP_4.jpg) | ![](results/640_trt_cv2img_VP_5.jpg) 83 | 84 | 85 | ## Building the ONNX model: 86 | 87 | To export the model from PyTorch to ONNX, run the following command. You can leave the arguments at their defaults. Please check that the config path and the model weights path are set up correctly. 88 | ``` 89 | mkdir onnx && python3 export_mask.py --weights ./yolov7-mask.pt --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --imgsz $imgsz --input $input_image_path 90 | ``` 91 | The ONNX file will be saved in the newly created onnx directory. The input that you give is inferred right after the export to verify that the export was successful; if you don't want to infer the image, just add --no_infer. 92 | 93 | ## Building the TensorRT model: 94 | 95 | To build the TensorRT engine from the ONNX model, run the following command. You can leave the arguments at their defaults. If you have any problem while parsing the model to TensorRT, don't hesitate to ask. The export is based on the TensorRT-For-YOLO-Series repository (https://github.com/Linaom1214/TensorRT-For-YOLO-Series). 96 | ``` 97 | git clone https://github.com/Linaom1214/TensorRT-For-YOLO-Series.git 98 | mkdir engine && python3 ./TensorRT-For-YOLO-Series/export.py -o $onnx_file_path -e ./engine/engine_name.engine -p fp16 99 | ``` 100 | 101 | ## Testing Yolov7 with PyTorch, TensorRT and ONNX: 102 | **TensorRT** 103 | 104 | To test the inference speed (FPS) of the TensorRT model, run the following command. 105 | 106 | ``` 107 | python3 segment_image.py --input $input_image_path --model $tensorrt_engine_path --onnx_model $onnx_model_path --imgsz $image_size 108 | ``` 109 | 110 | You can still find the ONNX inference in export_mask.py if you need it. 111 | 112 | 113 | **Notes:** 114 | - The **--input argument** can be either an image or a directory of images (directory/*) 115 | 116 | ## Visualizing Yolov7 with TensorRT: 117 | **TensorRT** 118 | 1. To visualize segmentation results on your **images or directory of images**, run the following commands: 119 | 120 | 121 | ``` 122 | python3 segment_image.py --input $input_image_path --model $tensorrt_engine_path --onnx_model $onnx_model_path --imgsz $image_size --save_image --save_path $result_directory_path 123 | 124 | python3 segment_image.py --input $image_directory_path/* --model $tensorrt_engine_path --onnx_model $onnx_model_path --imgsz $image_size --save_image --save_path $result_directory_path 125 | ``` 126 | 2. To visualize segmentation results on your **video**, run the following command: 127 | ``` 128 | python3 segment_video.py --input $input_video_path --model $tensorrt_engine_path --onnx_model $onnx_model_path --imgsz $image_size --save_video --save_path $result_directory_path/video_name.mp4 129 | ``` 130 | 131 | **Notes:** 132 | - If you don't specify --save_image or --save_video, the script only runs inference and does not save the outputs.
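As a complement to the scripts above, here is a minimal, self-contained sketch of how the raw engine throughput could be measured with the TensorRT Python API and pycuda (the same stack export_mask.py imports). This is not the repository's measurement code: `ENGINE_PATH` is a placeholder, the engine is assumed to have static input shapes (as built by the command above), and the input buffer is filled with random data instead of a letterboxed image.

```python
import time
import numpy as np
import tensorrt as trt
import pycuda.autoinit  # noqa: F401 -- initializes a CUDA context on import
import pycuda.driver as cuda

ENGINE_PATH = "./engine/engine_name.engine"  # placeholder, point this at your engine

logger = trt.Logger(trt.Logger.WARNING)
runtime = trt.Runtime(logger)
with open(ENGINE_PATH, "rb") as f:
    engine = runtime.deserialize_cuda_engine(f.read())
context = engine.create_execution_context()

# One pinned host buffer and one device buffer per binding (assumes static shapes).
host, dev, bindings = [], [], []
for i in range(engine.num_bindings):
    dtype = trt.nptype(engine.get_binding_dtype(i))
    h = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(i)), dtype)
    d = cuda.mem_alloc(h.nbytes)
    host.append(h)
    dev.append(d)
    bindings.append(int(d))
    if engine.binding_is_input(i):
        h[:] = np.random.rand(h.size).astype(dtype)  # dummy input, not a real image
        cuda.memcpy_htod(d, h)

n = 50
t0 = time.time()
for _ in range(n):
    context.execute_v2(bindings)  # synchronous inference
for i in range(engine.num_bindings):
    if not engine.binding_is_input(i):
        cuda.memcpy_dtoh(host[i], dev[i])
print(f"~{n / (time.time() - t0):.2f} FPS (compute only)")
```

Numbers from such a loop exclude pre- and post-processing, so they will be somewhat higher than the end-to-end FPS reported in the table above.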
133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # Auto-anchor utils 2 | 3 | import numpy as np 4 | import torch 5 | import yaml 6 | from scipy.cluster.vq import kmeans 7 | from tqdm import tqdm 8 | 9 | from utils.general import colorstr 10 | 11 | 12 | def check_anchor_order(m): 13 | # Check anchor order against stride order for YOLO Detect() module m, and correct if necessary 14 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 15 | da = a[-1] - a[0] # delta a 16 | ds = m.stride[-1] - m.stride[0] # delta s 17 | if da.sign() != ds.sign(): # anchor order and stride order disagree 18 | print('Reversing anchor order') 19 | m.anchors[:] = m.anchors.flip(0) 20 | m.anchor_grid[:] = m.anchor_grid.flip(0) 21 | 22 | 23 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 24 | # Check anchor fit to data, recompute if necessary 25 | prefix = colorstr('autoanchor: ') 26 | print(f'\n{prefix}Analyzing anchors... ', end='') 27 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 28 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 29 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 30 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 31 | 32 | def metric(k): # compute metric 33 | r = wh[:, None] / k[None] 34 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 35 | best = x.max(1)[0] # best_x 36 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 37 | bpr = (best > 1. / thr).float().mean() # best possible recall 38 | return bpr, aat 39 | 40 | anchors = m.anchor_grid.clone().cpu().view(-1, 2) # current anchors 41 | bpr, aat = metric(anchors) 42 | print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') 43 | if bpr < 0.98: # threshold to recompute 44 | print('. Attempting to improve anchors, please wait...') 45 | na = m.anchor_grid.numel() // 2 # number of anchors 46 | try: 47 | anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 48 | except Exception as e: 49 | print(f'{prefix}ERROR: {e}') 50 | new_bpr = metric(anchors)[0] 51 | if new_bpr > bpr: # replace anchors 52 | anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) 53 | m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid) # for inference 54 | check_anchor_order(m) 55 | m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 56 | print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.') 57 | else: 58 | print(f'{prefix}Original anchors better than new anchors. 
Proceeding with original anchors.') 59 | print('') # newline 60 | 61 | 62 | def kmean_anchors(path='./data/coco.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 63 | """ Creates kmeans-evolved anchors from training dataset 64 | 65 | Arguments: 66 | path: path to dataset *.yaml, or a loaded dataset 67 | n: number of anchors 68 | img_size: image size used for training 69 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 70 | gen: generations to evolve anchors using genetic algorithm 71 | verbose: print all results 72 | 73 | Return: 74 | k: kmeans evolved anchors 75 | 76 | Usage: 77 | from utils.autoanchor import *; _ = kmean_anchors() 78 | """ 79 | thr = 1. / thr 80 | prefix = colorstr('autoanchor: ') 81 | 82 | def metric(k, wh): # compute metrics 83 | r = wh[:, None] / k[None] 84 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 85 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 86 | return x, x.max(1)[0] # x, best_x 87 | 88 | def anchor_fitness(k): # mutation fitness 89 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 90 | return (best * (best > thr).float()).mean() # fitness 91 | 92 | def print_results(k): 93 | k = k[np.argsort(k.prod(1))] # sort small to large 94 | x, best = metric(k, wh0) 95 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 96 | print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr') 97 | print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' 98 | f'past_thr={x[x > thr].mean():.3f}-mean: ', end='') 99 | for i, x in enumerate(k): 100 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 101 | return k 102 | 103 | if isinstance(path, str): # *.yaml file 104 | with open(path) as f: 105 | data_dict = yaml.load(f, Loader=yaml.SafeLoader) # data dict 106 | from utils.datasets import LoadImagesAndLabels 107 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 108 | else: 109 | dataset = path # dataset 110 | 111 | # Get label wh 112 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 113 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 114 | 115 | # Filter 116 | i = (wh0 < 3.0).any(1).sum() 117 | if i: 118 | print(f'{prefix}WARNING: Extremely small objects found. 
{i} of {len(wh0)} labels are < 3 pixels in size.') 119 | wh = wh0[(wh0 >= 2.0).any(1)] # filter >= 2 pixels 120 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 121 | 122 | # Kmeans calculation 123 | print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') 124 | s = wh.std(0) # sigmas for whitening 125 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 126 | assert len(k) == n, f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}' 127 | k *= s 128 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 129 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 130 | k = print_results(k) 131 | 132 | # Plot 133 | # k, d = [None] * 20, [None] * 20 134 | # for i in tqdm(range(1, 21)): 135 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 136 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 137 | # ax = ax.ravel() 138 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 139 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 140 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 141 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 142 | # fig.savefig('wh.png', dpi=200) 143 | 144 | # Evolve 145 | npr = np.random 146 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, anchor shape, mutation prob, sigma 147 | pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar 148 | for _ in pbar: 149 | v = np.ones(sh) 150 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 151 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 152 | kg = (k.copy() * v).clip(min=2.0) 153 | fg = anchor_fitness(kg) 154 | if fg > f: 155 | f, k = fg, kg.copy() 156 | pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 157 | if verbose: 158 | print_results(k) 159 | 160 | return print_results(k) 161 | -------------------------------------------------------------------------------- /deploy/triton-inference-server/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv7 on Triton Inference Server 2 | 3 | Instructions to deploy YOLOv7 as a TensorRT engine to [Triton Inference Server](https://github.com/NVIDIA/triton-inference-server). 4 | 5 | Triton Inference Server takes care of model deployment with many out-of-the-box benefits, like a GRPC and HTTP interface, automatic scheduling on multiple GPUs, shared memory (even on GPU), dynamic server-side batching, health metrics and memory resource management. 6 | 7 | There are no additional dependencies needed to run this deployment, except a working Docker daemon with GPU support. 8 | 9 | ## Export TensorRT 10 | 11 | See https://github.com/WongKinYiu/yolov7#export for more info. 
12 | 13 | ```bash 14 | # PyTorch Yolov7 -> ONNX with grid, EfficientNMS plugin and dynamic batch size 15 | python export.py --weights ./yolov7.pt --grid --end2end --dynamic-batch --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 16 | # ONNX -> TensorRT with trtexec and docker 17 | docker run -it --rm --gpus=all nvcr.io/nvidia/tensorrt:22.06-py3 18 | # Copy onnx -> container: docker cp yolov7.onnx <container-id>:/workspace/ 19 | # Export with FP16 precision, min batch 1, opt batch 8 and max batch 8 20 | ./tensorrt/bin/trtexec --onnx=yolov7.onnx --minShapes=images:1x3x640x640 --optShapes=images:8x3x640x640 --maxShapes=images:8x3x640x640 --fp16 --workspace=4096 --saveEngine=yolov7-fp16-1x8x8.engine --timingCacheFile=timing.cache 21 | # Test engine 22 | ./tensorrt/bin/trtexec --loadEngine=yolov7-fp16-1x8x8.engine 23 | # Copy engine -> host: docker cp <container-id>:/workspace/yolov7-fp16-1x8x8.engine . 24 | ``` 25 | 26 | Example output of the test on an RTX 3090. 27 | 28 | ``` 29 | [I] === Performance summary === 30 | [I] Throughput: 73.4985 qps 31 | [I] Latency: min = 14.8578 ms, max = 15.8344 ms, mean = 15.07 ms, median = 15.0422 ms, percentile(99%) = 15.7443 ms 32 | [I] End-to-End Host Latency: min = 25.8715 ms, max = 28.4102 ms, mean = 26.672 ms, median = 26.6082 ms, percentile(99%) = 27.8314 ms 33 | [I] Enqueue Time: min = 0.793701 ms, max = 1.47144 ms, mean = 1.2008 ms, median = 1.28644 ms, percentile(99%) = 1.38965 ms 34 | [I] H2D Latency: min = 1.50073 ms, max = 1.52454 ms, mean = 1.51225 ms, median = 1.51404 ms, percentile(99%) = 1.51941 ms 35 | [I] GPU Compute Time: min = 13.3386 ms, max = 14.3186 ms, mean = 13.5448 ms, median = 13.5178 ms, percentile(99%) = 14.2151 ms 36 | [I] D2H Latency: min = 0.00878906 ms, max = 0.0172729 ms, mean = 0.0128844 ms, median = 0.0125732 ms, percentile(99%) = 0.0166016 ms 37 | [I] Total Host Walltime: 3.04768 s 38 | [I] Total GPU Compute Time: 3.03404 s 39 | [I] Explanations of the performance metrics are printed in the verbose logs. 40 | ``` 41 | Note: 73.5 qps x batch 8 = 588 FPS @ ~15 ms latency. 42 | 43 | ## Model Repository 44 | 45 | See [Triton Model Repository Documentation](https://github.com/triton-inference-server/server/blob/main/docs/model_repository.md#model-repository) for more info. 46 | 47 | ```bash 48 | # Create folder structure 49 | mkdir -p triton-deploy/models/yolov7/1/ 50 | touch triton-deploy/models/yolov7/config.pbtxt 51 | # Place model 52 | mv yolov7-fp16-1x8x8.engine triton-deploy/models/yolov7/1/model.plan 53 | ``` 54 | 55 | ## Model Configuration 56 | 57 | See [Triton Model Configuration Documentation](https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#model-configuration) for more info. 
58 | 59 | Minimal configuration for `triton-deploy/models/yolov7/config.pbtxt`: 60 | 61 | ``` 62 | name: "yolov7" 63 | platform: "tensorrt_plan" 64 | max_batch_size: 8 65 | dynamic_batching { } 66 | ``` 67 | 68 | Example repository: 69 | 70 | ```bash 71 | $ tree triton-deploy/ 72 | triton-deploy/ 73 | └── models 74 | └── yolov7 75 | ├── 1 76 | │   └── model.plan 77 | └── config.pbtxt 78 | 79 | 3 directories, 2 files 80 | ``` 81 | 82 | ## Start Triton Inference Server 83 | 84 | ``` 85 | docker run --gpus all --rm --ipc=host --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p8000:8000 -p8001:8001 -p8002:8002 -v$(pwd)/triton-deploy/models:/models nvcr.io/nvidia/tritonserver:22.06-py3 tritonserver --model-repository=/models --strict-model-config=false --log-verbose 1 86 | ``` 87 | 88 | In the log you should see: 89 | 90 | ``` 91 | +--------+---------+--------+ 92 | | Model | Version | Status | 93 | +--------+---------+--------+ 94 | | yolov7 | 1 | READY | 95 | +--------+---------+--------+ 96 | ``` 97 | 98 | ## Performance with Model Analyzer 99 | 100 | See [Triton Model Analyzer Documentation](https://github.com/triton-inference-server/server/blob/main/docs/model_analyzer.md#model-analyzer) for more info. 101 | 102 | Performance numbers @ RTX 3090 + AMD Ryzen 9 5950X 103 | 104 | Example test for 16 concurrent clients using shared memory, each with batch size 1 requests: 105 | 106 | ```bash 107 | docker run -it --ipc=host --net=host nvcr.io/nvidia/tritonserver:22.06-py3-sdk /bin/bash 108 | 109 | ./install/bin/perf_analyzer -m yolov7 -u 127.0.0.1:8001 -i grpc --shared-memory system --concurrency-range 16 110 | 111 | # Result (truncated) 112 | Concurrency: 16, throughput: 590.119 infer/sec, latency 27080 usec 113 | ``` 114 | 115 | Throughput for 16 clients with batch size 1 is the same as for a single thread running the engine at batch size 16 locally, thanks to the Triton [Dynamic Batching Strategy](https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#dynamic-batcher). The result without dynamic batching (disabled in the model configuration) is considerably worse: 116 | 117 | ```bash 118 | # Result (truncated) 119 | Concurrency: 16, throughput: 335.587 infer/sec, latency 47616 usec 120 | ``` 121 | 122 | ## How to run model in your code 123 | 124 | An example client can be found in client.py. It can run dummy inputs, images and videos. 125 | 126 | ```bash 127 | pip3 install tritonclient[all] opencv-python 128 | python3 client.py image data/dog.jpg 129 | ``` 130 | 131 | ![exemplary output result](data/dog_result.jpg) 132 | 133 | ``` 134 | $ python3 client.py --help 135 | usage: client.py [-h] [-m MODEL] [--width WIDTH] [--height HEIGHT] [-u URL] [-o OUT] [-f FPS] [-i] [-v] [-t CLIENT_TIMEOUT] [-s] [-r ROOT_CERTIFICATES] [-p PRIVATE_KEY] [-x CERTIFICATE_CHAIN] {dummy,image,video} [input] 136 | 137 | positional arguments: 138 | {dummy,image,video} Run mode. 'dummy' will send an emtpy buffer to the server to test if inference works. 'image' will process an image. 'video' will process a video. 
139 | input Input file to load from in image or video mode 140 | 141 | optional arguments: 142 | -h, --help show this help message and exit 143 | -m MODEL, --model MODEL 144 | Inference model name, default yolov7 145 | --width WIDTH Inference model input width, default 640 146 | --height HEIGHT Inference model input height, default 640 147 | -u URL, --url URL Inference server URL, default localhost:8001 148 | -o OUT, --out OUT Write output into file instead of displaying it 149 | -f FPS, --fps FPS Video output fps, default 24.0 FPS 150 | -i, --model-info Print model status, configuration and statistics 151 | -v, --verbose Enable verbose client output 152 | -t CLIENT_TIMEOUT, --client-timeout CLIENT_TIMEOUT 153 | Client timeout in seconds, default is no timeout 154 | -s, --ssl Enable SSL encrypted channel to the server 155 | -r ROOT_CERTIFICATES, --root-certificates ROOT_CERTIFICATES 156 | File holding PEM-encoded root certificates, default is none 157 | -p PRIVATE_KEY, --private-key PRIVATE_KEY 158 | File holding PEM-encoded private key, default is none 159 | -x CERTIFICATE_CHAIN, --certificate-chain CERTIFICATE_CHAIN 160 | File holding PEM-encoded certificate chain, default is none 161 | ``` 162 | -------------------------------------------------------------------------------- /export_mask.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import time 4 | 5 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 6 | import tensorrt as trt 7 | import pycuda.autoinit 8 | import pycuda.driver as cuda 9 | import numpy as np 10 | 11 | 12 | 13 | 14 | import onnxruntime as ort 15 | import os 16 | import torch 17 | import torch.backends.cudnn as cudnn 18 | import cv2 19 | import onnx 20 | import yaml 21 | from torchvision import transforms 22 | import tqdm 23 | 24 | from utils.datasets import letterbox 25 | from utils.general import non_max_suppression_mask_conf 26 | from detectron2.modeling.poolers import ROIPooler 27 | from detectron2.structures import Boxes 28 | from detectron2.utils.memory import retry_if_cuda_oom 29 | from detectron2.layers import paste_masks_in_image 30 | from utils.general import set_logging 31 | from models.experimental import attempt_load 32 | 33 | 34 | 35 | 36 | def PostProcess(img, hyp, model, inf_out, attn, bases, sem_output): 37 | bases = torch.cat([bases, sem_output], dim=1) 38 | nb, _, height, width = img.shape 39 | names = model.names 40 | pooler_scale = model.pooler_scale 41 | pooler = ROIPooler(output_size=hyp['mask_resolution'], scales=(pooler_scale,), sampling_ratio=1, pooler_type='ROIAlignV2', canonical_level=2) 42 | 43 | output, output_mask = non_max_suppression_mask_conf(inf_out, attn, bases, pooler, hyp, conf_thres=0.25, iou_thres=0.65, merge=False, mask_iou=None) 44 | 45 | pred, pred_masks = output[0], output_mask[0] 46 | base = bases[0] 47 | bboxes = Boxes(pred[:, :4]) 48 | original_pred_masks = pred_masks.view(-1, hyp['mask_resolution'], hyp['mask_resolution']) 49 | pred_masks = retry_if_cuda_oom(paste_masks_in_image)( original_pred_masks, bboxes, (height, width), threshold=0.5) 50 | pred_masks_np = pred_masks.detach().cpu().numpy() 51 | pred_cls = pred[:, 5].detach().cpu().numpy() 52 | pred_conf = pred[:, 4].detach().cpu().numpy() 53 | nimg = img[0].permute(1, 2, 0) * 255 54 | nimg = nimg.cpu().numpy().astype(np.uint8) 55 | nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR) 56 | nbboxes = 
bboxes.tensor.detach().cpu().numpy().astype(int) 57 | pnimg = nimg.copy() 58 | 59 | 60 | for one_mask, bbox, cls, conf in zip(pred_masks_np, nbboxes, pred_cls, pred_conf): 61 | if conf < 0.25: 62 | continue 63 | color = [np.random.randint(255), np.random.randint(255), np.random.randint(255)] 64 | 65 | 66 | pnimg[one_mask] = pnimg[one_mask] * 0.5 + np.array(color, dtype=np.uint8) * 0.5 67 | pnimg = cv2.rectangle(pnimg, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2) 68 | 69 | return pnimg 70 | if __name__ == '__main__': 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument('--weights', type=str, default='./yolor-csp-c.pt', help='weights path') 73 | parser.add_argument('--onnx_name', type=str, default='./yolov7_mask.onnx', help='onnx filename') 74 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 75 | parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes') 76 | parser.add_argument('--dynamic-batch', action='store_true', help='dynamic batch onnx for tensorrt and onnx-runtime') 77 | parser.add_argument('--grid', action='store_true', help='export Detect() layer grid') 78 | parser.add_argument('--end2end', action='store_true', help='export end2end onnx') 79 | parser.add_argument('--max-wh', type=int, default=None, help='None for tensorrt nms, int value for onnx-runtime nms') 80 | parser.add_argument('--topk-all', type=int, default=100, help='topk objects for every image') 81 | parser.add_argument('--iou-thres', type=float, default=0.45, help='iou threshold for NMS') 82 | parser.add_argument('--conf-thres', type=float, default=0.25, help='conf threshold for NMS') 83 | parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 84 | parser.add_argument('--simplify', action='store_true', help='simplify onnx model') 85 | parser.add_argument('--include-nms', action='store_true', help='export end2end onnx') 86 | parser.add_argument('--fp16', action='store_true', help='CoreML FP16 half-precision export') 87 | parser.add_argument('--int8', action='store_true', help='CoreML INT8 quantization') 88 | parser.add_argument("--input", nargs="+", help="A file or directory of your input data ") 89 | parser.add_argument('--imgsz', type=int, default=320, help='image size') # height, width 90 | parser.add_argument('--no_infer', action='store_true', help='skip the ONNX Runtime inference check after export') 91 | 92 | opt = parser.parse_args() 93 | opt.dynamic = opt.dynamic and not opt.end2end 94 | opt.dynamic = False if opt.dynamic_batch else opt.dynamic 95 | set_logging() 96 | t = time.time() 97 | 98 | 99 | device = torch.device("cpu") 100 | 101 | 102 | with open('data/hyp.scratch.mask.yaml') as f: 103 | hyp = yaml.load(f, Loader=yaml.FullLoader) 104 | 105 | weights = opt.weights 106 | model = attempt_load(weights, map_location=device) 107 | _ = model.eval() 108 | 109 | 110 | time1 = time.time() 111 | loop = 1 112 | for i in range(loop): 113 | image = cv2.imread(opt.input[0]) # 504x378 image 114 | image = letterbox(image, (opt.imgsz,opt.imgsz), stride=64, auto=True)[0] 115 | image_ = image.copy() 116 | image = transforms.ToTensor()(image) 117 | image = torch.tensor(np.array([image.numpy()])) 118 | image = image.to(device) 119 | img = image 120 | y = model(image) 121 | 122 | try: 123 | import onnx 124 | 125 | print('\nStarting ONNX export with onnx %s...' 
% onnx.__version__) 126 | f = "./onnx/" + opt.onnx_name 127 | model.eval() 128 | output_names = ['output'] 129 | dynamic_axes = None 130 | if opt.grid: 131 | model.model[-1].concat = True 132 | torch.onnx.export(model, image, f, verbose=True, opset_version=13, input_names=['images'], 133 | output_names=output_names, 134 | dynamic_axes=dynamic_axes) 135 | 136 | # Checks 137 | onnx_model = onnx.load(f) # load onnx model 138 | onnx.checker.check_model(onnx_model) # check onnx model 139 | onnx.save(onnx_model, f) 140 | print('ONNX export success, saved as %s' % f) 141 | except Exception as e: 142 | print('ONNX export failure: %s' % e) 143 | 144 | # Finish 145 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t)) 146 | 147 | 148 | if not(opt.no_infer): 149 | f = "./onnx/" + opt.onnx_name 150 | image_path = opt.input 151 | 152 | iteration = 0 153 | start_time_all = time.time() 154 | w = f 155 | providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if torch.cuda.is_available() else ['CPUExecutionProvider'] 156 | session = ort.InferenceSession(w, providers=providers) 157 | 158 | model_onnx = onnx.load(w) 159 | input_shapes = [[d.dim_value for d in _input.type.tensor_type.shape.dim] for _input in model_onnx.graph.input] 160 | output_shapes = [[d.dim_value for d in _output.type.tensor_type.shape.dim] for _output in model_onnx.graph.output] 161 | 162 | 163 | 164 | outname = [i.name for i in session.get_outputs()] 165 | 166 | inname = [i.name for i in session.get_inputs()] 167 | time_use_trt_only = 0 168 | time_use_trt_ = 0 169 | for img_path in tqdm.tqdm(image_path): 170 | start_time = time.time() 171 | image = cv2.imread(img_path) 172 | image = letterbox(image, (opt.imgsz, opt.imgsz), stride=64, auto=True)[0] 173 | image_letter = image.copy() 174 | image_ = image.copy() 175 | image = transforms.ToTensor()(image) 176 | image = torch.tensor(np.array([image.numpy()])) # add a batch dimension: HWC image -> 1xCxHxW tensor 177 | img = np.array(image) 178 | 179 | img = np.ascontiguousarray(img, dtype=np.float32) 180 | inp = {inname[0]:img} 181 | outputs = session.run(outname, inp) # run the session once and reuse all seven outputs 182 | output, output1, output2 = outputs[0], outputs[1], outputs[2] 183 | output3, output4 = outputs[3], outputs[4] 184 | output5, output6 = outputs[5], outputs[6] 185 | 186 | 187 | 188 | inf_out, train_out = torch.tensor(output), [torch.tensor(output2),torch.tensor(output3),torch.tensor(output4)] 189 | attn, mask_iou, bases, sem_output = torch.tensor(output1), None, torch.tensor(output5), torch.tensor(output6) 190 | img = torch.tensor(img) 191 | pnimg = PostProcess(img, hyp, model, inf_out, attn, bases, sem_output) 192 | 193 | 194 | save_path = "./result_onnx" 195 | cv2.imwrite(save_path + str(int(opt.imgsz)) + "_" + str(iteration) + ".jpg", pnimg) # one file per input image 196 | iteration+=1 197 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | # Model validation metrics 2 | 3 | from pathlib import Path 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import torch 8 | 9 | from . 
import general 10 | 11 | 12 | def fitness(x): 13 | # Model fitness as a weighted combination of metrics 14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] 15 | return (x[:, :4] * w).sum(1) 16 | 17 | 18 | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=()): 19 | """ Compute the average precision, given the recall and precision curves. 20 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 21 | # Arguments 22 | tp: True positives (nparray, nx1 or nx10). 23 | conf: Objectness value from 0-1 (nparray). 24 | pred_cls: Predicted object classes (nparray). 25 | target_cls: True object classes (nparray). 26 | plot: Plot precision-recall curve at mAP@0.5 27 | save_dir: Plot save directory 28 | # Returns 29 | The average precision as computed in py-faster-rcnn. 30 | """ 31 | 32 | # Sort by objectness 33 | i = np.argsort(-conf) 34 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 35 | 36 | # Find unique classes 37 | unique_classes = np.unique(target_cls) 38 | nc = unique_classes.shape[0] # number of classes, number of detections 39 | 40 | # Create Precision-Recall curve and compute AP for each class 41 | px, py = np.linspace(0, 1, 1000), [] # for plotting 42 | ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) 43 | for ci, c in enumerate(unique_classes): 44 | i = pred_cls == c 45 | n_l = (target_cls == c).sum() # number of labels 46 | n_p = i.sum() # number of predictions 47 | 48 | if n_p == 0 or n_l == 0: 49 | continue 50 | else: 51 | # Accumulate FPs and TPs 52 | fpc = (1 - tp[i]).cumsum(0) 53 | tpc = tp[i].cumsum(0) 54 | 55 | # Recall 56 | recall = tpc / (n_l + 1e-16) # recall curve 57 | r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases 58 | 59 | # Precision 60 | precision = tpc / (tpc + fpc) # precision curve 61 | p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score 62 | 63 | # AP from recall-precision curve 64 | for j in range(tp.shape[1]): 65 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) 66 | if plot and j == 0: 67 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 68 | 69 | # Compute F1 (harmonic mean of precision and recall) 70 | f1 = 2 * p * r / (p + r + 1e-16) 71 | if plot: 72 | plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) 73 | plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') 74 | plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') 75 | plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') 76 | 77 | i = f1.mean(0).argmax() # max F1 index 78 | return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32') 79 | 80 | 81 | def compute_ap(recall, precision): 82 | """ Compute the average precision, given the recall and precision curves 83 | # Arguments 84 | recall: The recall curve (list) 85 | precision: The precision curve (list) 86 | # Returns 87 | Average precision, precision curve, recall curve 88 | """ 89 | 90 | # Append sentinel values to beginning and end 91 | mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01])) 92 | mpre = np.concatenate(([1.], precision, [0.])) 93 | 94 | # Compute the precision envelope 95 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) 96 | 97 | # Integrate area under curve 98 | method = 'interp' # methods: 'continuous', 'interp' 99 | if method == 'interp': 100 | x = np.linspace(0, 1, 101) # 101-point interp (COCO) 101 | ap = np.trapz(np.interp(x, mrec, 
mpre), x) # integrate 102 | else: # 'continuous' 103 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes 104 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve 105 | 106 | return ap, mpre, mrec 107 | 108 | 109 | class ConfusionMatrix: 110 | # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix 111 | def __init__(self, nc, conf=0.25, iou_thres=0.45): 112 | self.matrix = np.zeros((nc + 1, nc + 1)) 113 | self.nc = nc # number of classes 114 | self.conf = conf 115 | self.iou_thres = iou_thres 116 | 117 | def process_batch(self, detections, labels): 118 | """ 119 | Update the confusion matrix for a batch of detections and labels. 120 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 121 | Arguments: 122 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class 123 | labels (Array[M, 5]), class, x1, y1, x2, y2 124 | Returns: 125 | None, updates confusion matrix accordingly 126 | """ 127 | detections = detections[detections[:, 4] > self.conf] 128 | gt_classes = labels[:, 0].int() 129 | detection_classes = detections[:, 5].int() 130 | iou = general.box_iou(labels[:, 1:], detections[:, :4]) 131 | 132 | x = torch.where(iou > self.iou_thres) 133 | if x[0].shape[0]: 134 | matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() 135 | if x[0].shape[0] > 1: 136 | matches = matches[matches[:, 2].argsort()[::-1]] 137 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]] 138 | matches = matches[matches[:, 2].argsort()[::-1]] 139 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]] 140 | else: 141 | matches = np.zeros((0, 3)) 142 | 143 | n = matches.shape[0] > 0 144 | m0, m1, _ = matches.transpose().astype(np.int16) 145 | for i, gc in enumerate(gt_classes): 146 | j = m0 == i 147 | if n and sum(j) == 1: 148 | self.matrix[gc, detection_classes[m1[j]]] += 1 # correct 149 | else: 150 | self.matrix[self.nc, gc] += 1 # background FP 151 | 152 | if n: 153 | for i, dc in enumerate(detection_classes): 154 | if not any(m1 == i): 155 | self.matrix[dc, self.nc] += 1 # background FN 156 | 157 | def get_matrix(self): # renamed: the attribute self.matrix set in __init__ shadowed a method named matrix() 158 | return self.matrix 159 | 160 | def plot(self, save_dir='', names=()): 161 | try: 162 | import seaborn as sn 163 | 164 | array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize 165 | array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) 166 | 167 | fig = plt.figure(figsize=(12, 9), tight_layout=True) 168 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size 169 | labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels 170 | sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True, 171 | xticklabels=names + ['background FP'] if labels else "auto", 172 | yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1)) 173 | fig.axes[0].set_xlabel('True') 174 | fig.axes[0].set_ylabel('Predicted') 175 | fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) 176 | except Exception as e: 177 | print('WARNING: ConfusionMatrix plot failure: %s' % e) 178 | 179 | def print(self): 180 | for i in range(self.nc + 1): 181 | print(' '.join(map(str, self.matrix[i]))) 182 | 183 | 184 | # Plots ---------------------------------------------------------------------------------------------------------------- 185 | 186 | def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()): 187 | # Precision-recall curve 188 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 
189 | py = np.stack(py, axis=1) 190 | 191 | if 0 < len(names) < 21: # display per-class legend if < 21 classes 192 | for i, y in enumerate(py.T): 193 | ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) 194 | else: 195 | ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) 196 | 197 | ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) 198 | ax.set_xlabel('Recall') 199 | ax.set_ylabel('Precision') 200 | ax.set_xlim(0, 1) 201 | ax.set_ylim(0, 1) 202 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 203 | fig.savefig(Path(save_dir), dpi=250) 204 | 205 | 206 | def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence', ylabel='Metric'): 207 | # Metric-confidence curve 208 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 209 | 210 | if 0 < len(names) < 21: # display per-class legend if < 21 classes 211 | for i, y in enumerate(py): 212 | ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric) 213 | else: 214 | ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric) 215 | 216 | y = py.mean(0) 217 | ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') 218 | ax.set_xlabel(xlabel) 219 | ax.set_ylabel(ylabel) 220 | ax.set_xlim(0, 1) 221 | ax.set_ylim(0, 1) 222 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 223 | fig.savefig(Path(save_dir), dpi=250) 224 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7-e6e.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7-e6e backbone 14 | backbone: 15 | # [from, number, module, args], 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [80, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, DownC, [160]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 29 | [-1, 1, Conv, [160, 1, 1]], # 12 30 | [-11, 1, Conv, [64, 1, 1]], 31 | [-12, 1, Conv, [64, 1, 1]], 32 | [-1, 1, Conv, [64, 3, 1]], 33 | [-1, 1, Conv, [64, 3, 1]], 34 | [-1, 1, Conv, [64, 3, 1]], 35 | [-1, 1, Conv, [64, 3, 1]], 36 | [-1, 1, Conv, [64, 3, 1]], 37 | [-1, 1, Conv, [64, 3, 1]], 38 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 39 | [-1, 1, Conv, [160, 1, 1]], # 22 40 | [[-1, -11], 1, Shortcut, [1]], # 23 41 | 42 | [-1, 1, DownC, [320]], # 24-P3/8 43 | [-1, 1, Conv, [128, 1, 1]], 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, Conv, [128, 3, 1]], 46 | [-1, 1, Conv, [128, 3, 1]], 47 | [-1, 1, Conv, [128, 3, 1]], 48 | [-1, 1, Conv, [128, 3, 1]], 49 | [-1, 1, Conv, [128, 3, 1]], 50 | [-1, 1, Conv, [128, 3, 1]], 51 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 52 | [-1, 1, Conv, [320, 1, 1]], # 34 53 | [-11, 1, Conv, [128, 1, 1]], 54 | [-12, 1, Conv, [128, 1, 1]], 55 | [-1, 1, Conv, [128, 3, 1]], 56 | [-1, 1, Conv, [128, 3, 1]], 57 | [-1, 1, Conv, [128, 3, 1]], 58 | [-1, 1, Conv, [128, 3, 1]], 
59 | [-1, 1, Conv, [128, 3, 1]], 60 | [-1, 1, Conv, [128, 3, 1]], 61 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 62 | [-1, 1, Conv, [320, 1, 1]], # 44 63 | [[-1, -11], 1, Shortcut, [1]], # 45 64 | 65 | [-1, 1, DownC, [640]], # 46-P4/16 66 | [-1, 1, Conv, [256, 1, 1]], 67 | [-2, 1, Conv, [256, 1, 1]], 68 | [-1, 1, Conv, [256, 3, 1]], 69 | [-1, 1, Conv, [256, 3, 1]], 70 | [-1, 1, Conv, [256, 3, 1]], 71 | [-1, 1, Conv, [256, 3, 1]], 72 | [-1, 1, Conv, [256, 3, 1]], 73 | [-1, 1, Conv, [256, 3, 1]], 74 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 75 | [-1, 1, Conv, [640, 1, 1]], # 56 76 | [-11, 1, Conv, [256, 1, 1]], 77 | [-12, 1, Conv, [256, 1, 1]], 78 | [-1, 1, Conv, [256, 3, 1]], 79 | [-1, 1, Conv, [256, 3, 1]], 80 | [-1, 1, Conv, [256, 3, 1]], 81 | [-1, 1, Conv, [256, 3, 1]], 82 | [-1, 1, Conv, [256, 3, 1]], 83 | [-1, 1, Conv, [256, 3, 1]], 84 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 85 | [-1, 1, Conv, [640, 1, 1]], # 66 86 | [[-1, -11], 1, Shortcut, [1]], # 67 87 | 88 | [-1, 1, DownC, [960]], # 68-P5/32 89 | [-1, 1, Conv, [384, 1, 1]], 90 | [-2, 1, Conv, [384, 1, 1]], 91 | [-1, 1, Conv, [384, 3, 1]], 92 | [-1, 1, Conv, [384, 3, 1]], 93 | [-1, 1, Conv, [384, 3, 1]], 94 | [-1, 1, Conv, [384, 3, 1]], 95 | [-1, 1, Conv, [384, 3, 1]], 96 | [-1, 1, Conv, [384, 3, 1]], 97 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 98 | [-1, 1, Conv, [960, 1, 1]], # 78 99 | [-11, 1, Conv, [384, 1, 1]], 100 | [-12, 1, Conv, [384, 1, 1]], 101 | [-1, 1, Conv, [384, 3, 1]], 102 | [-1, 1, Conv, [384, 3, 1]], 103 | [-1, 1, Conv, [384, 3, 1]], 104 | [-1, 1, Conv, [384, 3, 1]], 105 | [-1, 1, Conv, [384, 3, 1]], 106 | [-1, 1, Conv, [384, 3, 1]], 107 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 108 | [-1, 1, Conv, [960, 1, 1]], # 88 109 | [[-1, -11], 1, Shortcut, [1]], # 89 110 | 111 | [-1, 1, DownC, [1280]], # 90-P6/64 112 | [-1, 1, Conv, [512, 1, 1]], 113 | [-2, 1, Conv, [512, 1, 1]], 114 | [-1, 1, Conv, [512, 3, 1]], 115 | [-1, 1, Conv, [512, 3, 1]], 116 | [-1, 1, Conv, [512, 3, 1]], 117 | [-1, 1, Conv, [512, 3, 1]], 118 | [-1, 1, Conv, [512, 3, 1]], 119 | [-1, 1, Conv, [512, 3, 1]], 120 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 121 | [-1, 1, Conv, [1280, 1, 1]], # 100 122 | [-11, 1, Conv, [512, 1, 1]], 123 | [-12, 1, Conv, [512, 1, 1]], 124 | [-1, 1, Conv, [512, 3, 1]], 125 | [-1, 1, Conv, [512, 3, 1]], 126 | [-1, 1, Conv, [512, 3, 1]], 127 | [-1, 1, Conv, [512, 3, 1]], 128 | [-1, 1, Conv, [512, 3, 1]], 129 | [-1, 1, Conv, [512, 3, 1]], 130 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 131 | [-1, 1, Conv, [1280, 1, 1]], # 110 132 | [[-1, -11], 1, Shortcut, [1]], # 111 133 | ] 134 | 135 | # yolov7-e6e head 136 | head: 137 | [[-1, 1, SPPCSPC, [640]], # 112 138 | 139 | [-1, 1, Conv, [480, 1, 1]], 140 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 141 | [89, 1, Conv, [480, 1, 1]], # route backbone P5 142 | [[-1, -2], 1, Concat, [1]], 143 | 144 | [-1, 1, Conv, [384, 1, 1]], 145 | [-2, 1, Conv, [384, 1, 1]], 146 | [-1, 1, Conv, [192, 3, 1]], 147 | [-1, 1, Conv, [192, 3, 1]], 148 | [-1, 1, Conv, [192, 3, 1]], 149 | [-1, 1, Conv, [192, 3, 1]], 150 | [-1, 1, Conv, [192, 3, 1]], 151 | [-1, 1, Conv, [192, 3, 1]], 152 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 153 | [-1, 1, Conv, [480, 1, 1]], # 126 154 | [-11, 1, Conv, [384, 1, 1]], 155 | [-12, 1, Conv, [384, 1, 1]], 156 | [-1, 1, Conv, [192, 3, 1]], 157 | [-1, 1, Conv, [192, 3, 1]], 158 | [-1, 1, Conv, [192, 3, 1]], 159 | [-1, 1, Conv, [192, 3, 1]], 160 | [-1, 1, Conv, [192, 3, 1]], 161 | [-1, 1, Conv, [192, 3, 1]], 162 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 163 | 
[-1, 1, Conv, [480, 1, 1]], # 136 164 | [[-1, -11], 1, Shortcut, [1]], # 137 165 | 166 | [-1, 1, Conv, [320, 1, 1]], 167 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 168 | [67, 1, Conv, [320, 1, 1]], # route backbone P4 169 | [[-1, -2], 1, Concat, [1]], 170 | 171 | [-1, 1, Conv, [256, 1, 1]], 172 | [-2, 1, Conv, [256, 1, 1]], 173 | [-1, 1, Conv, [128, 3, 1]], 174 | [-1, 1, Conv, [128, 3, 1]], 175 | [-1, 1, Conv, [128, 3, 1]], 176 | [-1, 1, Conv, [128, 3, 1]], 177 | [-1, 1, Conv, [128, 3, 1]], 178 | [-1, 1, Conv, [128, 3, 1]], 179 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 180 | [-1, 1, Conv, [320, 1, 1]], # 151 181 | [-11, 1, Conv, [256, 1, 1]], 182 | [-12, 1, Conv, [256, 1, 1]], 183 | [-1, 1, Conv, [128, 3, 1]], 184 | [-1, 1, Conv, [128, 3, 1]], 185 | [-1, 1, Conv, [128, 3, 1]], 186 | [-1, 1, Conv, [128, 3, 1]], 187 | [-1, 1, Conv, [128, 3, 1]], 188 | [-1, 1, Conv, [128, 3, 1]], 189 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 190 | [-1, 1, Conv, [320, 1, 1]], # 161 191 | [[-1, -11], 1, Shortcut, [1]], # 162 192 | 193 | [-1, 1, Conv, [160, 1, 1]], 194 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 195 | [45, 1, Conv, [160, 1, 1]], # route backbone P3 196 | [[-1, -2], 1, Concat, [1]], 197 | 198 | [-1, 1, Conv, [128, 1, 1]], 199 | [-2, 1, Conv, [128, 1, 1]], 200 | [-1, 1, Conv, [64, 3, 1]], 201 | [-1, 1, Conv, [64, 3, 1]], 202 | [-1, 1, Conv, [64, 3, 1]], 203 | [-1, 1, Conv, [64, 3, 1]], 204 | [-1, 1, Conv, [64, 3, 1]], 205 | [-1, 1, Conv, [64, 3, 1]], 206 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 207 | [-1, 1, Conv, [160, 1, 1]], # 176 208 | [-11, 1, Conv, [128, 1, 1]], 209 | [-12, 1, Conv, [128, 1, 1]], 210 | [-1, 1, Conv, [64, 3, 1]], 211 | [-1, 1, Conv, [64, 3, 1]], 212 | [-1, 1, Conv, [64, 3, 1]], 213 | [-1, 1, Conv, [64, 3, 1]], 214 | [-1, 1, Conv, [64, 3, 1]], 215 | [-1, 1, Conv, [64, 3, 1]], 216 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 217 | [-1, 1, Conv, [160, 1, 1]], # 186 218 | [[-1, -11], 1, Shortcut, [1]], # 187 219 | 220 | [-1, 1, DownC, [320]], 221 | [[-1, 162], 1, Concat, [1]], 222 | 223 | [-1, 1, Conv, [256, 1, 1]], 224 | [-2, 1, Conv, [256, 1, 1]], 225 | [-1, 1, Conv, [128, 3, 1]], 226 | [-1, 1, Conv, [128, 3, 1]], 227 | [-1, 1, Conv, [128, 3, 1]], 228 | [-1, 1, Conv, [128, 3, 1]], 229 | [-1, 1, Conv, [128, 3, 1]], 230 | [-1, 1, Conv, [128, 3, 1]], 231 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 232 | [-1, 1, Conv, [320, 1, 1]], # 199 233 | [-11, 1, Conv, [256, 1, 1]], 234 | [-12, 1, Conv, [256, 1, 1]], 235 | [-1, 1, Conv, [128, 3, 1]], 236 | [-1, 1, Conv, [128, 3, 1]], 237 | [-1, 1, Conv, [128, 3, 1]], 238 | [-1, 1, Conv, [128, 3, 1]], 239 | [-1, 1, Conv, [128, 3, 1]], 240 | [-1, 1, Conv, [128, 3, 1]], 241 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 242 | [-1, 1, Conv, [320, 1, 1]], # 209 243 | [[-1, -11], 1, Shortcut, [1]], # 210 244 | 245 | [-1, 1, DownC, [480]], 246 | [[-1, 137], 1, Concat, [1]], 247 | 248 | [-1, 1, Conv, [384, 1, 1]], 249 | [-2, 1, Conv, [384, 1, 1]], 250 | [-1, 1, Conv, [192, 3, 1]], 251 | [-1, 1, Conv, [192, 3, 1]], 252 | [-1, 1, Conv, [192, 3, 1]], 253 | [-1, 1, Conv, [192, 3, 1]], 254 | [-1, 1, Conv, [192, 3, 1]], 255 | [-1, 1, Conv, [192, 3, 1]], 256 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 257 | [-1, 1, Conv, [480, 1, 1]], # 222 258 | [-11, 1, Conv, [384, 1, 1]], 259 | [-12, 1, Conv, [384, 1, 1]], 260 | [-1, 1, Conv, [192, 3, 1]], 261 | [-1, 1, Conv, [192, 3, 1]], 262 | [-1, 1, Conv, [192, 3, 1]], 263 | [-1, 1, Conv, [192, 3, 1]], 264 | [-1, 1, Conv, [192, 3, 1]], 265 
| [-1, 1, Conv, [192, 3, 1]], 266 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 267 | [-1, 1, Conv, [480, 1, 1]], # 232 268 | [[-1, -11], 1, Shortcut, [1]], # 233 269 | 270 | [-1, 1, DownC, [640]], 271 | [[-1, 112], 1, Concat, [1]], 272 | 273 | [-1, 1, Conv, [512, 1, 1]], 274 | [-2, 1, Conv, [512, 1, 1]], 275 | [-1, 1, Conv, [256, 3, 1]], 276 | [-1, 1, Conv, [256, 3, 1]], 277 | [-1, 1, Conv, [256, 3, 1]], 278 | [-1, 1, Conv, [256, 3, 1]], 279 | [-1, 1, Conv, [256, 3, 1]], 280 | [-1, 1, Conv, [256, 3, 1]], 281 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 282 | [-1, 1, Conv, [640, 1, 1]], # 245 283 | [-11, 1, Conv, [512, 1, 1]], 284 | [-12, 1, Conv, [512, 1, 1]], 285 | [-1, 1, Conv, [256, 3, 1]], 286 | [-1, 1, Conv, [256, 3, 1]], 287 | [-1, 1, Conv, [256, 3, 1]], 288 | [-1, 1, Conv, [256, 3, 1]], 289 | [-1, 1, Conv, [256, 3, 1]], 290 | [-1, 1, Conv, [256, 3, 1]], 291 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 292 | [-1, 1, Conv, [640, 1, 1]], # 255 293 | [[-1, -11], 1, Shortcut, [1]], # 256 294 | 295 | [187, 1, Conv, [320, 3, 1]], 296 | [210, 1, Conv, [640, 3, 1]], 297 | [233, 1, Conv, [960, 3, 1]], 298 | [256, 1, Conv, [1280, 3, 1]], 299 | 300 | [[257,258,259,260], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 301 | ] 302 | -------------------------------------------------------------------------------- /cfg/training/yolov7-e6e.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7 backbone 14 | backbone: 15 | # [from, number, module, args], 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [80, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, DownC, [160]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 29 | [-1, 1, Conv, [160, 1, 1]], # 12 30 | [-11, 1, Conv, [64, 1, 1]], 31 | [-12, 1, Conv, [64, 1, 1]], 32 | [-1, 1, Conv, [64, 3, 1]], 33 | [-1, 1, Conv, [64, 3, 1]], 34 | [-1, 1, Conv, [64, 3, 1]], 35 | [-1, 1, Conv, [64, 3, 1]], 36 | [-1, 1, Conv, [64, 3, 1]], 37 | [-1, 1, Conv, [64, 3, 1]], 38 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 39 | [-1, 1, Conv, [160, 1, 1]], # 22 40 | [[-1, -11], 1, Shortcut, [1]], # 23 41 | 42 | [-1, 1, DownC, [320]], # 24-P3/8 43 | [-1, 1, Conv, [128, 1, 1]], 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, Conv, [128, 3, 1]], 46 | [-1, 1, Conv, [128, 3, 1]], 47 | [-1, 1, Conv, [128, 3, 1]], 48 | [-1, 1, Conv, [128, 3, 1]], 49 | [-1, 1, Conv, [128, 3, 1]], 50 | [-1, 1, Conv, [128, 3, 1]], 51 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 52 | [-1, 1, Conv, [320, 1, 1]], # 34 53 | [-11, 1, Conv, [128, 1, 1]], 54 | [-12, 1, Conv, [128, 1, 1]], 55 | [-1, 1, Conv, [128, 3, 1]], 56 | [-1, 1, Conv, [128, 3, 1]], 57 | [-1, 1, Conv, [128, 3, 1]], 58 | [-1, 1, Conv, [128, 3, 1]], 59 | [-1, 1, Conv, [128, 3, 1]], 60 | [-1, 1, Conv, [128, 3, 1]], 61 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 62 | [-1, 1, Conv, [320, 1, 1]], # 44 63 | [[-1, -11], 1, Shortcut, [1]], # 45 64 | 65 | [-1, 1, DownC, [640]], # 46-P4/16 66 
| [-1, 1, Conv, [256, 1, 1]], 67 | [-2, 1, Conv, [256, 1, 1]], 68 | [-1, 1, Conv, [256, 3, 1]], 69 | [-1, 1, Conv, [256, 3, 1]], 70 | [-1, 1, Conv, [256, 3, 1]], 71 | [-1, 1, Conv, [256, 3, 1]], 72 | [-1, 1, Conv, [256, 3, 1]], 73 | [-1, 1, Conv, [256, 3, 1]], 74 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 75 | [-1, 1, Conv, [640, 1, 1]], # 56 76 | [-11, 1, Conv, [256, 1, 1]], 77 | [-12, 1, Conv, [256, 1, 1]], 78 | [-1, 1, Conv, [256, 3, 1]], 79 | [-1, 1, Conv, [256, 3, 1]], 80 | [-1, 1, Conv, [256, 3, 1]], 81 | [-1, 1, Conv, [256, 3, 1]], 82 | [-1, 1, Conv, [256, 3, 1]], 83 | [-1, 1, Conv, [256, 3, 1]], 84 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 85 | [-1, 1, Conv, [640, 1, 1]], # 66 86 | [[-1, -11], 1, Shortcut, [1]], # 67 87 | 88 | [-1, 1, DownC, [960]], # 68-P5/32 89 | [-1, 1, Conv, [384, 1, 1]], 90 | [-2, 1, Conv, [384, 1, 1]], 91 | [-1, 1, Conv, [384, 3, 1]], 92 | [-1, 1, Conv, [384, 3, 1]], 93 | [-1, 1, Conv, [384, 3, 1]], 94 | [-1, 1, Conv, [384, 3, 1]], 95 | [-1, 1, Conv, [384, 3, 1]], 96 | [-1, 1, Conv, [384, 3, 1]], 97 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 98 | [-1, 1, Conv, [960, 1, 1]], # 78 99 | [-11, 1, Conv, [384, 1, 1]], 100 | [-12, 1, Conv, [384, 1, 1]], 101 | [-1, 1, Conv, [384, 3, 1]], 102 | [-1, 1, Conv, [384, 3, 1]], 103 | [-1, 1, Conv, [384, 3, 1]], 104 | [-1, 1, Conv, [384, 3, 1]], 105 | [-1, 1, Conv, [384, 3, 1]], 106 | [-1, 1, Conv, [384, 3, 1]], 107 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 108 | [-1, 1, Conv, [960, 1, 1]], # 88 109 | [[-1, -11], 1, Shortcut, [1]], # 89 110 | 111 | [-1, 1, DownC, [1280]], # 90-P6/64 112 | [-1, 1, Conv, [512, 1, 1]], 113 | [-2, 1, Conv, [512, 1, 1]], 114 | [-1, 1, Conv, [512, 3, 1]], 115 | [-1, 1, Conv, [512, 3, 1]], 116 | [-1, 1, Conv, [512, 3, 1]], 117 | [-1, 1, Conv, [512, 3, 1]], 118 | [-1, 1, Conv, [512, 3, 1]], 119 | [-1, 1, Conv, [512, 3, 1]], 120 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 121 | [-1, 1, Conv, [1280, 1, 1]], # 100 122 | [-11, 1, Conv, [512, 1, 1]], 123 | [-12, 1, Conv, [512, 1, 1]], 124 | [-1, 1, Conv, [512, 3, 1]], 125 | [-1, 1, Conv, [512, 3, 1]], 126 | [-1, 1, Conv, [512, 3, 1]], 127 | [-1, 1, Conv, [512, 3, 1]], 128 | [-1, 1, Conv, [512, 3, 1]], 129 | [-1, 1, Conv, [512, 3, 1]], 130 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 131 | [-1, 1, Conv, [1280, 1, 1]], # 110 132 | [[-1, -11], 1, Shortcut, [1]], # 111 133 | ] 134 | 135 | # yolov7 head 136 | head: 137 | [[-1, 1, SPPCSPC, [640]], # 112 138 | 139 | [-1, 1, Conv, [480, 1, 1]], 140 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 141 | [89, 1, Conv, [480, 1, 1]], # route backbone P5 142 | [[-1, -2], 1, Concat, [1]], 143 | 144 | [-1, 1, Conv, [384, 1, 1]], 145 | [-2, 1, Conv, [384, 1, 1]], 146 | [-1, 1, Conv, [192, 3, 1]], 147 | [-1, 1, Conv, [192, 3, 1]], 148 | [-1, 1, Conv, [192, 3, 1]], 149 | [-1, 1, Conv, [192, 3, 1]], 150 | [-1, 1, Conv, [192, 3, 1]], 151 | [-1, 1, Conv, [192, 3, 1]], 152 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 153 | [-1, 1, Conv, [480, 1, 1]], # 126 154 | [-11, 1, Conv, [384, 1, 1]], 155 | [-12, 1, Conv, [384, 1, 1]], 156 | [-1, 1, Conv, [192, 3, 1]], 157 | [-1, 1, Conv, [192, 3, 1]], 158 | [-1, 1, Conv, [192, 3, 1]], 159 | [-1, 1, Conv, [192, 3, 1]], 160 | [-1, 1, Conv, [192, 3, 1]], 161 | [-1, 1, Conv, [192, 3, 1]], 162 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 163 | [-1, 1, Conv, [480, 1, 1]], # 136 164 | [[-1, -11], 1, Shortcut, [1]], # 137 165 | 166 | [-1, 1, Conv, [320, 1, 1]], 167 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 168 | [67, 1, Conv, [320, 1, 1]], # route backbone P4 169 | [[-1, -2], 1, 
Concat, [1]], 170 | 171 | [-1, 1, Conv, [256, 1, 1]], 172 | [-2, 1, Conv, [256, 1, 1]], 173 | [-1, 1, Conv, [128, 3, 1]], 174 | [-1, 1, Conv, [128, 3, 1]], 175 | [-1, 1, Conv, [128, 3, 1]], 176 | [-1, 1, Conv, [128, 3, 1]], 177 | [-1, 1, Conv, [128, 3, 1]], 178 | [-1, 1, Conv, [128, 3, 1]], 179 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 180 | [-1, 1, Conv, [320, 1, 1]], # 151 181 | [-11, 1, Conv, [256, 1, 1]], 182 | [-12, 1, Conv, [256, 1, 1]], 183 | [-1, 1, Conv, [128, 3, 1]], 184 | [-1, 1, Conv, [128, 3, 1]], 185 | [-1, 1, Conv, [128, 3, 1]], 186 | [-1, 1, Conv, [128, 3, 1]], 187 | [-1, 1, Conv, [128, 3, 1]], 188 | [-1, 1, Conv, [128, 3, 1]], 189 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 190 | [-1, 1, Conv, [320, 1, 1]], # 161 191 | [[-1, -11], 1, Shortcut, [1]], # 162 192 | 193 | [-1, 1, Conv, [160, 1, 1]], 194 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 195 | [45, 1, Conv, [160, 1, 1]], # route backbone P3 196 | [[-1, -2], 1, Concat, [1]], 197 | 198 | [-1, 1, Conv, [128, 1, 1]], 199 | [-2, 1, Conv, [128, 1, 1]], 200 | [-1, 1, Conv, [64, 3, 1]], 201 | [-1, 1, Conv, [64, 3, 1]], 202 | [-1, 1, Conv, [64, 3, 1]], 203 | [-1, 1, Conv, [64, 3, 1]], 204 | [-1, 1, Conv, [64, 3, 1]], 205 | [-1, 1, Conv, [64, 3, 1]], 206 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 207 | [-1, 1, Conv, [160, 1, 1]], # 176 208 | [-11, 1, Conv, [128, 1, 1]], 209 | [-12, 1, Conv, [128, 1, 1]], 210 | [-1, 1, Conv, [64, 3, 1]], 211 | [-1, 1, Conv, [64, 3, 1]], 212 | [-1, 1, Conv, [64, 3, 1]], 213 | [-1, 1, Conv, [64, 3, 1]], 214 | [-1, 1, Conv, [64, 3, 1]], 215 | [-1, 1, Conv, [64, 3, 1]], 216 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 217 | [-1, 1, Conv, [160, 1, 1]], # 186 218 | [[-1, -11], 1, Shortcut, [1]], # 187 219 | 220 | [-1, 1, DownC, [320]], 221 | [[-1, 162], 1, Concat, [1]], 222 | 223 | [-1, 1, Conv, [256, 1, 1]], 224 | [-2, 1, Conv, [256, 1, 1]], 225 | [-1, 1, Conv, [128, 3, 1]], 226 | [-1, 1, Conv, [128, 3, 1]], 227 | [-1, 1, Conv, [128, 3, 1]], 228 | [-1, 1, Conv, [128, 3, 1]], 229 | [-1, 1, Conv, [128, 3, 1]], 230 | [-1, 1, Conv, [128, 3, 1]], 231 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 232 | [-1, 1, Conv, [320, 1, 1]], # 199 233 | [-11, 1, Conv, [256, 1, 1]], 234 | [-12, 1, Conv, [256, 1, 1]], 235 | [-1, 1, Conv, [128, 3, 1]], 236 | [-1, 1, Conv, [128, 3, 1]], 237 | [-1, 1, Conv, [128, 3, 1]], 238 | [-1, 1, Conv, [128, 3, 1]], 239 | [-1, 1, Conv, [128, 3, 1]], 240 | [-1, 1, Conv, [128, 3, 1]], 241 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 242 | [-1, 1, Conv, [320, 1, 1]], # 209 243 | [[-1, -11], 1, Shortcut, [1]], # 210 244 | 245 | [-1, 1, DownC, [480]], 246 | [[-1, 137], 1, Concat, [1]], 247 | 248 | [-1, 1, Conv, [384, 1, 1]], 249 | [-2, 1, Conv, [384, 1, 1]], 250 | [-1, 1, Conv, [192, 3, 1]], 251 | [-1, 1, Conv, [192, 3, 1]], 252 | [-1, 1, Conv, [192, 3, 1]], 253 | [-1, 1, Conv, [192, 3, 1]], 254 | [-1, 1, Conv, [192, 3, 1]], 255 | [-1, 1, Conv, [192, 3, 1]], 256 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 257 | [-1, 1, Conv, [480, 1, 1]], # 222 258 | [-11, 1, Conv, [384, 1, 1]], 259 | [-12, 1, Conv, [384, 1, 1]], 260 | [-1, 1, Conv, [192, 3, 1]], 261 | [-1, 1, Conv, [192, 3, 1]], 262 | [-1, 1, Conv, [192, 3, 1]], 263 | [-1, 1, Conv, [192, 3, 1]], 264 | [-1, 1, Conv, [192, 3, 1]], 265 | [-1, 1, Conv, [192, 3, 1]], 266 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 267 | [-1, 1, Conv, [480, 1, 1]], # 232 268 | [[-1, -11], 1, Shortcut, [1]], # 233 269 | 270 | [-1, 1, DownC, [640]], 271 | [[-1, 112], 1, Concat, [1]], 
272 | 273 | [-1, 1, Conv, [512, 1, 1]], 274 | [-2, 1, Conv, [512, 1, 1]], 275 | [-1, 1, Conv, [256, 3, 1]], 276 | [-1, 1, Conv, [256, 3, 1]], 277 | [-1, 1, Conv, [256, 3, 1]], 278 | [-1, 1, Conv, [256, 3, 1]], 279 | [-1, 1, Conv, [256, 3, 1]], 280 | [-1, 1, Conv, [256, 3, 1]], 281 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 282 | [-1, 1, Conv, [640, 1, 1]], # 245 283 | [-11, 1, Conv, [512, 1, 1]], 284 | [-12, 1, Conv, [512, 1, 1]], 285 | [-1, 1, Conv, [256, 3, 1]], 286 | [-1, 1, Conv, [256, 3, 1]], 287 | [-1, 1, Conv, [256, 3, 1]], 288 | [-1, 1, Conv, [256, 3, 1]], 289 | [-1, 1, Conv, [256, 3, 1]], 290 | [-1, 1, Conv, [256, 3, 1]], 291 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 292 | [-1, 1, Conv, [640, 1, 1]], # 255 293 | [[-1, -11], 1, Shortcut, [1]], # 256 294 | 295 | [187, 1, Conv, [320, 3, 1]], 296 | [210, 1, Conv, [640, 3, 1]], 297 | [233, 1, Conv, [960, 3, 1]], 298 | [256, 1, Conv, [1280, 3, 1]], 299 | 300 | [186, 1, Conv, [320, 3, 1]], 301 | [161, 1, Conv, [640, 3, 1]], 302 | [136, 1, Conv, [960, 3, 1]], 303 | [112, 1, Conv, [1280, 3, 1]], 304 | 305 | [[257,258,259,260,261,262,263,264], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6) 306 | ] 307 | -------------------------------------------------------------------------------- /segment_video.py: -------------------------------------------------------------------------------- 1 | import tensorrt as trt 2 | import pycuda.autoinit 3 | import pycuda.driver as cuda 4 | import torch.backends.cudnn as cudnn 5 | import numpy as np 6 | import cv2 7 | import argparse 8 | import time 9 | import onnxruntime as ort 10 | import onnx 11 | import os 12 | import torch 13 | import yaml 14 | import tqdm 15 | import glob 16 | from PIL import Image 17 | from skimage.io._plugins.pil_plugin import ndarray_to_pil, pil_to_ndarray 18 | from utils.datasets import LoadStreams, LoadImages 19 | from utils.datasets import letterbox 20 | from torchvision import transforms 21 | from models.experimental import attempt_load 22 | from utils.general import non_max_suppression_mask_conf 23 | 24 | 25 | from detectron2.modeling.poolers import ROIPooler 26 | from detectron2.structures import Boxes 27 | from detectron2.utils.memory import retry_if_cuda_oom 28 | from detectron2.layers import paste_masks_in_image 29 | 30 | class HostDeviceMem(object): 31 | def __init__(self, host_mem, device_mem): 32 | self.host = host_mem 33 | self.device = device_mem 34 | 35 | def __str__(self): 36 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 37 | 38 | def __repr__(self): 39 | return self.__str__() 40 | 41 | class BaseEngine(object): 42 | def __init__(self, engine_path, imgsz=(320,320)): 43 | self.imgsz = imgsz 44 | self.mean = None 45 | self.std = None 46 | self.class_names = [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 47 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 48 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 49 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 50 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 51 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 52 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 53 | 
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 54 | 'hair drier', 'toothbrush' ] 55 | 56 | f = args.model_onnx # matches the --model_onnx argument defined below 57 | model_onnx = onnx.load(f) 58 | self.input_shapes = [[d.dim_value for d in _input.type.tensor_type.shape.dim] for _input in model_onnx.graph.input] 59 | 60 | 61 | logger = trt.Logger(trt.Logger.WARNING) 62 | trt.init_libnvinfer_plugins(logger,'') 63 | runtime = trt.Runtime(logger) 64 | with open(engine_path, "rb") as f: 65 | serialized_engine = f.read() 66 | engine = runtime.deserialize_cuda_engine(serialized_engine) 67 | self.context = engine.create_execution_context() 68 | self.inputs, self.outputs, self.bindings = [], [], [] 69 | self.stream = cuda.Stream() 70 | for binding in engine: 71 | size = trt.volume(engine.get_binding_shape(binding)) 72 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 73 | host_mem = cuda.pagelocked_empty(size, dtype) 74 | device_mem = cuda.mem_alloc(host_mem.nbytes) 75 | self.bindings.append(int(device_mem)) 76 | if engine.binding_is_input(binding): 77 | self.inputs.append(HostDeviceMem(host_mem, device_mem)) 78 | else: 79 | self.outputs.append(HostDeviceMem(host_mem, device_mem)) 80 | def PreProcess(self, image_path): 81 | image = cv2.imread(image_path) 82 | real_image = image.copy() 83 | img = letterbox(image, self.imgsz, stride=64, auto=True)[0] 84 | if (np.shape(image)[0:2] != tuple(self.input_shapes[0][2:4])): # not the same shape as the input of the onnx model; dynamic shapes are not implemented yet 85 | image = cv2.resize(image, (self.input_shapes[0][3], self.input_shapes[0][2])) # cv2.resize expects (width, height) 86 | img = transforms.ToTensor()(img) 87 | img = torch.unsqueeze(img, 0) 88 | return img, real_image 89 | def PostProcess(self, img, hyp, inf_out, attn, bases, sem_output, real_image): 90 | bases = torch.cat([bases, sem_output], dim=1) 91 | nb, _, height, width = img.shape 92 | pooler_scale = 0.25 #model.pooler_scale 93 | pooler = ROIPooler(output_size=hyp['mask_resolution'], scales=(pooler_scale,), sampling_ratio=1, pooler_type='ROIAlignV2', canonical_level=2) 94 | 95 | output, output_mask = non_max_suppression_mask_conf(inf_out, attn, bases, pooler, hyp, conf_thres=0.25, iou_thres=0.65, merge=False, mask_iou=None) 96 | 97 | pred, pred_masks = output[0], output_mask[0] 98 | if pred is not None: 99 | bboxes = Boxes(pred[:, :4]) 100 | original_pred_masks = pred_masks.view(-1, hyp['mask_resolution'], hyp['mask_resolution']) 101 | pred_masks = retry_if_cuda_oom(paste_masks_in_image)( original_pred_masks, bboxes, (height, width), threshold=0.5) 102 | pred_masks_np = pred_masks.detach().cpu().numpy() 103 | pred_conf = pred[:, 4].detach().cpu().numpy() 104 | nimg = img[0].permute(1, 2, 0) * 255 105 | nimg = nimg.cpu().numpy().astype(np.uint8) 106 | nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR) 107 | pnimg = nimg.copy() 108 | nimg[:,:] = nimg[:,:]*0 109 | cnimg = nimg.copy() 110 | ite = 0 111 | for one_mask, conf in zip(pred_masks_np, pred_conf): 112 | cnimg[:,:] = cnimg[:,:]*0 113 | if conf < 0.25: 114 | continue 115 | color = [0,255,0] 116 | pnimg[one_mask] = pnimg[one_mask] * 0.5 + np.array(color, dtype=np.uint8) * 0.5 117 | cnimg[one_mask] = cnimg[one_mask]*0 + 255 118 | nimg[one_mask] = nimg[one_mask]*0 + 255 119 | ite +=1 120 | else: 121 | nimg = img[0].permute(1, 2, 0) * 255 122 | nimg = nimg.cpu().numpy().astype(np.uint8) 123 | nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR) 124 | pnimg = nimg.copy() 125 | 126 | return pnimg, nimg, real_image 127 | 128 | def infer(self, img): 129 | img = np.ascontiguousarray(img, 
dtype=np.float32) 130 | self.inputs[0].host = img 131 | for inp in self.inputs: 132 | cuda.memcpy_htod_async(inp.device, inp.host, self.stream) 133 | self.context.execute_async_v2( 134 | bindings=self.bindings, 135 | stream_handle=self.stream.handle) 136 | for out in self.outputs: 137 | cuda.memcpy_dtoh_async(out.host, out.device, self.stream) 138 | self.stream.synchronize() 139 | data = [out.host for out in self.outputs] 140 | return data 141 | 142 | def inference(self, dataset, conf=0.25): 143 | with open('data/hyp.scratch.mask.yaml') as f: 144 | hyp = yaml.load(f, Loader=yaml.FullLoader) 145 | vid_writer = None 146 | vid_path = None 147 | img_path_alone = "data/horses.jpg" 148 | img, real_image = self.PreProcess(img_path_alone) 149 | for _ in range(5): 150 | output = self.infer(img) # dry runs to warm up the engine 151 | iteration = 0 152 | 153 | for path, im0s, vid_cap in dataset: 154 | real_image = im0s.copy() 155 | img = letterbox(im0s, self.imgsz, stride=64, auto=True)[0] 156 | if (np.shape(img)[0:2] != tuple(self.input_shapes[0][2:4])): # not the same shape as the input of the onnx model; dynamic shapes are not implemented yet 157 | print("/!\ Shape of the input " + str(np.shape(img)[0:2]) + " different from the input size of the ONNX model " + str(self.input_shapes[0][2:4]) + ", have to resize the image.") 158 | img = cv2.resize(img, (self.input_shapes[0][3], self.input_shapes[0][2])) # cv2.resize expects (width, height) 159 | img = transforms.ToTensor()(img) 160 | img = torch.unsqueeze(img, 0) 161 | 162 | output = self.infer(img) 163 | 164 | for i in range(len(output)): 165 | output[i] = torch.tensor(output[i]) 166 | inf_out = torch.reshape((output[5]), (1, len((output[5]))//85,85)) 167 | attn = torch.reshape((output[6]), (1, (len((output[6]))//980),980)) 168 | bases = torch.reshape((output[0]), (1, 4, ((len(output[0])//(self.imgsz[0]//4))//4), (self.imgsz[0]//4))) 169 | sem_output = torch.reshape((output[1]), (1, 1, (len(output[1])//(self.imgsz[0]//4)), (self.imgsz[0]//4))) 170 | 171 | pnimg, nimg, real_image = self.PostProcess(img, hyp, inf_out, attn, bases, sem_output, real_image) 172 | 173 | if args.save_video: 174 | 175 | if vid_path != args.save_path: # new video 176 | vid_path = args.save_path 177 | if isinstance(vid_writer, cv2.VideoWriter): 178 | vid_writer.release() # release previous video writer 179 | if vid_cap: # video 180 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 181 | w = int(self.imgsz[0]) 182 | h = int(self.imgsz[1]) 183 | vid_writer = cv2.VideoWriter(str(args.save_path), cv2.VideoWriter_fourcc(*'mp4v'), fps, (h, w)) 184 | vid_writer.write(pnimg) 185 | iteration += 1 186 | 187 | def get_parser(): 188 | parser = argparse.ArgumentParser( 189 | description="YOLOv7-mask TensorRT segmentation demo") 190 | parser.add_argument( 191 | "--input", 192 | default="data/horses.jpg", 193 | nargs="+", 194 | help="A file or directory of your input data " 195 | "If not given, will show output in an OpenCV window.", 196 | ) 197 | parser.add_argument( 198 | "--model", 199 | default='./engineyolov7-mask.engine', 200 | help="A file or directory of your TensorRT engine ", 201 | ) 202 | parser.add_argument( 203 | "--model_onnx", 204 | default='onnx/yolov7-mask.onnx', 205 | help="A file or directory of your ONNX model ", 206 | ) 207 | parser.add_argument( 208 | "--imgsz", 209 | default=640, 210 | type=int, 211 | help="Inference image size ", 212 | ) 213 | parser.add_argument( 214 | "--save_video", 215 | action="store_true", 216 | ) 217 | parser.add_argument( 218 | "--save_path", 219 | help="A file path for the output video ", 220 | ) 221 | 222 | 
return parser 223 | 224 | args = get_parser().parse_args() 225 | arg_input = args.input 226 | if (args.save_path is None and args.save_video): 227 | print("You need an output path to save the video: mkdir results && --save_path results/name_video.mp4") 228 | exit(1) 229 | dataset = LoadImages(arg_input[0], img_size=args.imgsz, stride=64) 230 | pred = BaseEngine(engine_path=args.model, imgsz=(args.imgsz,args.imgsz)) 231 | pred.inference(dataset) 232 | 233 | 234 | --------------------------------------------------------------------------------