├── utils
│   ├── __init__.py
│   ├── aws
│   │   ├── __init__.py
│   │   ├── mime.sh
│   │   ├── resume.py
│   │   └── userdata.sh
│   ├── wandb_logging
│   │   ├── __init__.py
│   │   └── log_dataset.py
│   ├── __pycache__
│   │   ├── loss.cpython-36.pyc
│   │   ├── plots.cpython-36.pyc
│   │   ├── __init__.cpython-36.pyc
│   │   ├── add_nms.cpython-36.pyc
│   │   ├── datasets.cpython-36.pyc
│   │   ├── general.cpython-36.pyc
│   │   ├── metrics.cpython-36.pyc
│   │   ├── autoanchor.cpython-36.pyc
│   │   ├── activations.cpython-36.pyc
│   │   ├── carbox_auto.cpython-36.pyc
│   │   ├── google_utils.cpython-36.pyc
│   │   └── torch_utils.cpython-36.pyc
│   ├── google_app_engine
│   │   ├── additional_requirements.txt
│   │   ├── app.yaml
│   │   └── Dockerfile
│   ├── activations.py
│   ├── google_utils.py
│   ├── add_nms.py
│   ├── autoanchor.py
│   └── metrics.py
├── models
│   ├── __init__.py
│   └── __pycache__
│       ├── yolo.cpython-36.pyc
│       ├── common.cpython-36.pyc
│       ├── __init__.cpython-36.pyc
│       └── experimental.cpython-36.pyc
├── data
│   ├── horses.jpg
│   ├── coco.yaml
│   ├── hyp.scratch.p5.yaml
│   ├── hyp.scratch.p6.yaml
│   ├── hyp.scratch.custom.yaml
│   ├── hyp.scratch.tiny.yaml
│   └── hyp.scratch.mask.yaml
├── figures
│   ├── 000000542426.jpg
│   ├── 000000553267.jpg
│   ├── 000000553735.jpg
│   ├── 000000564532.jpg
│   ├── 000000573973.jpg
│   └── 000000575916.jpg
├── results
│   ├── 640_trt_cv2img_VP_0.jpg
│   ├── 640_trt_cv2img_VP_1.jpg
│   ├── 640_trt_cv2img_VP_2.jpg
│   ├── 640_trt_cv2img_VP_3.jpg
│   ├── 640_trt_cv2img_VP_4.jpg
│   └── 640_trt_cv2img_VP_5.jpg
├── __pycache__
│   ├── carbox.cpython-36.pyc
│   ├── carbox_160.cpython-36.pyc
│   └── carbox_auto.cpython-36.pyc
├── deploy
│   └── triton-inference-server
│       ├── data
│       │   ├── dog.jpg
│       │   └── dog_result.jpg
│       ├── boundingbox.py
│       ├── labels.py
│       ├── processing.py
│       ├── render.py
│       └── README.md
├── requirements.txt
├── cfg
│   ├── baseline
│   │   ├── r50-csp.yaml
│   │   ├── x50-csp.yaml
│   │   ├── yolov3.yaml
│   │   ├── yolov3-spp.yaml
│   │   ├── yolor-csp.yaml
│   │   ├── yolov4-csp.yaml
│   │   ├── yolor-csp-x.yaml
│   │   ├── yolor-e6.yaml
│   │   ├── yolor-d6.yaml
│   │   ├── yolor-p6.yaml
│   │   └── yolor-w6.yaml
│   ├── deploy
│   │   ├── yolov7-tiny-silu.yaml
│   │   ├── yolov7.yaml
│   │   ├── yolov7-tiny.yaml
│   │   ├── yolov7x.yaml
│   │   ├── yolov7-w6.yaml
│   │   ├── yolov7-e6.yaml
│   │   ├── yolov7-d6.yaml
│   │   └── yolov7-e6e.yaml
│   ├── training
│   │   ├── yolov7.yaml
│   │   ├── yolov7-tiny.yaml
│   │   ├── yolov7x.yaml
│   │   ├── yolov7-w6.yaml
│   │   ├── yolov7-e6.yaml
│   │   ├── yolov7-d6.yaml
│   │   └── yolov7-e6e.yaml
│   └── yolov7-mask.yaml
├── hubconf.py
├── README.md
├── export_mask.py
└── segment_video.py
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/utils/aws/__init__.py:
--------------------------------------------------------------------------------
1 | #init
--------------------------------------------------------------------------------
/utils/wandb_logging/__init__.py:
--------------------------------------------------------------------------------
1 | # init
--------------------------------------------------------------------------------
/data/horses.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/data/horses.jpg
--------------------------------------------------------------------------------
/figures/000000542426.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/figures/000000542426.jpg -------------------------------------------------------------------------------- /figures/000000553267.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/figures/000000553267.jpg -------------------------------------------------------------------------------- /figures/000000553735.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/figures/000000553735.jpg -------------------------------------------------------------------------------- /figures/000000564532.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/figures/000000564532.jpg -------------------------------------------------------------------------------- /figures/000000573973.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/figures/000000573973.jpg -------------------------------------------------------------------------------- /figures/000000575916.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/figures/000000575916.jpg -------------------------------------------------------------------------------- /results/640_trt_cv2img_VP_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/results/640_trt_cv2img_VP_0.jpg -------------------------------------------------------------------------------- /results/640_trt_cv2img_VP_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/results/640_trt_cv2img_VP_1.jpg -------------------------------------------------------------------------------- /results/640_trt_cv2img_VP_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/results/640_trt_cv2img_VP_2.jpg -------------------------------------------------------------------------------- /results/640_trt_cv2img_VP_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/results/640_trt_cv2img_VP_3.jpg -------------------------------------------------------------------------------- /results/640_trt_cv2img_VP_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/results/640_trt_cv2img_VP_4.jpg -------------------------------------------------------------------------------- /results/640_trt_cv2img_VP_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/results/640_trt_cv2img_VP_5.jpg -------------------------------------------------------------------------------- /__pycache__/carbox.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/__pycache__/carbox.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/carbox_160.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/__pycache__/carbox_160.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/carbox_auto.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/__pycache__/carbox_auto.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/yolo.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/models/__pycache__/yolo.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/loss.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/loss.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/plots.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/plots.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/common.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/models/__pycache__/common.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/add_nms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/add_nms.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/datasets.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/datasets.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/general.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/general.cpython-36.pyc -------------------------------------------------------------------------------- 
/utils/__pycache__/metrics.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/metrics.cpython-36.pyc -------------------------------------------------------------------------------- /deploy/triton-inference-server/data/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/deploy/triton-inference-server/data/dog.jpg -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/autoanchor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/autoanchor.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/experimental.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/models/__pycache__/experimental.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/activations.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/activations.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/carbox_auto.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/carbox_auto.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/google_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/google_utils.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/torch_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/utils/__pycache__/torch_utils.cpython-36.pyc -------------------------------------------------------------------------------- /utils/google_app_engine/additional_requirements.txt: -------------------------------------------------------------------------------- 1 | # add these requirements in your app on top of the existing ones 2 | pip==18.1 3 | Flask==1.0.2 4 | gunicorn==19.9.0 5 | -------------------------------------------------------------------------------- /deploy/triton-inference-server/data/dog_result.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leandro-svg/Yolov7_Segmentation_Tensorrt/HEAD/deploy/triton-inference-server/data/dog_result.jpg
--------------------------------------------------------------------------------
/utils/google_app_engine/app.yaml:
--------------------------------------------------------------------------------
1 | runtime: custom
2 | env: flex
3 | 
4 | service: yolorapp
5 | 
6 | liveness_check:
7 |   initial_delay_sec: 600
8 | 
9 | manual_scaling:
10 |   instances: 1
11 | resources:
12 |   cpu: 1
13 |   memory_gb: 4
14 |   disk_size_gb: 20
--------------------------------------------------------------------------------
/utils/aws/mime.sh:
--------------------------------------------------------------------------------
1 | # AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/
2 | # This script will run on every instance restart, not only on first start
3 | # --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA ---
4 | 
5 | Content-Type: multipart/mixed; boundary="//"
6 | MIME-Version: 1.0
7 | 
8 | --//
9 | Content-Type: text/cloud-config; charset="us-ascii"
10 | MIME-Version: 1.0
11 | Content-Transfer-Encoding: 7bit
12 | Content-Disposition: attachment; filename="cloud-config.txt"
13 | 
14 | #cloud-config
15 | cloud_final_modules:
16 |   - [scripts-user, always]
17 | 
18 | --//
19 | Content-Type: text/x-shellscript; charset="us-ascii"
20 | MIME-Version: 1.0
21 | Content-Transfer-Encoding: 7bit
22 | Content-Disposition: attachment; filename="userdata.txt"
23 | 
24 | #!/bin/bash
25 | # --- paste contents of userdata.sh here ---
26 | --//
27 | 
--------------------------------------------------------------------------------
/utils/wandb_logging/log_dataset.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | 
3 | import yaml
4 | 
5 | from wandb_utils import WandbLogger
6 | 
7 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
8 | 
9 | 
10 | def create_dataset_artifact(opt):
11 |     with open(opt.data) as f:
12 |         data = yaml.load(f, Loader=yaml.SafeLoader) # data dict
13 |     logger = WandbLogger(opt, '', None, data, job_type='Dataset Creation')
14 | 
15 | 
16 | if __name__ == '__main__':
17 |     parser = argparse.ArgumentParser()
18 |     parser.add_argument('--data', type=str, default='data/coco.yaml', help='data.yaml path')
19 |     parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
20 |     parser.add_argument('--project', type=str, default='YOLOR', help='name of W&B Project')
21 |     opt = parser.parse_args()
22 |     opt.resume = False # Explicitly disallow resume check for dataset upload job
23 | 
24 |     create_dataset_artifact(opt)
25 | 
--------------------------------------------------------------------------------
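The script above is normally driven from the command line, e.g. `python utils/wandb_logging/log_dataset.py --data data/coco.yaml --project YOLOR`. As a minimal sketch only, assuming `wandb` is installed, `utils/wandb_logging` is on `sys.path`, and `WandbLogger` needs no options beyond the CLI flags mirrored here, the same upload can be driven programmatically:

from types import SimpleNamespace
from log_dataset import create_dataset_artifact

opt = SimpleNamespace(
    data='data/coco.yaml',  # dataset description to upload
    single_cls=False,       # keep the original class labels
    project='YOLOR',        # destination W&B project
    resume=False,           # the CLI also forces this off for upload jobs
)
create_dataset_artifact(opt)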
/utils/google_app_engine/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gcr.io/google-appengine/python
2 | 
3 | # Create a virtualenv for dependencies. This isolates these packages from
4 | # system-level packages.
5 | # Use -p python3 or -p python3.7 to select python version. Default is version 2.
6 | RUN virtualenv /env -p python3
7 | 
8 | # Setting these environment variables is the same as running
9 | # source /env/bin/activate.
10 | ENV VIRTUAL_ENV /env
11 | ENV PATH /env/bin:$PATH
12 | 
13 | RUN apt-get update && apt-get install -y python-opencv
14 | 
15 | # Copy the application's requirements.txt and run pip to install all
16 | # dependencies into the virtualenv.
17 | ADD requirements.txt /app/requirements.txt
18 | RUN pip install -r /app/requirements.txt
19 | 
20 | # Add the application source code.
21 | ADD . /app
22 | 
23 | # Run a WSGI server to serve the application. gunicorn must be declared as
24 | # a dependency in requirements.txt.
25 | CMD gunicorn -b :$PORT main:app
26 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Usage: pip install -r requirements.txt
2 | 
3 | # Base ----------------------------------------
4 | matplotlib>=3.2.2
5 | numpy>=1.18.5
6 | opencv-python>=4.1.1
7 | Pillow>=7.1.2
8 | PyYAML>=5.3.1
9 | requests>=2.23.0
10 | scipy>=1.4.1
11 | torch>=1.7.0,!=1.12.0
12 | torchvision>=0.8.1,!=0.13.0
13 | tqdm>=4.41.0
14 | protobuf<4.21.3
15 | 
16 | # Logging -------------------------------------
17 | tensorboard>=2.4.1
18 | # wandb
19 | 
20 | # Plotting ------------------------------------
21 | pandas>=1.1.4
22 | seaborn>=0.11.0
23 | 
24 | # Export --------------------------------------
25 | # coremltools>=4.1 # CoreML export
26 | # onnx>=1.9.0 # ONNX export
27 | # onnx-simplifier>=0.3.6 # ONNX simplifier
28 | # scikit-learn==0.19.2 # CoreML quantization
29 | # tensorflow>=2.4.1 # TFLite export
30 | # tensorflowjs>=3.9.0 # TF.js export
31 | # openvino-dev # OpenVINO export
32 | 
33 | # Extras --------------------------------------
34 | ipython # interactive notebook
35 | psutil # system utilization
36 | thop # FLOPs computation
37 | # albumentations>=1.0.3
38 | # pycocotools>=2.0 # COCO mAP
39 | # roboflow
40 | 
--------------------------------------------------------------------------------
/deploy/triton-inference-server/boundingbox.py:
--------------------------------------------------------------------------------
1 | class BoundingBox:
2 |     def __init__(self, classID, confidence, x1, x2, y1, y2, image_width, image_height):
3 |         self.classID = classID
4 |         self.confidence = confidence
5 |         self.x1 = x1
6 |         self.x2 = x2
7 |         self.y1 = y1
8 |         self.y2 = y2
9 |         self.u1 = x1 / image_width
10 |         self.u2 = x2 / image_width
11 |         self.v1 = y1 / image_height
12 |         self.v2 = y2 / image_height
13 | 
14 |     def box(self):
15 |         return (self.x1, self.y1, self.x2, self.y2)
16 | 
17 |     def width(self):
18 |         return self.x2 - self.x1
19 | 
20 |     def height(self):
21 |         return self.y2 - self.y1
22 | 
23 |     def center_absolute(self):
24 |         return (0.5 * (self.x1 + self.x2), 0.5 * (self.y1 + self.y2))
25 | 
26 |     def center_normalized(self):
27 |         return (0.5 * (self.u1 + self.u2), 0.5 * (self.v1 + self.v2))
28 | 
29 |     def size_absolute(self):
30 |         return (self.x2 - self.x1, self.y2 - self.y1)
31 | 
32 |     def size_normalized(self):
33 |         return (self.u2 - self.u1, self.v2 - self.v1)
34 | 
--------------------------------------------------------------------------------
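A quick usage sketch for BoundingBox above, with made-up values on a 640x480 image, to show the absolute and normalized accessors side by side:

from boundingbox import BoundingBox

det = BoundingBox(classID=16, confidence=0.87, x1=100, x2=300, y1=50, y2=250,
                  image_width=640, image_height=480)
print(det.box())                # (100, 50, 300, 250), pixel corners
print(det.size_absolute())      # (200, 200)
print(det.center_normalized())  # (0.3125, 0.3125), fractions of image size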
/utils/aws/resume.py:
--------------------------------------------------------------------------------
1 | # Resume all interrupted trainings in yolor/ dir including DDP trainings
2 | # Usage: $ python utils/aws/resume.py
3 | 
4 | import os
5 | import sys
6 | from pathlib import Path
7 | 
8 | import torch
9 | import yaml
10 | 
11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories
12 | 
13 | port = 0 # --master_port
14 | path = Path('').resolve()
15 | for last in path.rglob('*/**/last.pt'):
16 |     ckpt = torch.load(last)
17 |     if ckpt['optimizer'] is None:
18 |         continue
19 | 
20 |     # Load opt.yaml
21 |     with open(last.parent.parent / 'opt.yaml') as f:
22 |         opt = yaml.load(f, Loader=yaml.SafeLoader)
23 | 
24 |     # Get device count
25 |     d = opt['device'].split(',') # devices
26 |     nd = len(d) # number of devices
27 |     ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1) # distributed data parallel
28 | 
29 |     if ddp: # multi-GPU
30 |         port += 1
31 |         cmd = f'python -m torch.distributed.launch --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
32 |     else: # single-GPU
33 |         cmd = f'python train.py --resume {last}'
34 | 
35 |     cmd += ' > /dev/null 2>&1 &' # redirect output to /dev/null and run in the background
36 |     print(cmd)
37 |     os.system(cmd)
38 | 
--------------------------------------------------------------------------------
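One caveat in resume.py: `torch.load(last)` restores every tensor to the device it was saved from, which can allocate GPU memory just to inspect the optimizer state. A lighter sketch of the same scan, assuming the same checkpoint layout, maps everything to CPU first:

import torch
from pathlib import Path

for last in Path('').resolve().rglob('*/**/last.pt'):
    ckpt = torch.load(last, map_location='cpu')  # no GPU allocation
    if ckpt.get('optimizer') is None:            # finished runs store no optimizer
        continue
    print(f'resumable checkpoint: {last}')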
/utils/aws/userdata.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html
3 | # This script will run only once on first instance start (for a re-start script see mime.sh)
4 | # /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir
5 | # Use >300 GB SSD
6 | 
7 | cd /home/ubuntu
8 | if [ ! -d yolor ]; then
9 |   echo "Running first-time script." # install dependencies, download COCO, pull Docker
10 |   git clone -b paper https://github.com/WongKinYiu/yolor && sudo chmod -R 777 yolor
11 |   cd yolor
12 |   bash data/scripts/get_coco.sh && echo "Data done." &
13 |   sudo docker pull nvcr.io/nvidia/pytorch:21.08-py3 && echo "Docker done." &
14 |   python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." &
15 |   wait && echo "All tasks done." # finish background tasks
16 | else
17 |   echo "Running re-start script." # resume interrupted runs
18 |   i=0
19 |   list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour'
20 |   while IFS= read -r id; do
21 |     ((i++))
22 |     echo "restarting container $i: $id"
23 |     sudo docker start $id
24 |     # sudo docker exec -it $id python train.py --resume # single-GPU
25 |     sudo docker exec -d $id python utils/aws/resume.py # multi-scenario
26 |   done <<<"$list"
27 | fi
28 | 
--------------------------------------------------------------------------------
/data/coco.yaml:
--------------------------------------------------------------------------------
1 | # COCO 2017 dataset http://cocodataset.org
2 | 
3 | # download command/URL (optional)
4 | download: bash ./scripts/get_coco.sh
5 | 
6 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
7 | train: ./coco/train2017.txt # 118287 images
8 | val: ./coco/val2017.txt # 5000 images
9 | test: ./coco/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
10 | 
11 | # number of classes
12 | nc: 80
13 | 
14 | # class names
15 | names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
16 |          'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
17 |          'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
18 |          'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
19 |          'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
20 |          'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
21 |          'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
22 |          'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
23 |          'hair drier', 'toothbrush' ]
24 | 
--------------------------------------------------------------------------------
/data/hyp.scratch.p5.yaml:
--------------------------------------------------------------------------------
1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2 | lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
3 | momentum: 0.937 # SGD momentum/Adam beta1
4 | weight_decay: 0.0005 # optimizer weight decay 5e-4
5 | warmup_epochs: 3.0 # warmup epochs (fractions ok)
6 | warmup_momentum: 0.8 # warmup initial momentum
7 | warmup_bias_lr: 0.1 # warmup initial bias lr
8 | box: 0.05 # box loss gain
9 | cls: 0.3 # cls loss gain
10 | cls_pw: 1.0 # cls BCELoss positive_weight
11 | obj: 0.7 # obj loss gain (scale with pixels)
12 | obj_pw: 1.0 # obj BCELoss positive_weight
13 | iou_t: 0.20 # IoU training threshold
14 | anchor_t: 4.0 # anchor-multiple threshold
15 | # anchors: 3 # anchors per output layer (0 to ignore)
16 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19 | hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20 | degrees: 0.0 # image rotation (+/- deg)
21 | translate: 0.2 # image translation (+/- fraction)
22 | scale: 0.9 # image scale (+/- gain)
23 | shear: 0.0 # image shear (+/- deg)
24 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25 | flipud: 0.0 # image flip up-down (probability)
26 | fliplr: 0.5 # image flip left-right (probability)
27 | mosaic: 1.0 # image mosaic (probability)
28 | mixup: 0.15 # image mixup
(probability) 29 | copy_paste: 0.0 # image copy paste (probability) 30 | paste_in: 0.15 # image copy paste (probability), use 0 for faster training 31 | loss_ota: 1 # use ComputeLossOTA, use 0 for faster training -------------------------------------------------------------------------------- /data/hyp.scratch.p6.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 0.05 # box loss gain 9 | cls: 0.3 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 0.7 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | iou_t: 0.20 # IoU training threshold 14 | anchor_t: 4.0 # anchor-multiple threshold 15 | # anchors: 3 # anchors per output layer (0 to ignore) 16 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 17 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 18 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 19 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 20 | degrees: 0.0 # image rotation (+/- deg) 21 | translate: 0.2 # image translation (+/- fraction) 22 | scale: 0.9 # image scale (+/- gain) 23 | shear: 0.0 # image shear (+/- deg) 24 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 25 | flipud: 0.0 # image flip up-down (probability) 26 | fliplr: 0.5 # image flip left-right (probability) 27 | mosaic: 1.0 # image mosaic (probability) 28 | mixup: 0.15 # image mixup (probability) 29 | copy_paste: 0.0 # image copy paste (probability) 30 | paste_in: 0.15 # image copy paste (probability), use 0 for faster training 31 | loss_ota: 1 # use ComputeLossOTA, use 0 for faster training -------------------------------------------------------------------------------- /data/hyp.scratch.custom.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 0.05 # box loss gain 9 | cls: 0.3 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 0.7 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | iou_t: 0.20 # IoU training threshold 14 | anchor_t: 4.0 # anchor-multiple threshold 15 | # anchors: 3 # anchors per output layer (0 to ignore) 16 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 17 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 18 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 19 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 20 | degrees: 0.0 # image rotation (+/- deg) 21 | translate: 0.2 # image translation (+/- fraction) 22 | scale: 0.5 # image scale (+/- gain) 23 | shear: 0.0 # image shear (+/- deg) 24 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 25 | flipud: 0.0 # image flip up-down (probability) 26 | fliplr: 0.5 # image flip left-right (probability) 
27 | mosaic: 1.0 # image mosaic (probability) 28 | mixup: 0.0 # image mixup (probability) 29 | copy_paste: 0.0 # image copy paste (probability) 30 | paste_in: 0.0 # image copy paste (probability), use 0 for faster training 31 | loss_ota: 1 # use ComputeLossOTA, use 0 for faster training -------------------------------------------------------------------------------- /cfg/baseline/r50-csp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # CSP-ResNet backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Stem, [128]], # 0-P1/2 16 | [-1, 3, ResCSPC, [128]], 17 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 18 | [-1, 4, ResCSPC, [256]], 19 | [-1, 1, Conv, [512, 3, 2]], # 4-P3/8 20 | [-1, 6, ResCSPC, [512]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 6-P3/8 22 | [-1, 3, ResCSPC, [1024]], # 7 23 | ] 24 | 25 | # CSP-Res-PAN head 26 | head: 27 | [[-1, 1, SPPCSPC, [512]], # 8 28 | [-1, 1, Conv, [256, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [5, 1, Conv, [256, 1, 1]], # route backbone P4 31 | [[-1, -2], 1, Concat, [1]], 32 | [-1, 2, ResCSPB, [256]], # 13 33 | [-1, 1, Conv, [128, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [3, 1, Conv, [128, 1, 1]], # route backbone P3 36 | [[-1, -2], 1, Concat, [1]], 37 | [-1, 2, ResCSPB, [128]], # 18 38 | [-1, 1, Conv, [256, 3, 1]], 39 | [-2, 1, Conv, [256, 3, 2]], 40 | [[-1, 13], 1, Concat, [1]], # cat 41 | [-1, 2, ResCSPB, [256]], # 22 42 | [-1, 1, Conv, [512, 3, 1]], 43 | [-2, 1, Conv, [512, 3, 2]], 44 | [[-1, 8], 1, Concat, [1]], # cat 45 | [-1, 2, ResCSPB, [512]], # 26 46 | [-1, 1, Conv, [1024, 3, 1]], 47 | 48 | [[19,23,27], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5) 49 | ] 50 | -------------------------------------------------------------------------------- /data/hyp.scratch.tiny.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 2 | lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) 3 | momentum: 0.937 # SGD momentum/Adam beta1 4 | weight_decay: 0.0005 # optimizer weight decay 5e-4 5 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 6 | warmup_momentum: 0.8 # warmup initial momentum 7 | warmup_bias_lr: 0.1 # warmup initial bias lr 8 | box: 0.05 # box loss gain 9 | cls: 0.5 # cls loss gain 10 | cls_pw: 1.0 # cls BCELoss positive_weight 11 | obj: 1.0 # obj loss gain (scale with pixels) 12 | obj_pw: 1.0 # obj BCELoss positive_weight 13 | iou_t: 0.20 # IoU training threshold 14 | anchor_t: 4.0 # anchor-multiple threshold 15 | # anchors: 3 # anchors per output layer (0 to ignore) 16 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 17 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 18 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 19 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 20 | degrees: 0.0 # image rotation (+/- deg) 21 | translate: 0.1 # image translation (+/- fraction) 22 | scale: 0.5 # image scale (+/- gain) 23 | shear: 0.0 # image shear (+/- deg) 24 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 25 | flipud: 0.0 # image flip up-down (probability) 26 | fliplr: 0.5 # image flip left-right (probability) 27 | 
mosaic: 1.0 # image mosaic (probability)
28 | mixup: 0.05 # image mixup (probability)
29 | copy_paste: 0.0 # image copy paste (probability)
30 | paste_in: 0.05 # image copy paste (probability), use 0 for faster training
31 | loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
32 | 
--------------------------------------------------------------------------------
/cfg/baseline/x50-csp.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 | 
6 | # anchors
7 | anchors:
8 |   - [12,16, 19,36, 40,28] # P3/8
9 |   - [36,75, 76,55, 72,146] # P4/16
10 |   - [142,110, 192,243, 459,401] # P5/32
11 | 
12 | # CSP-ResNeXt backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Stem, [128]], # 0-P1/2
16 |    [-1, 3, ResXCSPC, [128]],
17 |    [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
18 |    [-1, 4, ResXCSPC, [256]],
19 |    [-1, 1, Conv, [512, 3, 2]], # 4-P3/8
20 |    [-1, 6, ResXCSPC, [512]],
21 |    [-1, 1, Conv, [1024, 3, 2]], # 6-P3/8
22 |    [-1, 3, ResXCSPC, [1024]], # 7
23 |   ]
24 | 
25 | # CSP-ResX-PAN head
26 | head:
27 |   [[-1, 1, SPPCSPC, [512]], # 8
28 |    [-1, 1, Conv, [256, 1, 1]],
29 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
30 |    [5, 1, Conv, [256, 1, 1]], # route backbone P4
31 |    [[-1, -2], 1, Concat, [1]],
32 |    [-1, 2, ResXCSPB, [256]], # 13
33 |    [-1, 1, Conv, [128, 1, 1]],
34 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35 |    [3, 1, Conv, [128, 1, 1]], # route backbone P3
36 |    [[-1, -2], 1, Concat, [1]],
37 |    [-1, 2, ResXCSPB, [128]], # 18
38 |    [-1, 1, Conv, [256, 3, 1]],
39 |    [-2, 1, Conv, [256, 3, 2]],
40 |    [[-1, 13], 1, Concat, [1]], # cat
41 |    [-1, 2, ResXCSPB, [256]], # 22
42 |    [-1, 1, Conv, [512, 3, 1]],
43 |    [-2, 1, Conv, [512, 3, 2]],
44 |    [[-1, 8], 1, Concat, [1]], # cat
45 |    [-1, 2, ResXCSPB, [512]], # 26
46 |    [-1, 1, Conv, [1024, 3, 1]],
47 | 
48 |    [[19,23,27], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
49 |   ]
50 | 
--------------------------------------------------------------------------------
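The data/hyp.scratch.*.yaml files above (and hyp.scratch.mask.yaml below) are flat key/value hyperparameter maps. A minimal sketch of consuming one, assuming PyYAML as pinned in requirements.txt; the derived value follows the lrf comment in the file:

import yaml

with open('data/hyp.scratch.p5.yaml') as f:
    hyp = yaml.load(f, Loader=yaml.SafeLoader)

final_lr = hyp['lr0'] * hyp['lrf']  # OneCycleLR endpoint: 0.01 * 0.1 = 0.001
print(final_lr, hyp['mosaic'], hyp['loss_ota'])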
/data/hyp.scratch.mask.yaml:
--------------------------------------------------------------------------------
1 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2 | lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
3 | momentum: 0.937 # SGD momentum/Adam beta1
4 | weight_decay: 0.0005 # optimizer weight decay 5e-4
5 | warmup_epochs: 3.0 # warmup epochs (fractions ok)
6 | warmup_momentum: 0.8 # warmup initial momentum
7 | warmup_bias_lr: 0.1 # warmup initial bias lr
8 | box: 0.05 # box loss gain
9 | cls: 0.3 # cls loss gain
10 | cls_pw: 1.0 # cls BCELoss positive_weight
11 | obj: 0.7 # obj loss gain (scale with pixels)
12 | obj_pw: 1.0 # obj BCELoss positive_weight
13 | mask: 0.05 # mask loss gain
14 | mask_pw: 1.0 # mask BCELoss positive_weight
15 | pointrend: 0.05 # pointrend loss gain
16 | iou_t: 0.20 # IoU training threshold
17 | anchor_t: 4.0 # anchor-multiple threshold
18 | # anchors: 3 # anchors per output layer (0 to ignore)
19 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
20 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
21 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
22 | hsv_v: 0.4 # image HSV-Value augmentation (fraction)
23 | degrees: 0.0 # image rotation (+/- deg)
24 | translate: 0.1 # image translation (+/- fraction)
25 | scale: 0.5 # image scale (+/- gain)
26 | shear: 0.0 # image shear (+/- deg)
27 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
28 | flipud: 0.0 # image flip up-down (probability)
29 | fliplr: 0.5 # image flip left-right (probability)
30 | mosaic: 1.0 # image mosaic (probability)
31 | mixup: 0. # image mixup (probability)
32 | copy_paste: 0. # image copy paste (probability)
33 | paste_in: 0. # image copy paste (probability)
34 | attn_resolution: 14
35 | num_base: 5
36 | mask_resolution: 56
37 | 
38 | 
--------------------------------------------------------------------------------
/cfg/baseline/yolov3.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 | 
6 | # anchors
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # darknet53 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Conv, [32, 3, 1]], # 0
16 |    [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17 |    [-1, 1, Bottleneck, [64]],
18 |    [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19 |    [-1, 2, Bottleneck, [128]],
20 |    [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21 |    [-1, 8, Bottleneck, [256]],
22 |    [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23 |    [-1, 8, Bottleneck, [512]],
24 |    [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 |    [-1, 4, Bottleneck, [1024]], # 10
26 |   ]
27 | 
28 | # YOLOv3 head
29 | head:
30 |   [[-1, 1, Bottleneck, [1024, False]],
31 |    [-1, 1, Conv, [512, [1, 1]]],
32 |    [-1, 1, Conv, [1024, 3, 1]],
33 |    [-1, 1, Conv, [512, 1, 1]],
34 |    [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35 | 
36 |    [-2, 1, Conv, [256, 1, 1]],
37 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 |    [[-1, 8], 1, Concat, [1]], # cat backbone P4
39 |    [-1, 1, Bottleneck, [512, False]],
40 |    [-1, 1, Bottleneck, [512, False]],
41 |    [-1, 1, Conv, [256, 1, 1]],
42 |    [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43 | 
44 |    [-2, 1, Conv, [128, 1, 1]],
45 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46 |    [[-1, 6], 1, Concat, [1]], # cat backbone P3
47 |    [-1, 1, Bottleneck, [256, False]],
48 |    [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49 | 
50 |    [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51 |   ]
52 | 
--------------------------------------------------------------------------------
/cfg/baseline/yolov3-spp.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 | 
6 | # anchors
7 | anchors:
8 |   - [10,13, 16,30, 33,23] # P3/8
9 |   - [30,61, 62,45, 59,119] # P4/16
10 |   - [116,90, 156,198, 373,326] # P5/32
11 | 
12 | # darknet53 backbone
13 | backbone:
14 |   # [from, number, module, args]
15 |   [[-1, 1, Conv, [32, 3, 1]], # 0
16 |    [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17 |    [-1, 1, Bottleneck, [64]],
18 |    [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19 |    [-1, 2, Bottleneck, [128]],
20 |    [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21 |    [-1, 8, Bottleneck, [256]],
22 |    [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23 |    [-1, 8, Bottleneck, [512]],
24 |    [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 |    [-1, 4, Bottleneck, [1024]], # 10
26 |   ]
27 | 
28 | # YOLOv3-SPP head
29 | head:
30 |   [[-1, 1, Bottleneck, [1024, False]],
31 |    [-1, 1, SPP, [512, [5, 9, 13]]],
32 |    [-1, 1, Conv, [1024, 3, 1]],
33 |    [-1, 1, Conv, [512, 1, 1]],
34 |    [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35 | 
36 |    [-2, 1, Conv, [256, 1, 1]],
37 |    [-1, 1, nn.Upsample, [None, 2,
'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /deploy/triton-inference-server/labels.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class COCOLabels(Enum): 4 | PERSON = 0 5 | BICYCLE = 1 6 | CAR = 2 7 | MOTORBIKE = 3 8 | AEROPLANE = 4 9 | BUS = 5 10 | TRAIN = 6 11 | TRUCK = 7 12 | BOAT = 8 13 | TRAFFIC_LIGHT = 9 14 | FIRE_HYDRANT = 10 15 | STOP_SIGN = 11 16 | PARKING_METER = 12 17 | BENCH = 13 18 | BIRD = 14 19 | CAT = 15 20 | DOG = 16 21 | HORSE = 17 22 | SHEEP = 18 23 | COW = 19 24 | ELEPHANT = 20 25 | BEAR = 21 26 | ZEBRA = 22 27 | GIRAFFE = 23 28 | BACKPACK = 24 29 | UMBRELLA = 25 30 | HANDBAG = 26 31 | TIE = 27 32 | SUITCASE = 28 33 | FRISBEE = 29 34 | SKIS = 30 35 | SNOWBOARD = 31 36 | SPORTS_BALL = 32 37 | KITE = 33 38 | BASEBALL_BAT = 34 39 | BASEBALL_GLOVE = 35 40 | SKATEBOARD = 36 41 | SURFBOARD = 37 42 | TENNIS_RACKET = 38 43 | BOTTLE = 39 44 | WINE_GLASS = 40 45 | CUP = 41 46 | FORK = 42 47 | KNIFE = 43 48 | SPOON = 44 49 | BOWL = 45 50 | BANANA = 46 51 | APPLE = 47 52 | SANDWICH = 48 53 | ORANGE = 49 54 | BROCCOLI = 50 55 | CARROT = 51 56 | HOT_DOG = 52 57 | PIZZA = 53 58 | DONUT = 54 59 | CAKE = 55 60 | CHAIR = 56 61 | SOFA = 57 62 | POTTEDPLANT = 58 63 | BED = 59 64 | DININGTABLE = 60 65 | TOILET = 61 66 | TVMONITOR = 62 67 | LAPTOP = 63 68 | MOUSE = 64 69 | REMOTE = 65 70 | KEYBOARD = 66 71 | CELL_PHONE = 67 72 | MICROWAVE = 68 73 | OVEN = 69 74 | TOASTER = 70 75 | SINK = 71 76 | REFRIGERATOR = 72 77 | BOOK = 73 78 | CLOCK = 74 79 | VASE = 75 80 | SCISSORS = 76 81 | TEDDY_BEAR = 77 82 | HAIR_DRIER = 78 83 | TOOTHBRUSH = 79 84 | -------------------------------------------------------------------------------- /cfg/baseline/yolor-csp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # CSP-Darknet backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, BottleneckCSPC, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, BottleneckCSPC, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, BottleneckCSPC, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, BottleneckCSPC, [1024]], # 10 26 | ] 27 | 28 | # CSP-Dark-PAN head 29 | head: 30 | [[-1, 1, SPPCSPC, [512]], # 11 31 | [-1, 1, Conv, [256, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [8, 1, Conv, [256, 1, 1]], # route backbone P4 34 | [[-1, -2], 1, Concat, [1]], 35 | [-1, 2, BottleneckCSPB, [256]], # 16 36 | [-1, 1, Conv, [128, 1, 1]], 
37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [6, 1, Conv, [128, 1, 1]], # route backbone P3 39 | [[-1, -2], 1, Concat, [1]], 40 | [-1, 2, BottleneckCSPB, [128]], # 21 41 | [-1, 1, Conv, [256, 3, 1]], 42 | [-2, 1, Conv, [256, 3, 2]], 43 | [[-1, 16], 1, Concat, [1]], # cat 44 | [-1, 2, BottleneckCSPB, [256]], # 25 45 | [-1, 1, Conv, [512, 3, 1]], 46 | [-2, 1, Conv, [512, 3, 2]], 47 | [[-1, 11], 1, Concat, [1]], # cat 48 | [-1, 2, BottleneckCSPB, [512]], # 29 49 | [-1, 1, Conv, [1024, 3, 1]], 50 | 51 | [[22,26,30], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5) 52 | ] 53 | -------------------------------------------------------------------------------- /cfg/baseline/yolov4-csp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # CSP-Darknet backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, BottleneckCSPC, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, BottleneckCSPC, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, BottleneckCSPC, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, BottleneckCSPC, [1024]], # 10 26 | ] 27 | 28 | # CSP-Dark-PAN head 29 | head: 30 | [[-1, 1, SPPCSPC, [512]], # 11 31 | [-1, 1, Conv, [256, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [8, 1, Conv, [256, 1, 1]], # route backbone P4 34 | [[-1, -2], 1, Concat, [1]], 35 | [-1, 2, BottleneckCSPB, [256]], # 16 36 | [-1, 1, Conv, [128, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [6, 1, Conv, [128, 1, 1]], # route backbone P3 39 | [[-1, -2], 1, Concat, [1]], 40 | [-1, 2, BottleneckCSPB, [128]], # 21 41 | [-1, 1, Conv, [256, 3, 1]], 42 | [-2, 1, Conv, [256, 3, 2]], 43 | [[-1, 16], 1, Concat, [1]], # cat 44 | [-1, 2, BottleneckCSPB, [256]], # 25 45 | [-1, 1, Conv, [512, 3, 1]], 46 | [-2, 1, Conv, [512, 3, 2]], 47 | [[-1, 11], 1, Concat, [1]], # cat 48 | [-1, 2, BottleneckCSPB, [512]], # 29 49 | [-1, 1, Conv, [1024, 3, 1]], 50 | 51 | [[22,26,30], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 52 | ] 53 | -------------------------------------------------------------------------------- /cfg/baseline/yolor-csp-x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | width_multiple: 1.25 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # CSP-Darknet backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, BottleneckCSPC, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, BottleneckCSPC, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, BottleneckCSPC, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, BottleneckCSPC, [1024]], # 10 26 | ] 27 | 28 | # CSP-Dark-PAN head 29 | 
head: 30 | [[-1, 1, SPPCSPC, [512]], # 11 31 | [-1, 1, Conv, [256, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [8, 1, Conv, [256, 1, 1]], # route backbone P4 34 | [[-1, -2], 1, Concat, [1]], 35 | [-1, 2, BottleneckCSPB, [256]], # 16 36 | [-1, 1, Conv, [128, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [6, 1, Conv, [128, 1, 1]], # route backbone P3 39 | [[-1, -2], 1, Concat, [1]], 40 | [-1, 2, BottleneckCSPB, [128]], # 21 41 | [-1, 1, Conv, [256, 3, 1]], 42 | [-2, 1, Conv, [256, 3, 2]], 43 | [[-1, 16], 1, Concat, [1]], # cat 44 | [-1, 2, BottleneckCSPB, [256]], # 25 45 | [-1, 1, Conv, [512, 3, 1]], 46 | [-2, 1, Conv, [512, 3, 2]], 47 | [[-1, 11], 1, Concat, [1]], # cat 48 | [-1, 2, BottleneckCSPB, [512]], # 29 49 | [-1, 1, Conv, [1024, 3, 1]], 50 | 51 | [[22,26,30], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5) 52 | ] 53 | -------------------------------------------------------------------------------- /cfg/baseline/yolor-e6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # expand model depth 4 | width_multiple: 1.25 # expand layer channels 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # CSP-Darknet backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [64, 3, 1]], # 1-P1/2 18 | [-1, 1, DownC, [128]], # 2-P2/4 19 | [-1, 3, BottleneckCSPA, [128]], 20 | [-1, 1, DownC, [256]], # 4-P3/8 21 | [-1, 7, BottleneckCSPA, [256]], 22 | [-1, 1, DownC, [512]], # 6-P4/16 23 | [-1, 7, BottleneckCSPA, [512]], 24 | [-1, 1, DownC, [768]], # 8-P5/32 25 | [-1, 3, BottleneckCSPA, [768]], 26 | [-1, 1, DownC, [1024]], # 10-P6/64 27 | [-1, 3, BottleneckCSPA, [1024]], # 11 28 | ] 29 | 30 | # CSP-Dark-PAN head 31 | head: 32 | [[-1, 1, SPPCSPC, [512]], # 12 33 | [-1, 1, Conv, [384, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [-6, 1, Conv, [384, 1, 1]], # route backbone P5 36 | [[-1, -2], 1, Concat, [1]], 37 | [-1, 3, BottleneckCSPB, [384]], # 17 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 40 | [-13, 1, Conv, [256, 1, 1]], # route backbone P4 41 | [[-1, -2], 1, Concat, [1]], 42 | [-1, 3, BottleneckCSPB, [256]], # 22 43 | [-1, 1, Conv, [128, 1, 1]], 44 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 45 | [-20, 1, Conv, [128, 1, 1]], # route backbone P3 46 | [[-1, -2], 1, Concat, [1]], 47 | [-1, 3, BottleneckCSPB, [128]], # 27 48 | [-1, 1, Conv, [256, 3, 1]], 49 | [-2, 1, DownC, [256]], 50 | [[-1, 22], 1, Concat, [1]], # cat 51 | [-1, 3, BottleneckCSPB, [256]], # 31 52 | [-1, 1, Conv, [512, 3, 1]], 53 | [-2, 1, DownC, [384]], 54 | [[-1, 17], 1, Concat, [1]], # cat 55 | [-1, 3, BottleneckCSPB, [384]], # 35 56 | [-1, 1, Conv, [768, 3, 1]], 57 | [-2, 1, DownC, [512]], 58 | [[-1, 12], 1, Concat, [1]], # cat 59 | [-1, 3, BottleneckCSPB, [512]], # 39 60 | [-1, 1, Conv, [1024, 3, 1]], 61 | 62 | [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6) 63 | ] -------------------------------------------------------------------------------- /cfg/baseline/yolor-d6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # expand model depth 4 | width_multiple: 1.25 # expand layer channels 5 | 6 | # anchors 7 
| anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # CSP-Darknet backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [64, 3, 1]], # 1-P1/2 18 | [-1, 1, DownC, [128]], # 2-P2/4 19 | [-1, 3, BottleneckCSPA, [128]], 20 | [-1, 1, DownC, [256]], # 4-P3/8 21 | [-1, 15, BottleneckCSPA, [256]], 22 | [-1, 1, DownC, [512]], # 6-P4/16 23 | [-1, 15, BottleneckCSPA, [512]], 24 | [-1, 1, DownC, [768]], # 8-P5/32 25 | [-1, 7, BottleneckCSPA, [768]], 26 | [-1, 1, DownC, [1024]], # 10-P6/64 27 | [-1, 7, BottleneckCSPA, [1024]], # 11 28 | ] 29 | 30 | # CSP-Dark-PAN head 31 | head: 32 | [[-1, 1, SPPCSPC, [512]], # 12 33 | [-1, 1, Conv, [384, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [-6, 1, Conv, [384, 1, 1]], # route backbone P5 36 | [[-1, -2], 1, Concat, [1]], 37 | [-1, 3, BottleneckCSPB, [384]], # 17 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 40 | [-13, 1, Conv, [256, 1, 1]], # route backbone P4 41 | [[-1, -2], 1, Concat, [1]], 42 | [-1, 3, BottleneckCSPB, [256]], # 22 43 | [-1, 1, Conv, [128, 1, 1]], 44 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 45 | [-20, 1, Conv, [128, 1, 1]], # route backbone P3 46 | [[-1, -2], 1, Concat, [1]], 47 | [-1, 3, BottleneckCSPB, [128]], # 27 48 | [-1, 1, Conv, [256, 3, 1]], 49 | [-2, 1, DownC, [256]], 50 | [[-1, 22], 1, Concat, [1]], # cat 51 | [-1, 3, BottleneckCSPB, [256]], # 31 52 | [-1, 1, Conv, [512, 3, 1]], 53 | [-2, 1, DownC, [384]], 54 | [[-1, 17], 1, Concat, [1]], # cat 55 | [-1, 3, BottleneckCSPB, [384]], # 35 56 | [-1, 1, Conv, [768, 3, 1]], 57 | [-2, 1, DownC, [512]], 58 | [[-1, 12], 1, Concat, [1]], # cat 59 | [-1, 3, BottleneckCSPB, [512]], # 39 60 | [-1, 1, Conv, [1024, 3, 1]], 61 | 62 | [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6) 63 | ] -------------------------------------------------------------------------------- /cfg/baseline/yolor-p6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # expand model depth 4 | width_multiple: 1.0 # expand layer channels 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # CSP-Darknet backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [64, 3, 1]], # 1-P1/2 18 | [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 19 | [-1, 3, BottleneckCSPA, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 4-P3/8 21 | [-1, 7, BottleneckCSPA, [256]], 22 | [-1, 1, Conv, [384, 3, 2]], # 6-P4/16 23 | [-1, 7, BottleneckCSPA, [384]], 24 | [-1, 1, Conv, [512, 3, 2]], # 8-P5/32 25 | [-1, 3, BottleneckCSPA, [512]], 26 | [-1, 1, Conv, [640, 3, 2]], # 10-P6/64 27 | [-1, 3, BottleneckCSPA, [640]], # 11 28 | ] 29 | 30 | # CSP-Dark-PAN head 31 | head: 32 | [[-1, 1, SPPCSPC, [320]], # 12 33 | [-1, 1, Conv, [256, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [-6, 1, Conv, [256, 1, 1]], # route backbone P5 36 | [[-1, -2], 1, Concat, [1]], 37 | [-1, 3, BottleneckCSPB, [256]], # 17 38 | [-1, 1, Conv, [192, 1, 1]], 39 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 40 | [-13, 1, Conv, [192, 1, 1]], # route backbone P4 41 | [[-1, -2], 1, Concat, [1]], 42 | [-1, 3, 
BottleneckCSPB, [192]], # 22
43 |    [-1, 1, Conv, [128, 1, 1]],
44 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45 |    [-20, 1, Conv, [128, 1, 1]], # route backbone P3
46 |    [[-1, -2], 1, Concat, [1]],
47 |    [-1, 3, BottleneckCSPB, [128]], # 27
48 |    [-1, 1, Conv, [256, 3, 1]],
49 |    [-2, 1, Conv, [192, 3, 2]],
50 |    [[-1, 22], 1, Concat, [1]], # cat
51 |    [-1, 3, BottleneckCSPB, [192]], # 31
52 |    [-1, 1, Conv, [384, 3, 1]],
53 |    [-2, 1, Conv, [256, 3, 2]],
54 |    [[-1, 17], 1, Concat, [1]], # cat
55 |    [-1, 3, BottleneckCSPB, [256]], # 35
56 |    [-1, 1, Conv, [512, 3, 1]],
57 |    [-2, 1, Conv, [320, 3, 2]],
58 |    [[-1, 12], 1, Concat, [1]], # cat
59 |    [-1, 3, BottleneckCSPB, [320]], # 39
60 |    [-1, 1, Conv, [640, 3, 1]],
61 | 
62 |    [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
63 |   ]
--------------------------------------------------------------------------------
/cfg/baseline/yolor-w6.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # expand model depth
4 | width_multiple: 1.0 # expand layer channels
5 | 
6 | # anchors
7 | anchors:
8 |   - [ 19,27, 44,40, 38,94 ] # P3/8
9 |   - [ 96,68, 86,152, 180,137 ] # P4/16
10 |   - [ 140,301, 303,264, 238,542 ] # P5/32
11 |   - [ 436,615, 739,380, 925,792 ] # P6/64
12 | 
13 | # CSP-Darknet backbone
14 | backbone:
15 |   # [from, number, module, args]
16 |   [[-1, 1, ReOrg, []], # 0
17 |    [-1, 1, Conv, [64, 3, 1]], # 1-P1/2
18 |    [-1, 1, Conv, [128, 3, 2]], # 2-P2/4
19 |    [-1, 3, BottleneckCSPA, [128]],
20 |    [-1, 1, Conv, [256, 3, 2]], # 4-P3/8
21 |    [-1, 7, BottleneckCSPA, [256]],
22 |    [-1, 1, Conv, [512, 3, 2]], # 6-P4/16
23 |    [-1, 7, BottleneckCSPA, [512]],
24 |    [-1, 1, Conv, [768, 3, 2]], # 8-P5/32
25 |    [-1, 3, BottleneckCSPA, [768]],
26 |    [-1, 1, Conv, [1024, 3, 2]], # 10-P6/64
27 |    [-1, 3, BottleneckCSPA, [1024]], # 11
28 |   ]
29 | 
30 | # CSP-Dark-PAN head
31 | head:
32 |   [[-1, 1, SPPCSPC, [512]], # 12
33 |    [-1, 1, Conv, [384, 1, 1]],
34 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35 |    [-6, 1, Conv, [384, 1, 1]], # route backbone P5
36 |    [[-1, -2], 1, Concat, [1]],
37 |    [-1, 3, BottleneckCSPB, [384]], # 17
38 |    [-1, 1, Conv, [256, 1, 1]],
39 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40 |    [-13, 1, Conv, [256, 1, 1]], # route backbone P4
41 |    [[-1, -2], 1, Concat, [1]],
42 |    [-1, 3, BottleneckCSPB, [256]], # 22
43 |    [-1, 1, Conv, [128, 1, 1]],
44 |    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45 |    [-20, 1, Conv, [128, 1, 1]], # route backbone P3
46 |    [[-1, -2], 1, Concat, [1]],
47 |    [-1, 3, BottleneckCSPB, [128]], # 27
48 |    [-1, 1, Conv, [256, 3, 1]],
49 |    [-2, 1, Conv, [256, 3, 2]],
50 |    [[-1, 22], 1, Concat, [1]], # cat
51 |    [-1, 3, BottleneckCSPB, [256]], # 31
52 |    [-1, 1, Conv, [512, 3, 1]],
53 |    [-2, 1, Conv, [384, 3, 2]],
54 |    [[-1, 17], 1, Concat, [1]], # cat
55 |    [-1, 3, BottleneckCSPB, [384]], # 35
56 |    [-1, 1, Conv, [768, 3, 1]],
57 |    [-2, 1, Conv, [512, 3, 2]],
58 |    [[-1, 12], 1, Concat, [1]], # cat
59 |    [-1, 3, BottleneckCSPB, [512]], # 39
60 |    [-1, 1, Conv, [1024, 3, 1]],
61 | 
62 |    [[28,32,36,40], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5, P6)
63 |   ]
--------------------------------------------------------------------------------
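In all of the cfg/*.yaml files above, each backbone/head row reads [from, number, module, args]: 'from' indexes the layer(s) feeding this one (-1 = previous layer), 'number' is a repeat count scaled by depth_multiple, 'module' names a block class, and 'args' carries channel/kernel/stride arguments. This repository ships models/ only as compiled .pyc files, so the following is an illustrative reconstruction of the repeat-scaling idea only, not the project's actual parser:

def expand_row(row, depth_multiple):
    frm, number, module, args = row
    # depth scaling as the depth_multiple comments suggest: only repeated blocks grow
    repeats = max(round(number * depth_multiple), 1) if number > 1 else number
    return frm, repeats, module, args

# a backbone row from yolor-csp-x.yaml (depth_multiple: 1.33):
print(expand_row([-1, 8, 'BottleneckCSPC', [256]], 1.33))  # (-1, 11, 'BottleneckCSPC', [256])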
img_w, _ = img.shape 9 | new_h, new_w = input_shape[0], input_shape[1] 10 | offset_h, offset_w = 0, 0 11 | if (new_w / img_w) <= (new_h / img_h): 12 | new_h = int(img_h * new_w / img_w) 13 | offset_h = (input_shape[0] - new_h) // 2 14 | else: 15 | new_w = int(img_w * new_h / img_h) 16 | offset_w = (input_shape[1] - new_w) // 2 17 | resized = cv2.resize(img, (new_w, new_h)) 18 | img = np.full((input_shape[0], input_shape[1], 3), 127, dtype=np.uint8) 19 | img[offset_h:(offset_h + new_h), offset_w:(offset_w + new_w), :] = resized 20 | else: 21 | img = cv2.resize(img, (input_shape[1], input_shape[0])) 22 | 23 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 24 | img = img.transpose((2, 0, 1)).astype(np.float32) 25 | img /= 255.0 26 | return img 27 | 28 | def postprocess(num_dets, det_boxes, det_scores, det_classes, img_w, img_h, input_shape, letter_box=True): 29 | boxes = det_boxes[0, :num_dets[0][0]] / np.array([input_shape[0], input_shape[1], input_shape[0], input_shape[1]], dtype=np.float32) 30 | scores = det_scores[0, :num_dets[0][0]] 31 | classes = det_classes[0, :num_dets[0][0]].astype(int)  # np.int was removed in NumPy 1.24; builtin int works everywhere 32 | 33 | old_h, old_w = img_h, img_w 34 | offset_h, offset_w = 0, 0 35 | if letter_box: 36 | if (img_w / input_shape[1]) >= (img_h / input_shape[0]): 37 | old_h = int(input_shape[0] * img_w / input_shape[1]) 38 | offset_h = (old_h - img_h) // 2 39 | else: 40 | old_w = int(input_shape[1] * img_h / input_shape[0]) 41 | offset_w = (old_w - img_w) // 2 42 | 43 | boxes = boxes * np.array([old_w, old_h, old_w, old_h], dtype=np.float32) 44 | if letter_box: 45 | boxes -= np.array([offset_w, offset_h, offset_w, offset_h], dtype=np.float32) 46 | boxes = boxes.astype(int) 47 | 48 | detected_objects = [] 49 | for box, score, label in zip(boxes, scores, classes): 50 | detected_objects.append(BoundingBox(label, score, box[0], box[2], box[1], box[3], img_w, img_h)) 51 | return detected_objects 52 | -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1606.08415.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6.
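# Sanity check (illustrative sketch, assuming PyTorch >= 1.6 where F.hardswish
# exists): the hardtanh form above computes exactly the built-in op, which is
# why it is a safe, export-friendly stand-in:
#   x = torch.linspace(-6., 6., steps=121)
#   assert torch.allclose(x * F.hardtanh(x + 3, 0., 6.) / 6., F.hardswish(x))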
# for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7-tiny-silu.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv7-tiny backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 2]], # 0-P1/2 16 | 17 | [-1, 1, Conv, [64, 3, 2]], # 1-P2/4 18 | 19 | [-1, 1, Conv, [32, 1, 1]], 20 | [-2, 1, Conv, [32, 1, 1]], 21 | [-1, 1, Conv, [32, 3, 1]], 22 | [-1, 1, Conv, [32, 3, 1]], 23 | [[-1, -2, -3, -4], 1, Concat, [1]], 24 | [-1, 1, Conv, [64, 1, 1]], # 7 25 | 26 | [-1, 1, MP, []], # 8-P3/8 27 | [-1, 1, Conv, [64, 1, 1]], 28 | [-2, 1, Conv, [64, 1, 1]], 29 | [-1, 1, Conv, [64, 3, 1]], 30 | [-1, 1, Conv, [64, 3, 1]], 31 | [[-1, -2, -3, -4], 1, Concat, [1]], 32 | [-1, 1, Conv, [128, 1, 1]], # 14 33 | 34 | [-1, 1, MP, []], # 15-P4/16 35 | [-1, 1, Conv, [128, 1, 1]], 36 | [-2, 1, Conv, [128, 1, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [[-1, -2, -3, -4], 1, Concat, [1]], 40 | [-1, 1, Conv, [256, 1, 1]], # 21 41 | 42 | [-1, 1, MP, []], # 22-P5/32 43 | [-1, 1, Conv, [256, 1, 1]], 44 | [-2, 1, Conv, [256, 1, 1]], 45 | [-1, 1, Conv, [256, 3, 1]], 46 | [-1, 1, Conv, [256, 3, 1]], 47 | [[-1, -2, -3, -4], 1, Concat, [1]], 48 | [-1, 1, Conv, [512, 1, 1]], # 28 49 | ] 50 | 51 | # YOLOv7-tiny head 52 | head: 53 | [[-1, 1, Conv, [256, 1, 1]], 54 | [-2, 1, Conv, [256, 1, 1]], 55 | [-1, 1, SP, [5]], 56 | [-2, 1, SP, [9]], 57 | [-3, 1, SP, [13]], 58 | [[-1, -2, -3, -4], 1, Concat, [1]], 59 | [-1, 1, Conv, [256, 1, 1]], 60 | [[-1, -7], 1, Concat, [1]], 61 | [-1, 1, Conv, 
[256, 1, 1]], # 37 62 | 63 | [-1, 1, Conv, [128, 1, 1]], 64 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 65 | [21, 1, Conv, [128, 1, 1]], # route backbone P4 66 | [[-1, -2], 1, Concat, [1]], 67 | 68 | [-1, 1, Conv, [64, 1, 1]], 69 | [-2, 1, Conv, [64, 1, 1]], 70 | [-1, 1, Conv, [64, 3, 1]], 71 | [-1, 1, Conv, [64, 3, 1]], 72 | [[-1, -2, -3, -4], 1, Concat, [1]], 73 | [-1, 1, Conv, [128, 1, 1]], # 47 74 | 75 | [-1, 1, Conv, [64, 1, 1]], 76 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 77 | [14, 1, Conv, [64, 1, 1]], # route backbone P3 78 | [[-1, -2], 1, Concat, [1]], 79 | 80 | [-1, 1, Conv, [32, 1, 1]], 81 | [-2, 1, Conv, [32, 1, 1]], 82 | [-1, 1, Conv, [32, 3, 1]], 83 | [-1, 1, Conv, [32, 3, 1]], 84 | [[-1, -2, -3, -4], 1, Concat, [1]], 85 | [-1, 1, Conv, [64, 1, 1]], # 57 86 | 87 | [-1, 1, Conv, [128, 3, 2]], 88 | [[-1, 47], 1, Concat, [1]], 89 | 90 | [-1, 1, Conv, [64, 1, 1]], 91 | [-2, 1, Conv, [64, 1, 1]], 92 | [-1, 1, Conv, [64, 3, 1]], 93 | [-1, 1, Conv, [64, 3, 1]], 94 | [[-1, -2, -3, -4], 1, Concat, [1]], 95 | [-1, 1, Conv, [128, 1, 1]], # 65 96 | 97 | [-1, 1, Conv, [256, 3, 2]], 98 | [[-1, 37], 1, Concat, [1]], 99 | 100 | [-1, 1, Conv, [128, 1, 1]], 101 | [-2, 1, Conv, [128, 1, 1]], 102 | [-1, 1, Conv, [128, 3, 1]], 103 | [-1, 1, Conv, [128, 3, 1]], 104 | [[-1, -2, -3, -4], 1, Concat, [1]], 105 | [-1, 1, Conv, [256, 1, 1]], # 73 106 | 107 | [57, 1, Conv, [128, 3, 1]], 108 | [65, 1, Conv, [256, 3, 1]], 109 | [73, 1, Conv, [512, 3, 1]], 110 | 111 | [[74,75,76], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 112 | ] 113 | -------------------------------------------------------------------------------- /deploy/triton-inference-server/render.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import cv2 4 | 5 | from math import sqrt 6 | 7 | _LINE_THICKNESS_SCALING = 500.0 8 | 9 | np.random.seed(0) 10 | RAND_COLORS = np.random.randint(50, 255, (64, 3), "int") # used for class visu 11 | RAND_COLORS[0] = [220, 220, 220] 12 | 13 | def render_box(img, box, color=(200, 200, 200)): 14 | """ 15 | Render a box. Calculates scaling and thickness automatically. 16 | :param img: image to render into 17 | :param box: (x1, y1, x2, y2) - box coordinates 18 | :param color: (b, g, r) - box color 19 | :return: updated image 20 | """ 21 | x1, y1, x2, y2 = box 22 | thickness = int( 23 | round( 24 | (img.shape[0] * img.shape[1]) 25 | / (_LINE_THICKNESS_SCALING * _LINE_THICKNESS_SCALING) 26 | ) 27 | ) 28 | thickness = max(1, thickness) 29 | img = cv2.rectangle( 30 | img, 31 | (int(x1), int(y1)), 32 | (int(x2), int(y2)), 33 | color, 34 | thickness=thickness 35 | ) 36 | return img 37 | 38 | def render_filled_box(img, box, color=(200, 200, 200)): 39 | """ 40 | Render a filled box. 41 | :param img: image to render into 42 | :param box: (x1, y1, x2, y2) - box coordinates 43 | :param color: (b, g, r) - box color 44 | :return: updated image 45 | """ 46 | x1, y1, x2, y2 = box 47 | img = cv2.rectangle( 48 | img, 49 | (int(x1), int(y1)), 50 | (int(x2), int(y2)), 51 | color, 52 | thickness=cv2.FILLED 53 | ) 54 | return img 55 | 56 | _TEXT_THICKNESS_SCALING = 700.0 57 | _TEXT_SCALING = 520.0 58 | 59 | 60 | def get_text_size(img, text, normalised_scaling=1.0): 61 | """ 62 | Get calculated text size (as box width and height) 63 | :param img: image reference, used to determine appropriate text scaling 64 | :param text: text to display 65 | :param normalised_scaling: additional normalised scaling. Default 1.0.
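    Example (illustrative; 'frame' stands for any BGR image and the label string is invented):
        w, h = get_text_size(frame, 'car 0.87')
    returns roughly the pixel box that a matching render_text() call will occupy.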
66 | :return: (width, height) - width and height of text box 67 | """ 68 | thickness = int( 69 | round( 70 | (img.shape[0] * img.shape[1]) 71 | / (_TEXT_THICKNESS_SCALING * _TEXT_THICKNESS_SCALING) 72 | ) 73 | * normalised_scaling 74 | ) 75 | thickness = max(1, thickness) 76 | scaling = img.shape[0] / _TEXT_SCALING * normalised_scaling 77 | return cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, scaling, thickness)[0] 78 | 79 | 80 | def render_text(img, text, pos, color=(200, 200, 200), normalised_scaling=1.0): 81 | """ 82 | Render a text into the image. Calculates scaling and thickness automatically. 83 | :param img: image to render into 84 | :param text: text to display 85 | :param pos: (x, y) - upper left coordinates of render position 86 | :param color: (b, g, r) - text color 87 | :param normalised_scaling: additional normalised scaling. Default 1.0. 88 | :return: updated image 89 | """ 90 | x, y = pos 91 | thickness = int( 92 | round( 93 | (img.shape[0] * img.shape[1]) 94 | / (_TEXT_THICKNESS_SCALING * _TEXT_THICKNESS_SCALING) 95 | ) 96 | * normalised_scaling 97 | ) 98 | thickness = max(1, thickness) 99 | scaling = img.shape[0] / _TEXT_SCALING * normalised_scaling 100 | size = get_text_size(img, text, normalised_scaling) 101 | cv2.putText( 102 | img, 103 | text, 104 | (int(x), int(y + size[1])), 105 | cv2.FONT_HERSHEY_SIMPLEX, 106 | scaling, 107 | color, 108 | thickness=thickness, 109 | ) 110 | return img 111 | -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | """PyTorch Hub models 2 | 3 | Usage: 4 | import torch 5 | model = torch.hub.load('repo', 'model') 6 | """ 7 | 8 | from pathlib import Path 9 | 10 | import torch 11 | 12 | from models.yolo import Model 13 | from utils.general import check_requirements, set_logging 14 | from utils.google_utils import attempt_download 15 | from utils.torch_utils import select_device 16 | 17 | dependencies = ['torch', 'yaml'] 18 | check_requirements(Path(__file__).parent / 'requirements.txt', exclude=('pycocotools', 'thop')) 19 | set_logging() 20 | 21 | 22 | def create(name, pretrained, channels, classes, autoshape): 23 | """Creates a specified model 24 | 25 | Arguments: 26 | name (str): name of model, i.e. 
'yolov7' 27 | pretrained (bool): load pretrained weights into the model 28 | channels (int): number of input channels 29 | classes (int): number of model classes 30 | 31 | Returns: 32 | pytorch model 33 | """ 34 | try: 35 | cfg = list((Path(__file__).parent / 'cfg').rglob(f'{name}.yaml'))[0] # model.yaml path 36 | model = Model(cfg, channels, classes) 37 | if pretrained: 38 | fname = f'{name}.pt' # checkpoint filename 39 | attempt_download(fname) # download if not found locally 40 | ckpt = torch.load(fname, map_location=torch.device('cpu')) # load 41 | msd = model.state_dict() # model state_dict 42 | csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 43 | csd = {k: v for k, v in csd.items() if msd[k].shape == v.shape} # filter 44 | model.load_state_dict(csd, strict=False) # load 45 | if len(ckpt['model'].names) == classes: 46 | model.names = ckpt['model'].names # set class names attribute 47 | if autoshape: 48 | model = model.autoshape() # for file/URI/PIL/cv2/np inputs and NMS 49 | device = select_device('0' if torch.cuda.is_available() else 'cpu') # default to GPU if available 50 | return model.to(device) 51 | 52 | except Exception as e: 53 | s = 'Cache may be out of date, try force_reload=True.' 54 | raise Exception(s) from e 55 | 56 | 57 | def custom(path_or_model='path/to/model.pt', autoshape=True): 58 | """custom model 59 | 60 | Arguments (3 options): 61 | path_or_model (str): 'path/to/model.pt' 62 | path_or_model (dict): torch.load('path/to/model.pt') 63 | path_or_model (nn.Module): torch.load('path/to/model.pt')['model'] 64 | 65 | Returns: 66 | pytorch model 67 | """ 68 | model = torch.load(path_or_model, map_location=torch.device('cpu')) if isinstance(path_or_model, str) else path_or_model # load checkpoint 69 | if isinstance(model, dict): 70 | model = model['ema' if model.get('ema') else 'model'] # load model 71 | 72 | hub_model = Model(model.yaml).to(next(model.parameters()).device) # create 73 | hub_model.load_state_dict(model.float().state_dict()) # load state_dict 74 | hub_model.names = model.names # class names 75 | if autoshape: 76 | hub_model = hub_model.autoshape() # for file/URI/PIL/cv2/np inputs and NMS 77 | device = select_device('0' if torch.cuda.is_available() else 'cpu') # default to GPU if available 78 | return hub_model.to(device) 79 | 80 | 81 | def yolov7(pretrained=True, channels=3, classes=80, autoshape=True): 82 | return create('yolov7', pretrained, channels, classes, autoshape) 83 | 84 | 85 | if __name__ == '__main__': 86 | model = custom(path_or_model='yolov7.pt') # custom example 87 | # model = create(name='yolov7', pretrained=True, channels=3, classes=80, autoshape=True) # pretrained example 88 | 89 | # Verify inference 90 | import numpy as np 91 | from PIL import Image 92 | 93 | imgs = [np.zeros((640, 480, 3))] 94 | 95 | results = model(imgs) # batched inference 96 | results.print() 97 | results.save() 98 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # yolov7 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [64, 3, 2]], # 
1-P1/2 18 | [-1, 1, Conv, [64, 3, 1]], 19 | 20 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 21 | [-1, 1, Conv, [64, 1, 1]], 22 | [-2, 1, Conv, [64, 1, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [[-1, -3, -5, -6], 1, Concat, [1]], 28 | [-1, 1, Conv, [256, 1, 1]], # 11 29 | 30 | [-1, 1, MP, []], 31 | [-1, 1, Conv, [128, 1, 1]], 32 | [-3, 1, Conv, [128, 1, 1]], 33 | [-1, 1, Conv, [128, 3, 2]], 34 | [[-1, -3], 1, Concat, [1]], # 16-P3/8 35 | [-1, 1, Conv, [128, 1, 1]], 36 | [-2, 1, Conv, [128, 1, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [[-1, -3, -5, -6], 1, Concat, [1]], 42 | [-1, 1, Conv, [512, 1, 1]], # 24 43 | 44 | [-1, 1, MP, []], 45 | [-1, 1, Conv, [256, 1, 1]], 46 | [-3, 1, Conv, [256, 1, 1]], 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, -3], 1, Concat, [1]], # 29-P4/16 49 | [-1, 1, Conv, [256, 1, 1]], 50 | [-2, 1, Conv, [256, 1, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [-1, 1, Conv, [256, 3, 1]], 53 | [-1, 1, Conv, [256, 3, 1]], 54 | [-1, 1, Conv, [256, 3, 1]], 55 | [[-1, -3, -5, -6], 1, Concat, [1]], 56 | [-1, 1, Conv, [1024, 1, 1]], # 37 57 | 58 | [-1, 1, MP, []], 59 | [-1, 1, Conv, [512, 1, 1]], 60 | [-3, 1, Conv, [512, 1, 1]], 61 | [-1, 1, Conv, [512, 3, 2]], 62 | [[-1, -3], 1, Concat, [1]], # 42-P5/32 63 | [-1, 1, Conv, [256, 1, 1]], 64 | [-2, 1, Conv, [256, 1, 1]], 65 | [-1, 1, Conv, [256, 3, 1]], 66 | [-1, 1, Conv, [256, 3, 1]], 67 | [-1, 1, Conv, [256, 3, 1]], 68 | [-1, 1, Conv, [256, 3, 1]], 69 | [[-1, -3, -5, -6], 1, Concat, [1]], 70 | [-1, 1, Conv, [1024, 1, 1]], # 50 71 | ] 72 | 73 | # yolov7 head 74 | head: 75 | [[-1, 1, SPPCSPC, [512]], # 51 76 | 77 | [-1, 1, Conv, [256, 1, 1]], 78 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 79 | [37, 1, Conv, [256, 1, 1]], # route backbone P4 80 | [[-1, -2], 1, Concat, [1]], 81 | 82 | [-1, 1, Conv, [256, 1, 1]], 83 | [-2, 1, Conv, [256, 1, 1]], 84 | [-1, 1, Conv, [128, 3, 1]], 85 | [-1, 1, Conv, [128, 3, 1]], 86 | [-1, 1, Conv, [128, 3, 1]], 87 | [-1, 1, Conv, [128, 3, 1]], 88 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 89 | [-1, 1, Conv, [256, 1, 1]], # 63 90 | 91 | [-1, 1, Conv, [128, 1, 1]], 92 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 93 | [24, 1, Conv, [128, 1, 1]], # route backbone P3 94 | [[-1, -2], 1, Concat, [1]], 95 | 96 | [-1, 1, Conv, [128, 1, 1]], 97 | [-2, 1, Conv, [128, 1, 1]], 98 | [-1, 1, Conv, [64, 3, 1]], 99 | [-1, 1, Conv, [64, 3, 1]], 100 | [-1, 1, Conv, [64, 3, 1]], 101 | [-1, 1, Conv, [64, 3, 1]], 102 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 103 | [-1, 1, Conv, [128, 1, 1]], # 75 104 | 105 | [-1, 1, MP, []], 106 | [-1, 1, Conv, [128, 1, 1]], 107 | [-3, 1, Conv, [128, 1, 1]], 108 | [-1, 1, Conv, [128, 3, 2]], 109 | [[-1, -3, 63], 1, Concat, [1]], 110 | 111 | [-1, 1, Conv, [256, 1, 1]], 112 | [-2, 1, Conv, [256, 1, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [-1, 1, Conv, [128, 3, 1]], 115 | [-1, 1, Conv, [128, 3, 1]], 116 | [-1, 1, Conv, [128, 3, 1]], 117 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 118 | [-1, 1, Conv, [256, 1, 1]], # 88 119 | 120 | [-1, 1, MP, []], 121 | [-1, 1, Conv, [256, 1, 1]], 122 | [-3, 1, Conv, [256, 1, 1]], 123 | [-1, 1, Conv, [256, 3, 2]], 124 | [[-1, -3, 51], 1, Concat, [1]], 125 | 126 | [-1, 1, Conv, [512, 1, 1]], 127 | [-2, 1, Conv, [512, 1, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [-1, 1, Conv, [256, 3, 1]], 130 | [-1, 1, Conv, [256, 3, 1]], 131 | [-1, 1, Conv, [256, 3, 1]], 132 
| [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 133 | [-1, 1, Conv, [512, 1, 1]], # 101 134 | 135 | [75, 1, RepConv, [256, 3, 1]], 136 | [88, 1, RepConv, [512, 3, 1]], 137 | [101, 1, RepConv, [1024, 3, 1]], 138 | 139 | [[102,103,104], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 140 | ] 141 | -------------------------------------------------------------------------------- /cfg/training/yolov7.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # yolov7 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 18 | [-1, 1, Conv, [64, 3, 1]], 19 | 20 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 21 | [-1, 1, Conv, [64, 1, 1]], 22 | [-2, 1, Conv, [64, 1, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [[-1, -3, -5, -6], 1, Concat, [1]], 28 | [-1, 1, Conv, [256, 1, 1]], # 11 29 | 30 | [-1, 1, MP, []], 31 | [-1, 1, Conv, [128, 1, 1]], 32 | [-3, 1, Conv, [128, 1, 1]], 33 | [-1, 1, Conv, [128, 3, 2]], 34 | [[-1, -3], 1, Concat, [1]], # 16-P3/8 35 | [-1, 1, Conv, [128, 1, 1]], 36 | [-2, 1, Conv, [128, 1, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [[-1, -3, -5, -6], 1, Concat, [1]], 42 | [-1, 1, Conv, [512, 1, 1]], # 24 43 | 44 | [-1, 1, MP, []], 45 | [-1, 1, Conv, [256, 1, 1]], 46 | [-3, 1, Conv, [256, 1, 1]], 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, -3], 1, Concat, [1]], # 29-P4/16 49 | [-1, 1, Conv, [256, 1, 1]], 50 | [-2, 1, Conv, [256, 1, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [-1, 1, Conv, [256, 3, 1]], 53 | [-1, 1, Conv, [256, 3, 1]], 54 | [-1, 1, Conv, [256, 3, 1]], 55 | [[-1, -3, -5, -6], 1, Concat, [1]], 56 | [-1, 1, Conv, [1024, 1, 1]], # 37 57 | 58 | [-1, 1, MP, []], 59 | [-1, 1, Conv, [512, 1, 1]], 60 | [-3, 1, Conv, [512, 1, 1]], 61 | [-1, 1, Conv, [512, 3, 2]], 62 | [[-1, -3], 1, Concat, [1]], # 42-P5/32 63 | [-1, 1, Conv, [256, 1, 1]], 64 | [-2, 1, Conv, [256, 1, 1]], 65 | [-1, 1, Conv, [256, 3, 1]], 66 | [-1, 1, Conv, [256, 3, 1]], 67 | [-1, 1, Conv, [256, 3, 1]], 68 | [-1, 1, Conv, [256, 3, 1]], 69 | [[-1, -3, -5, -6], 1, Concat, [1]], 70 | [-1, 1, Conv, [1024, 1, 1]], # 50 71 | ] 72 | 73 | # yolov7 head 74 | head: 75 | [[-1, 1, SPPCSPC, [512]], # 51 76 | 77 | [-1, 1, Conv, [256, 1, 1]], 78 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 79 | [37, 1, Conv, [256, 1, 1]], # route backbone P4 80 | [[-1, -2], 1, Concat, [1]], 81 | 82 | [-1, 1, Conv, [256, 1, 1]], 83 | [-2, 1, Conv, [256, 1, 1]], 84 | [-1, 1, Conv, [128, 3, 1]], 85 | [-1, 1, Conv, [128, 3, 1]], 86 | [-1, 1, Conv, [128, 3, 1]], 87 | [-1, 1, Conv, [128, 3, 1]], 88 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 89 | [-1, 1, Conv, [256, 1, 1]], # 63 90 | 91 | [-1, 1, Conv, [128, 1, 1]], 92 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 93 | [24, 1, Conv, [128, 1, 1]], # route backbone P3 94 | [[-1, -2], 1, Concat, [1]], 95 | 96 | [-1, 1, Conv, [128, 1, 1]], 97 | [-2, 1, Conv, [128, 1, 1]], 98 | [-1, 1, Conv, [64, 3, 1]], 99 | [-1, 1, Conv, [64, 3, 1]], 100 | [-1, 1, Conv, [64, 3, 1]], 101 | [-1, 1, Conv, [64, 3, 1]], 102 | [[-1, 
-2, -3, -4, -5, -6], 1, Concat, [1]], 103 | [-1, 1, Conv, [128, 1, 1]], # 75 104 | 105 | [-1, 1, MP, []], 106 | [-1, 1, Conv, [128, 1, 1]], 107 | [-3, 1, Conv, [128, 1, 1]], 108 | [-1, 1, Conv, [128, 3, 2]], 109 | [[-1, -3, 63], 1, Concat, [1]], 110 | 111 | [-1, 1, Conv, [256, 1, 1]], 112 | [-2, 1, Conv, [256, 1, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [-1, 1, Conv, [128, 3, 1]], 115 | [-1, 1, Conv, [128, 3, 1]], 116 | [-1, 1, Conv, [128, 3, 1]], 117 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 118 | [-1, 1, Conv, [256, 1, 1]], # 88 119 | 120 | [-1, 1, MP, []], 121 | [-1, 1, Conv, [256, 1, 1]], 122 | [-3, 1, Conv, [256, 1, 1]], 123 | [-1, 1, Conv, [256, 3, 2]], 124 | [[-1, -3, 51], 1, Concat, [1]], 125 | 126 | [-1, 1, Conv, [512, 1, 1]], 127 | [-2, 1, Conv, [512, 1, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [-1, 1, Conv, [256, 3, 1]], 130 | [-1, 1, Conv, [256, 3, 1]], 131 | [-1, 1, Conv, [256, 3, 1]], 132 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 133 | [-1, 1, Conv, [512, 1, 1]], # 101 134 | 135 | [75, 1, RepConv, [256, 3, 1]], 136 | [88, 1, RepConv, [512, 3, 1]], 137 | [101, 1, RepConv, [1024, 3, 1]], 138 | 139 | [[102,103,104], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5) 140 | ] 141 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7-tiny.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # yolov7-tiny backbone 13 | backbone: 14 | # [from, number, module, args] c2, k=1, s=1, p=None, g=1, act=True 15 | [[-1, 1, Conv, [32, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 0-P1/2 16 | 17 | [-1, 1, Conv, [64, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 1-P2/4 18 | 19 | [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 20 | [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 21 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 22 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 23 | [[-1, -2, -3, -4], 1, Concat, [1]], 24 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 7 25 | 26 | [-1, 1, MP, []], # 8-P3/8 27 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 28 | [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 29 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 30 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 31 | [[-1, -2, -3, -4], 1, Concat, [1]], 32 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 14 33 | 34 | [-1, 1, MP, []], # 15-P4/16 35 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 36 | [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 37 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 38 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 39 | [[-1, -2, -3, -4], 1, Concat, [1]], 40 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 21 41 | 42 | [-1, 1, MP, []], # 22-P5/32 43 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 44 | [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 45 | [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 46 | [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 47 | [[-1, -2, -3, -4], 1, Concat, [1]], 48 | [-1, 1, Conv, [512, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 28 49 | ] 50 | 51 | # yolov7-tiny head 52 | head: 53 | [[-1, 1, Conv, [256, 
1, 1, None, 1, nn.LeakyReLU(0.1)]], 54 | [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 55 | [-1, 1, SP, [5]], 56 | [-2, 1, SP, [9]], 57 | [-3, 1, SP, [13]], 58 | [[-1, -2, -3, -4], 1, Concat, [1]], 59 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 60 | [[-1, -7], 1, Concat, [1]], 61 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 37 62 | 63 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 64 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 65 | [21, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P4 66 | [[-1, -2], 1, Concat, [1]], 67 | 68 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 69 | [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 70 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 71 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 72 | [[-1, -2, -3, -4], 1, Concat, [1]], 73 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 47 74 | 75 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 76 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 77 | [14, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P3 78 | [[-1, -2], 1, Concat, [1]], 79 | 80 | [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 81 | [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 82 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 83 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 84 | [[-1, -2, -3, -4], 1, Concat, [1]], 85 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 57 86 | 87 | [-1, 1, Conv, [128, 3, 2, None, 1, nn.LeakyReLU(0.1)]], 88 | [[-1, 47], 1, Concat, [1]], 89 | 90 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 91 | [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 92 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 93 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 94 | [[-1, -2, -3, -4], 1, Concat, [1]], 95 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 65 96 | 97 | [-1, 1, Conv, [256, 3, 2, None, 1, nn.LeakyReLU(0.1)]], 98 | [[-1, 37], 1, Concat, [1]], 99 | 100 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 101 | [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 102 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 103 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 104 | [[-1, -2, -3, -4], 1, Concat, [1]], 105 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 73 106 | 107 | [57, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 108 | [65, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 109 | [73, 1, Conv, [512, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 110 | 111 | [[74,75,76], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 112 | ] 113 | -------------------------------------------------------------------------------- /cfg/training/yolov7-tiny.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # yolov7-tiny backbone 13 | backbone: 14 | # [from, number, module, args] c2, k=1, s=1, p=None, g=1, act=True 15 | [[-1, 1, Conv, [32, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 0-P1/2 16 | 17 | [-1, 1, Conv, [64, 3, 2, None, 1, nn.LeakyReLU(0.1)]], # 1-P2/4 18 | 19 | [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 20 | [-2, 1, Conv, [32, 
1, 1, None, 1, nn.LeakyReLU(0.1)]], 21 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 22 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 23 | [[-1, -2, -3, -4], 1, Concat, [1]], 24 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 7 25 | 26 | [-1, 1, MP, []], # 8-P3/8 27 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 28 | [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 29 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 30 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 31 | [[-1, -2, -3, -4], 1, Concat, [1]], 32 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 14 33 | 34 | [-1, 1, MP, []], # 15-P4/16 35 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 36 | [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 37 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 38 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 39 | [[-1, -2, -3, -4], 1, Concat, [1]], 40 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 21 41 | 42 | [-1, 1, MP, []], # 22-P5/32 43 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 44 | [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 45 | [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 46 | [-1, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 47 | [[-1, -2, -3, -4], 1, Concat, [1]], 48 | [-1, 1, Conv, [512, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 28 49 | ] 50 | 51 | # yolov7-tiny head 52 | head: 53 | [[-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 54 | [-2, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 55 | [-1, 1, SP, [5]], 56 | [-2, 1, SP, [9]], 57 | [-3, 1, SP, [13]], 58 | [[-1, -2, -3, -4], 1, Concat, [1]], 59 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 60 | [[-1, -7], 1, Concat, [1]], 61 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 37 62 | 63 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 64 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 65 | [21, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P4 66 | [[-1, -2], 1, Concat, [1]], 67 | 68 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 69 | [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 70 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 71 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 72 | [[-1, -2, -3, -4], 1, Concat, [1]], 73 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 47 74 | 75 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 76 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 77 | [14, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # route backbone P3 78 | [[-1, -2], 1, Concat, [1]], 79 | 80 | [-1, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 81 | [-2, 1, Conv, [32, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 82 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 83 | [-1, 1, Conv, [32, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 84 | [[-1, -2, -3, -4], 1, Concat, [1]], 85 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 57 86 | 87 | [-1, 1, Conv, [128, 3, 2, None, 1, nn.LeakyReLU(0.1)]], 88 | [[-1, 47], 1, Concat, [1]], 89 | 90 | [-1, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 91 | [-2, 1, Conv, [64, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 92 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 93 | [-1, 1, Conv, [64, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 94 | [[-1, -2, -3, -4], 1, Concat, [1]], 95 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 65 96 | 97 | [-1, 1, Conv, [256, 3, 2, None, 1, nn.LeakyReLU(0.1)]], 98 | [[-1, 37], 
1, Concat, [1]], 99 | 100 | [-1, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 101 | [-2, 1, Conv, [128, 1, 1, None, 1, nn.LeakyReLU(0.1)]], 102 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 103 | [-1, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 104 | [[-1, -2, -3, -4], 1, Concat, [1]], 105 | [-1, 1, Conv, [256, 1, 1, None, 1, nn.LeakyReLU(0.1)]], # 73 106 | 107 | [57, 1, Conv, [128, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 108 | [65, 1, Conv, [256, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 109 | [73, 1, Conv, [512, 3, 1, None, 1, nn.LeakyReLU(0.1)]], 110 | 111 | [[74,75,76], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5) 112 | ] 113 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # yolov7x backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [40, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [80, 3, 2]], # 1-P1/2 18 | [-1, 1, Conv, [80, 3, 1]], 19 | 20 | [-1, 1, Conv, [160, 3, 2]], # 3-P2/4 21 | [-1, 1, Conv, [64, 1, 1]], 22 | [-2, 1, Conv, [64, 1, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [-1, 1, Conv, [64, 3, 1]], 29 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 30 | [-1, 1, Conv, [320, 1, 1]], # 13 31 | 32 | [-1, 1, MP, []], 33 | [-1, 1, Conv, [160, 1, 1]], 34 | [-3, 1, Conv, [160, 1, 1]], 35 | [-1, 1, Conv, [160, 3, 2]], 36 | [[-1, -3], 1, Concat, [1]], # 18-P3/8 37 | [-1, 1, Conv, [128, 1, 1]], 38 | [-2, 1, Conv, [128, 1, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [-1, 1, Conv, [128, 3, 1]], 42 | [-1, 1, Conv, [128, 3, 1]], 43 | [-1, 1, Conv, [128, 3, 1]], 44 | [-1, 1, Conv, [128, 3, 1]], 45 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 46 | [-1, 1, Conv, [640, 1, 1]], # 28 47 | 48 | [-1, 1, MP, []], 49 | [-1, 1, Conv, [320, 1, 1]], 50 | [-3, 1, Conv, [320, 1, 1]], 51 | [-1, 1, Conv, [320, 3, 2]], 52 | [[-1, -3], 1, Concat, [1]], # 33-P4/16 53 | [-1, 1, Conv, [256, 1, 1]], 54 | [-2, 1, Conv, [256, 1, 1]], 55 | [-1, 1, Conv, [256, 3, 1]], 56 | [-1, 1, Conv, [256, 3, 1]], 57 | [-1, 1, Conv, [256, 3, 1]], 58 | [-1, 1, Conv, [256, 3, 1]], 59 | [-1, 1, Conv, [256, 3, 1]], 60 | [-1, 1, Conv, [256, 3, 1]], 61 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 62 | [-1, 1, Conv, [1280, 1, 1]], # 43 63 | 64 | [-1, 1, MP, []], 65 | [-1, 1, Conv, [640, 1, 1]], 66 | [-3, 1, Conv, [640, 1, 1]], 67 | [-1, 1, Conv, [640, 3, 2]], 68 | [[-1, -3], 1, Concat, [1]], # 48-P5/32 69 | [-1, 1, Conv, [256, 1, 1]], 70 | [-2, 1, Conv, [256, 1, 1]], 71 | [-1, 1, Conv, [256, 3, 1]], 72 | [-1, 1, Conv, [256, 3, 1]], 73 | [-1, 1, Conv, [256, 3, 1]], 74 | [-1, 1, Conv, [256, 3, 1]], 75 | [-1, 1, Conv, [256, 3, 1]], 76 | [-1, 1, Conv, [256, 3, 1]], 77 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 78 | [-1, 1, Conv, [1280, 1, 1]], # 58 79 | ] 80 | 81 | # yolov7x head 82 | head: 83 | [[-1, 1, SPPCSPC, [640]], # 59 84 | 85 | [-1, 1, Conv, [320, 1, 1]], 86 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 87 | [43, 1, Conv, [320, 1, 1]], # route backbone P4 88 | [[-1, -2], 1, Concat, [1]], 89 | 90 | [-1, 1, Conv, [256, 1, 
1]], 91 | [-2, 1, Conv, [256, 1, 1]], 92 | [-1, 1, Conv, [256, 3, 1]], 93 | [-1, 1, Conv, [256, 3, 1]], 94 | [-1, 1, Conv, [256, 3, 1]], 95 | [-1, 1, Conv, [256, 3, 1]], 96 | [-1, 1, Conv, [256, 3, 1]], 97 | [-1, 1, Conv, [256, 3, 1]], 98 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 99 | [-1, 1, Conv, [320, 1, 1]], # 73 100 | 101 | [-1, 1, Conv, [160, 1, 1]], 102 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 103 | [28, 1, Conv, [160, 1, 1]], # route backbone P3 104 | [[-1, -2], 1, Concat, [1]], 105 | 106 | [-1, 1, Conv, [128, 1, 1]], 107 | [-2, 1, Conv, [128, 1, 1]], 108 | [-1, 1, Conv, [128, 3, 1]], 109 | [-1, 1, Conv, [128, 3, 1]], 110 | [-1, 1, Conv, [128, 3, 1]], 111 | [-1, 1, Conv, [128, 3, 1]], 112 | [-1, 1, Conv, [128, 3, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 115 | [-1, 1, Conv, [160, 1, 1]], # 87 116 | 117 | [-1, 1, MP, []], 118 | [-1, 1, Conv, [160, 1, 1]], 119 | [-3, 1, Conv, [160, 1, 1]], 120 | [-1, 1, Conv, [160, 3, 2]], 121 | [[-1, -3, 73], 1, Concat, [1]], 122 | 123 | [-1, 1, Conv, [256, 1, 1]], 124 | [-2, 1, Conv, [256, 1, 1]], 125 | [-1, 1, Conv, [256, 3, 1]], 126 | [-1, 1, Conv, [256, 3, 1]], 127 | [-1, 1, Conv, [256, 3, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [-1, 1, Conv, [256, 3, 1]], 130 | [-1, 1, Conv, [256, 3, 1]], 131 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 132 | [-1, 1, Conv, [320, 1, 1]], # 102 133 | 134 | [-1, 1, MP, []], 135 | [-1, 1, Conv, [320, 1, 1]], 136 | [-3, 1, Conv, [320, 1, 1]], 137 | [-1, 1, Conv, [320, 3, 2]], 138 | [[-1, -3, 59], 1, Concat, [1]], 139 | 140 | [-1, 1, Conv, [512, 1, 1]], 141 | [-2, 1, Conv, [512, 1, 1]], 142 | [-1, 1, Conv, [512, 3, 1]], 143 | [-1, 1, Conv, [512, 3, 1]], 144 | [-1, 1, Conv, [512, 3, 1]], 145 | [-1, 1, Conv, [512, 3, 1]], 146 | [-1, 1, Conv, [512, 3, 1]], 147 | [-1, 1, Conv, [512, 3, 1]], 148 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 149 | [-1, 1, Conv, [640, 1, 1]], # 117 150 | 151 | [87, 1, Conv, [320, 3, 1]], 152 | [102, 1, Conv, [640, 3, 1]], 153 | [117, 1, Conv, [1280, 3, 1]], 154 | 155 | [[118,119,120], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 156 | ] 157 | -------------------------------------------------------------------------------- /cfg/training/yolov7x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - [36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # yolov7 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [40, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [80, 3, 2]], # 1-P1/2 18 | [-1, 1, Conv, [80, 3, 1]], 19 | 20 | [-1, 1, Conv, [160, 3, 2]], # 3-P2/4 21 | [-1, 1, Conv, [64, 1, 1]], 22 | [-2, 1, Conv, [64, 1, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [-1, 1, Conv, [64, 3, 1]], 29 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 30 | [-1, 1, Conv, [320, 1, 1]], # 13 31 | 32 | [-1, 1, MP, []], 33 | [-1, 1, Conv, [160, 1, 1]], 34 | [-3, 1, Conv, [160, 1, 1]], 35 | [-1, 1, Conv, [160, 3, 2]], 36 | [[-1, -3], 1, Concat, [1]], # 18-P3/8 37 | [-1, 1, Conv, [128, 1, 1]], 38 | [-2, 1, Conv, [128, 1, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [-1, 1, Conv, [128, 3, 1]], 42 | [-1, 1, Conv, [128, 3, 1]], 
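# How to read these rows (a comment gloss, not extra layers): each entry is
# [from, number, module, args]. from = -1 takes the previous layer's output,
# -2 the one before that, and a list such as [-1, -3, -5, -7, -8] routes
# several earlier outputs into one module; number repeats the module; Conv
# args are [out_channels, kernel, stride]. The Concat a few rows below thus
# joins five 128-channel branches into 5 * 128 = 640 channels, matching the
# 1x1 Conv [640, 1, 1] that follows.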
43 | [-1, 1, Conv, [128, 3, 1]], 44 | [-1, 1, Conv, [128, 3, 1]], 45 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 46 | [-1, 1, Conv, [640, 1, 1]], # 28 47 | 48 | [-1, 1, MP, []], 49 | [-1, 1, Conv, [320, 1, 1]], 50 | [-3, 1, Conv, [320, 1, 1]], 51 | [-1, 1, Conv, [320, 3, 2]], 52 | [[-1, -3], 1, Concat, [1]], # 33-P4/16 53 | [-1, 1, Conv, [256, 1, 1]], 54 | [-2, 1, Conv, [256, 1, 1]], 55 | [-1, 1, Conv, [256, 3, 1]], 56 | [-1, 1, Conv, [256, 3, 1]], 57 | [-1, 1, Conv, [256, 3, 1]], 58 | [-1, 1, Conv, [256, 3, 1]], 59 | [-1, 1, Conv, [256, 3, 1]], 60 | [-1, 1, Conv, [256, 3, 1]], 61 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 62 | [-1, 1, Conv, [1280, 1, 1]], # 43 63 | 64 | [-1, 1, MP, []], 65 | [-1, 1, Conv, [640, 1, 1]], 66 | [-3, 1, Conv, [640, 1, 1]], 67 | [-1, 1, Conv, [640, 3, 2]], 68 | [[-1, -3], 1, Concat, [1]], # 48-P5/32 69 | [-1, 1, Conv, [256, 1, 1]], 70 | [-2, 1, Conv, [256, 1, 1]], 71 | [-1, 1, Conv, [256, 3, 1]], 72 | [-1, 1, Conv, [256, 3, 1]], 73 | [-1, 1, Conv, [256, 3, 1]], 74 | [-1, 1, Conv, [256, 3, 1]], 75 | [-1, 1, Conv, [256, 3, 1]], 76 | [-1, 1, Conv, [256, 3, 1]], 77 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 78 | [-1, 1, Conv, [1280, 1, 1]], # 58 79 | ] 80 | 81 | # yolov7 head 82 | head: 83 | [[-1, 1, SPPCSPC, [640]], # 59 84 | 85 | [-1, 1, Conv, [320, 1, 1]], 86 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 87 | [43, 1, Conv, [320, 1, 1]], # route backbone P4 88 | [[-1, -2], 1, Concat, [1]], 89 | 90 | [-1, 1, Conv, [256, 1, 1]], 91 | [-2, 1, Conv, [256, 1, 1]], 92 | [-1, 1, Conv, [256, 3, 1]], 93 | [-1, 1, Conv, [256, 3, 1]], 94 | [-1, 1, Conv, [256, 3, 1]], 95 | [-1, 1, Conv, [256, 3, 1]], 96 | [-1, 1, Conv, [256, 3, 1]], 97 | [-1, 1, Conv, [256, 3, 1]], 98 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 99 | [-1, 1, Conv, [320, 1, 1]], # 73 100 | 101 | [-1, 1, Conv, [160, 1, 1]], 102 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 103 | [28, 1, Conv, [160, 1, 1]], # route backbone P3 104 | [[-1, -2], 1, Concat, [1]], 105 | 106 | [-1, 1, Conv, [128, 1, 1]], 107 | [-2, 1, Conv, [128, 1, 1]], 108 | [-1, 1, Conv, [128, 3, 1]], 109 | [-1, 1, Conv, [128, 3, 1]], 110 | [-1, 1, Conv, [128, 3, 1]], 111 | [-1, 1, Conv, [128, 3, 1]], 112 | [-1, 1, Conv, [128, 3, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 115 | [-1, 1, Conv, [160, 1, 1]], # 87 116 | 117 | [-1, 1, MP, []], 118 | [-1, 1, Conv, [160, 1, 1]], 119 | [-3, 1, Conv, [160, 1, 1]], 120 | [-1, 1, Conv, [160, 3, 2]], 121 | [[-1, -3, 73], 1, Concat, [1]], 122 | 123 | [-1, 1, Conv, [256, 1, 1]], 124 | [-2, 1, Conv, [256, 1, 1]], 125 | [-1, 1, Conv, [256, 3, 1]], 126 | [-1, 1, Conv, [256, 3, 1]], 127 | [-1, 1, Conv, [256, 3, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [-1, 1, Conv, [256, 3, 1]], 130 | [-1, 1, Conv, [256, 3, 1]], 131 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 132 | [-1, 1, Conv, [320, 1, 1]], # 102 133 | 134 | [-1, 1, MP, []], 135 | [-1, 1, Conv, [320, 1, 1]], 136 | [-3, 1, Conv, [320, 1, 1]], 137 | [-1, 1, Conv, [320, 3, 2]], 138 | [[-1, -3, 59], 1, Concat, [1]], 139 | 140 | [-1, 1, Conv, [512, 1, 1]], 141 | [-2, 1, Conv, [512, 1, 1]], 142 | [-1, 1, Conv, [512, 3, 1]], 143 | [-1, 1, Conv, [512, 3, 1]], 144 | [-1, 1, Conv, [512, 3, 1]], 145 | [-1, 1, Conv, [512, 3, 1]], 146 | [-1, 1, Conv, [512, 3, 1]], 147 | [-1, 1, Conv, [512, 3, 1]], 148 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 149 | [-1, 1, Conv, [640, 1, 1]], # 117 150 | 151 | [87, 1, Conv, [320, 3, 1]], 152 | [102, 1, Conv, [640, 3, 1]], 153 | [117, 1, Conv, [1280, 3, 1]], 154 | 155 | [[118,119,120], 1, IDetect, [nc, 
anchors]], # Detect(P3, P4, P5) 156 | ] 157 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7-w6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7-w6 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [64, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [[-1, -3, -5, -6], 1, Concat, [1]], 27 | [-1, 1, Conv, [128, 1, 1]], # 10 28 | 29 | [-1, 1, Conv, [256, 3, 2]], # 11-P3/8 30 | [-1, 1, Conv, [128, 1, 1]], 31 | [-2, 1, Conv, [128, 1, 1]], 32 | [-1, 1, Conv, [128, 3, 1]], 33 | [-1, 1, Conv, [128, 3, 1]], 34 | [-1, 1, Conv, [128, 3, 1]], 35 | [-1, 1, Conv, [128, 3, 1]], 36 | [[-1, -3, -5, -6], 1, Concat, [1]], 37 | [-1, 1, Conv, [256, 1, 1]], # 19 38 | 39 | [-1, 1, Conv, [512, 3, 2]], # 20-P4/16 40 | [-1, 1, Conv, [256, 1, 1]], 41 | [-2, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [256, 3, 1]], 43 | [-1, 1, Conv, [256, 3, 1]], 44 | [-1, 1, Conv, [256, 3, 1]], 45 | [-1, 1, Conv, [256, 3, 1]], 46 | [[-1, -3, -5, -6], 1, Concat, [1]], 47 | [-1, 1, Conv, [512, 1, 1]], # 28 48 | 49 | [-1, 1, Conv, [768, 3, 2]], # 29-P5/32 50 | [-1, 1, Conv, [384, 1, 1]], 51 | [-2, 1, Conv, [384, 1, 1]], 52 | [-1, 1, Conv, [384, 3, 1]], 53 | [-1, 1, Conv, [384, 3, 1]], 54 | [-1, 1, Conv, [384, 3, 1]], 55 | [-1, 1, Conv, [384, 3, 1]], 56 | [[-1, -3, -5, -6], 1, Concat, [1]], 57 | [-1, 1, Conv, [768, 1, 1]], # 37 58 | 59 | [-1, 1, Conv, [1024, 3, 2]], # 38-P6/64 60 | [-1, 1, Conv, [512, 1, 1]], 61 | [-2, 1, Conv, [512, 1, 1]], 62 | [-1, 1, Conv, [512, 3, 1]], 63 | [-1, 1, Conv, [512, 3, 1]], 64 | [-1, 1, Conv, [512, 3, 1]], 65 | [-1, 1, Conv, [512, 3, 1]], 66 | [[-1, -3, -5, -6], 1, Concat, [1]], 67 | [-1, 1, Conv, [1024, 1, 1]], # 46 68 | ] 69 | 70 | # yolov7-w6 head 71 | head: 72 | [[-1, 1, SPPCSPC, [512]], # 47 73 | 74 | [-1, 1, Conv, [384, 1, 1]], 75 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 76 | [37, 1, Conv, [384, 1, 1]], # route backbone P5 77 | [[-1, -2], 1, Concat, [1]], 78 | 79 | [-1, 1, Conv, [384, 1, 1]], 80 | [-2, 1, Conv, [384, 1, 1]], 81 | [-1, 1, Conv, [192, 3, 1]], 82 | [-1, 1, Conv, [192, 3, 1]], 83 | [-1, 1, Conv, [192, 3, 1]], 84 | [-1, 1, Conv, [192, 3, 1]], 85 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 86 | [-1, 1, Conv, [384, 1, 1]], # 59 87 | 88 | [-1, 1, Conv, [256, 1, 1]], 89 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 90 | [28, 1, Conv, [256, 1, 1]], # route backbone P4 91 | [[-1, -2], 1, Concat, [1]], 92 | 93 | [-1, 1, Conv, [256, 1, 1]], 94 | [-2, 1, Conv, [256, 1, 1]], 95 | [-1, 1, Conv, [128, 3, 1]], 96 | [-1, 1, Conv, [128, 3, 1]], 97 | [-1, 1, Conv, [128, 3, 1]], 98 | [-1, 1, Conv, [128, 3, 1]], 99 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 100 | [-1, 1, Conv, [256, 1, 1]], # 71 101 | 102 | [-1, 1, Conv, [128, 1, 1]], 103 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 104 | [19, 1, Conv, [128, 1, 1]], # route backbone P3 105 | [[-1, -2], 1, Concat, [1]], 106 | 107 | 
[-1, 1, Conv, [128, 1, 1]], 108 | [-2, 1, Conv, [128, 1, 1]], 109 | [-1, 1, Conv, [64, 3, 1]], 110 | [-1, 1, Conv, [64, 3, 1]], 111 | [-1, 1, Conv, [64, 3, 1]], 112 | [-1, 1, Conv, [64, 3, 1]], 113 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 114 | [-1, 1, Conv, [128, 1, 1]], # 83 115 | 116 | [-1, 1, Conv, [256, 3, 2]], 117 | [[-1, 71], 1, Concat, [1]], # cat 118 | 119 | [-1, 1, Conv, [256, 1, 1]], 120 | [-2, 1, Conv, [256, 1, 1]], 121 | [-1, 1, Conv, [128, 3, 1]], 122 | [-1, 1, Conv, [128, 3, 1]], 123 | [-1, 1, Conv, [128, 3, 1]], 124 | [-1, 1, Conv, [128, 3, 1]], 125 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 126 | [-1, 1, Conv, [256, 1, 1]], # 93 127 | 128 | [-1, 1, Conv, [384, 3, 2]], 129 | [[-1, 59], 1, Concat, [1]], # cat 130 | 131 | [-1, 1, Conv, [384, 1, 1]], 132 | [-2, 1, Conv, [384, 1, 1]], 133 | [-1, 1, Conv, [192, 3, 1]], 134 | [-1, 1, Conv, [192, 3, 1]], 135 | [-1, 1, Conv, [192, 3, 1]], 136 | [-1, 1, Conv, [192, 3, 1]], 137 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 138 | [-1, 1, Conv, [384, 1, 1]], # 103 139 | 140 | [-1, 1, Conv, [512, 3, 2]], 141 | [[-1, 47], 1, Concat, [1]], # cat 142 | 143 | [-1, 1, Conv, [512, 1, 1]], 144 | [-2, 1, Conv, [512, 1, 1]], 145 | [-1, 1, Conv, [256, 3, 1]], 146 | [-1, 1, Conv, [256, 3, 1]], 147 | [-1, 1, Conv, [256, 3, 1]], 148 | [-1, 1, Conv, [256, 3, 1]], 149 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 150 | [-1, 1, Conv, [512, 1, 1]], # 113 151 | 152 | [83, 1, Conv, [256, 3, 1]], 153 | [93, 1, Conv, [512, 3, 1]], 154 | [103, 1, Conv, [768, 3, 1]], 155 | [113, 1, Conv, [1024, 3, 1]], 156 | 157 | [[114,115,116,117], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 158 | ] 159 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | import time 7 | from pathlib import Path 8 | 9 | import requests 10 | import torch 11 | 12 | 13 | def gsutil_getsize(url=''): 14 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 15 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 16 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 17 | 18 | 19 | def attempt_download(file, repo='WongKinYiu/yolov7'): 20 | # Attempt file download if does not exist 21 | file = Path(str(file).strip().replace("'", '').lower()) 22 | 23 | if not file.exists(): 24 | try: 25 | response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api 26 | assets = [x['name'] for x in response['assets']] # release assets 27 | tag = response['tag_name'] # i.e. 
'v1.0' 28 | except: # fallback plan 29 | assets = ['yolov7.pt', 'yolov7-tiny.pt', 'yolov7x.pt', 'yolov7-d6.pt', 'yolov7-e6.pt', 30 | 'yolov7-e6e.pt', 'yolov7-w6.pt'] 31 | tag = subprocess.check_output('git tag', shell=True).decode().split()[-1] 32 | 33 | name = file.name 34 | if name in assets: 35 | msg = f'{file} missing, try downloading from https://github.com/{repo}/releases/' 36 | redundant = False # second download option 37 | try: # GitHub 38 | url = f'https://github.com/{repo}/releases/download/{tag}/{name}' 39 | print(f'Downloading {url} to {file}...') 40 | torch.hub.download_url_to_file(url, file) 41 | assert file.exists() and file.stat().st_size > 1E6 # check 42 | except Exception as e: # GCP 43 | print(f'Download error: {e}') 44 | assert redundant, 'No secondary mirror' 45 | url = f'https://storage.googleapis.com/{repo}/ckpt/{name}' 46 | print(f'Downloading {url} to {file}...') 47 | os.system(f'curl -L {url} -o {file}') # torch.hub.download_url_to_file(url, weights) 48 | finally: 49 | if not file.exists() or file.stat().st_size < 1E6: # check 50 | file.unlink(missing_ok=True) # remove partial downloads 51 | print(f'ERROR: Download failure: {msg}') 52 | print('') 53 | return 54 | 55 | 56 | def gdrive_download(id='', file='tmp.zip'): 57 | # Downloads a file from Google Drive. from yolov7.utils.google_utils import *; gdrive_download() 58 | t = time.time() 59 | file = Path(file) 60 | cookie = Path('cookie') # gdrive cookie 61 | print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='') 62 | file.unlink(missing_ok=True) # remove existing file 63 | cookie.unlink(missing_ok=True) # remove existing cookie 64 | 65 | # Attempt file download 66 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 67 | os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}') 68 | if os.path.exists('cookie'): # large file 69 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' 70 | else: # small file 71 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' 72 | r = os.system(s) # execute, capture return 73 | cookie.unlink(missing_ok=True) # remove existing cookie 74 | 75 | # Error check 76 | if r != 0: 77 | file.unlink(missing_ok=True) # remove partial 78 | print('Download error ') # raise Exception('Download error') 79 | return r 80 | 81 | # Unzip if archive 82 | if file.suffix == '.zip': 83 | print('unzipping... 
', end='') 84 | os.system(f'unzip -q {file}') # unzip 85 | file.unlink() # remove zip to free space 86 | 87 | print(f'Done ({time.time() - t:.1f}s)') 88 | return r 89 | 90 | 91 | def get_token(cookie="./cookie"): 92 | with open(cookie) as f: 93 | for line in f: 94 | if "download" in line: 95 | return line.split()[-1] 96 | return "" 97 | 98 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 99 | # # Uploads a file to a bucket 100 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 101 | # 102 | # storage_client = storage.Client() 103 | # bucket = storage_client.get_bucket(bucket_name) 104 | # blob = bucket.blob(destination_blob_name) 105 | # 106 | # blob.upload_from_filename(source_file_name) 107 | # 108 | # print('File {} uploaded to {}.'.format( 109 | # source_file_name, 110 | # destination_blob_name)) 111 | # 112 | # 113 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 114 | # # Uploads a blob from a bucket 115 | # storage_client = storage.Client() 116 | # bucket = storage_client.get_bucket(bucket_name) 117 | # blob = bucket.blob(source_blob_name) 118 | # 119 | # blob.download_to_filename(destination_file_name) 120 | # 121 | # print('Blob {} downloaded to {}.'.format( 122 | # source_blob_name, 123 | # destination_file_name)) 124 | -------------------------------------------------------------------------------- /cfg/training/yolov7-w6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [64, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [[-1, -3, -5, -6], 1, Concat, [1]], 27 | [-1, 1, Conv, [128, 1, 1]], # 10 28 | 29 | [-1, 1, Conv, [256, 3, 2]], # 11-P3/8 30 | [-1, 1, Conv, [128, 1, 1]], 31 | [-2, 1, Conv, [128, 1, 1]], 32 | [-1, 1, Conv, [128, 3, 1]], 33 | [-1, 1, Conv, [128, 3, 1]], 34 | [-1, 1, Conv, [128, 3, 1]], 35 | [-1, 1, Conv, [128, 3, 1]], 36 | [[-1, -3, -5, -6], 1, Concat, [1]], 37 | [-1, 1, Conv, [256, 1, 1]], # 19 38 | 39 | [-1, 1, Conv, [512, 3, 2]], # 20-P4/16 40 | [-1, 1, Conv, [256, 1, 1]], 41 | [-2, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [256, 3, 1]], 43 | [-1, 1, Conv, [256, 3, 1]], 44 | [-1, 1, Conv, [256, 3, 1]], 45 | [-1, 1, Conv, [256, 3, 1]], 46 | [[-1, -3, -5, -6], 1, Concat, [1]], 47 | [-1, 1, Conv, [512, 1, 1]], # 28 48 | 49 | [-1, 1, Conv, [768, 3, 2]], # 29-P5/32 50 | [-1, 1, Conv, [384, 1, 1]], 51 | [-2, 1, Conv, [384, 1, 1]], 52 | [-1, 1, Conv, [384, 3, 1]], 53 | [-1, 1, Conv, [384, 3, 1]], 54 | [-1, 1, Conv, [384, 3, 1]], 55 | [-1, 1, Conv, [384, 3, 1]], 56 | [[-1, -3, -5, -6], 1, Concat, [1]], 57 | [-1, 1, Conv, [768, 1, 1]], # 37 58 | 59 | [-1, 1, Conv, [1024, 3, 2]], # 38-P6/64 60 | [-1, 1, Conv, [512, 1, 1]], 61 | [-2, 1, Conv, [512, 1, 1]], 62 | [-1, 1, Conv, [512, 3, 1]], 63 | [-1, 1, Conv, [512, 3, 1]], 64 | [-1, 1, Conv, [512, 3, 1]], 65 | [-1, 1, 
Conv, [512, 3, 1]], 66 | [[-1, -3, -5, -6], 1, Concat, [1]], 67 | [-1, 1, Conv, [1024, 1, 1]], # 46 68 | ] 69 | 70 | # yolov7 head 71 | head: 72 | [[-1, 1, SPPCSPC, [512]], # 47 73 | 74 | [-1, 1, Conv, [384, 1, 1]], 75 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 76 | [37, 1, Conv, [384, 1, 1]], # route backbone P5 77 | [[-1, -2], 1, Concat, [1]], 78 | 79 | [-1, 1, Conv, [384, 1, 1]], 80 | [-2, 1, Conv, [384, 1, 1]], 81 | [-1, 1, Conv, [192, 3, 1]], 82 | [-1, 1, Conv, [192, 3, 1]], 83 | [-1, 1, Conv, [192, 3, 1]], 84 | [-1, 1, Conv, [192, 3, 1]], 85 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 86 | [-1, 1, Conv, [384, 1, 1]], # 59 87 | 88 | [-1, 1, Conv, [256, 1, 1]], 89 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 90 | [28, 1, Conv, [256, 1, 1]], # route backbone P4 91 | [[-1, -2], 1, Concat, [1]], 92 | 93 | [-1, 1, Conv, [256, 1, 1]], 94 | [-2, 1, Conv, [256, 1, 1]], 95 | [-1, 1, Conv, [128, 3, 1]], 96 | [-1, 1, Conv, [128, 3, 1]], 97 | [-1, 1, Conv, [128, 3, 1]], 98 | [-1, 1, Conv, [128, 3, 1]], 99 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 100 | [-1, 1, Conv, [256, 1, 1]], # 71 101 | 102 | [-1, 1, Conv, [128, 1, 1]], 103 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 104 | [19, 1, Conv, [128, 1, 1]], # route backbone P3 105 | [[-1, -2], 1, Concat, [1]], 106 | 107 | [-1, 1, Conv, [128, 1, 1]], 108 | [-2, 1, Conv, [128, 1, 1]], 109 | [-1, 1, Conv, [64, 3, 1]], 110 | [-1, 1, Conv, [64, 3, 1]], 111 | [-1, 1, Conv, [64, 3, 1]], 112 | [-1, 1, Conv, [64, 3, 1]], 113 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 114 | [-1, 1, Conv, [128, 1, 1]], # 83 115 | 116 | [-1, 1, Conv, [256, 3, 2]], 117 | [[-1, 71], 1, Concat, [1]], # cat 118 | 119 | [-1, 1, Conv, [256, 1, 1]], 120 | [-2, 1, Conv, [256, 1, 1]], 121 | [-1, 1, Conv, [128, 3, 1]], 122 | [-1, 1, Conv, [128, 3, 1]], 123 | [-1, 1, Conv, [128, 3, 1]], 124 | [-1, 1, Conv, [128, 3, 1]], 125 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 126 | [-1, 1, Conv, [256, 1, 1]], # 93 127 | 128 | [-1, 1, Conv, [384, 3, 2]], 129 | [[-1, 59], 1, Concat, [1]], # cat 130 | 131 | [-1, 1, Conv, [384, 1, 1]], 132 | [-2, 1, Conv, [384, 1, 1]], 133 | [-1, 1, Conv, [192, 3, 1]], 134 | [-1, 1, Conv, [192, 3, 1]], 135 | [-1, 1, Conv, [192, 3, 1]], 136 | [-1, 1, Conv, [192, 3, 1]], 137 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 138 | [-1, 1, Conv, [384, 1, 1]], # 103 139 | 140 | [-1, 1, Conv, [512, 3, 2]], 141 | [[-1, 47], 1, Concat, [1]], # cat 142 | 143 | [-1, 1, Conv, [512, 1, 1]], 144 | [-2, 1, Conv, [512, 1, 1]], 145 | [-1, 1, Conv, [256, 3, 1]], 146 | [-1, 1, Conv, [256, 3, 1]], 147 | [-1, 1, Conv, [256, 3, 1]], 148 | [-1, 1, Conv, [256, 3, 1]], 149 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 150 | [-1, 1, Conv, [512, 1, 1]], # 113 151 | 152 | [83, 1, Conv, [256, 3, 1]], 153 | [93, 1, Conv, [512, 3, 1]], 154 | [103, 1, Conv, [768, 3, 1]], 155 | [113, 1, Conv, [1024, 3, 1]], 156 | 157 | [83, 1, Conv, [320, 3, 1]], 158 | [71, 1, Conv, [640, 3, 1]], 159 | [59, 1, Conv, [960, 3, 1]], 160 | [47, 1, Conv, [1280, 3, 1]], 161 | 162 | [[114,115,116,117,118,119,120,121], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6) 163 | ] 164 | -------------------------------------------------------------------------------- /cfg/yolov7-mask.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | pooler_scale: 0.25 6 | 7 | # anchors 8 | anchors: 9 | - [12,16, 19,36, 40,28] # P3/8 10 | - 
[36,75, 76,55, 72,146] # P4/16 11 | - [142,110, 192,243, 459,401] # P5/32 12 | 13 | # yolov7 backbone 14 | backbone: 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 18 | [-1, 1, Conv, [64, 3, 1]], 19 | 20 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 21 | [-1, 1, Conv, [64, 1, 1]], 22 | [-2, 1, Conv, [64, 1, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [[-1, -3, -5, -6], 1, Concat, [1]], 28 | [-1, 1, Conv, [256, 1, 1]], # 11 29 | 30 | [-1, 1, MP, []], 31 | [-1, 1, Conv, [128, 1, 1]], 32 | [-3, 1, Conv, [128, 1, 1]], 33 | [-1, 1, Conv, [128, 3, 2]], 34 | [[-1, -3], 1, Concat, [1]], # 16-P3/8 35 | [-1, 1, Conv, [128, 1, 1]], 36 | [-2, 1, Conv, [128, 1, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [[-1, -3, -5, -6], 1, Concat, [1]], 42 | [-1, 1, Conv, [512, 1, 1]], # 24 43 | 44 | [-1, 1, MP, []], 45 | [-1, 1, Conv, [256, 1, 1]], 46 | [-3, 1, Conv, [256, 1, 1]], 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, -3], 1, Concat, [1]], # 29-P4/16 49 | [-1, 1, Conv, [256, 1, 1]], 50 | [-2, 1, Conv, [256, 1, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [-1, 1, Conv, [256, 3, 1]], 53 | [-1, 1, Conv, [256, 3, 1]], 54 | [-1, 1, Conv, [256, 3, 1]], 55 | [[-1, -3, -5, -6], 1, Concat, [1]], 56 | [-1, 1, Conv, [1024, 1, 1]], # 37 57 | 58 | [-1, 1, MP, []], 59 | [-1, 1, Conv, [512, 1, 1]], 60 | [-3, 1, Conv, [512, 1, 1]], 61 | [-1, 1, Conv, [512, 3, 2]], 62 | [[-1, -3], 1, Concat, [1]], # 42-P5/32 63 | [-1, 1, Conv, [256, 1, 1]], 64 | [-2, 1, Conv, [256, 1, 1]], 65 | [-1, 1, Conv, [256, 3, 1]], 66 | [-1, 1, Conv, [256, 3, 1]], 67 | [-1, 1, Conv, [256, 3, 1]], 68 | [-1, 1, Conv, [256, 3, 1]], 69 | [[-1, -3, -5, -6], 1, Concat, [1]], 70 | [-1, 1, Conv, [1024, 1, 1]], # 50 71 | ] 72 | 73 | # yolov7 head 74 | head: 75 | [[-1, 1, SPPCSPC, [512]], # 51 76 | 77 | [-1, 1, Conv, [256, 1, 1]], 78 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 79 | [37, 1, Conv, [256, 1, 1]], # route backbone P4 80 | [[-1, -2], 1, Concat, [1]], 81 | 82 | [-1, 1, Conv, [256, 1, 1]], 83 | [-2, 1, Conv, [256, 1, 1]], 84 | [-1, 1, Conv, [128, 3, 1]], 85 | [-1, 1, Conv, [128, 3, 1]], 86 | [-1, 1, Conv, [128, 3, 1]], 87 | [-1, 1, Conv, [128, 3, 1]], 88 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 89 | [-1, 1, Conv, [256, 1, 1]], # 63 90 | 91 | [-1, 1, Conv, [128, 1, 1]], 92 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 93 | [24, 1, Conv, [128, 1, 1]], # route backbone P3 94 | [[-1, -2], 1, Concat, [1]], 95 | 96 | [-1, 1, Conv, [128, 1, 1]], 97 | [-2, 1, Conv, [128, 1, 1]], 98 | [-1, 1, Conv, [64, 3, 1]], 99 | [-1, 1, Conv, [64, 3, 1]], 100 | [-1, 1, Conv, [64, 3, 1]], 101 | [-1, 1, Conv, [64, 3, 1]], 102 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 103 | [-1, 1, Conv, [128, 1, 1]], # 75 104 | 105 | [-1, 1, MP, []], 106 | [-1, 1, Conv, [128, 1, 1]], 107 | [-3, 1, Conv, [128, 1, 1]], 108 | [-1, 1, Conv, [128, 3, 2]], 109 | [[-1, -3, 63], 1, Concat, [1]], 110 | 111 | [-1, 1, Conv, [256, 1, 1]], 112 | [-2, 1, Conv, [256, 1, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [-1, 1, Conv, [128, 3, 1]], 115 | [-1, 1, Conv, [128, 3, 1]], 116 | [-1, 1, Conv, [128, 3, 1]], 117 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 118 | [-1, 1, Conv, [256, 1, 1]], # 88 119 | 120 | [-1, 1, MP, []], 121 | [-1, 1, Conv, [256, 1, 1]], 122 | [-3, 1, Conv, [256, 1, 1]], 123 | [-1, 1, Conv, [256, 3, 2]], 124 | [[-1, -3, 51], 1, Concat, [1]], 125 | 126 | [-1, 1, 
Conv, [512, 1, 1]], 127 | [-2, 1, Conv, [512, 1, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [-1, 1, Conv, [256, 3, 1]], 130 | [-1, 1, Conv, [256, 3, 1]], 131 | [-1, 1, Conv, [256, 3, 1]], 132 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 133 | [-1, 1, Conv, [512, 1, 1]], # 101 134 | 135 | [75, 1, Conv, [256, 3, 1] ], 136 | [88, 1, Conv, [512, 3, 1] ], 137 | [101, 1, Conv, [1024, 3, 1]], 138 | 139 | [[102, 103, 104], 1, Merge, [[256, 512, 1024]]], # 105 140 | 141 | [75, 1, Conv, [64, 1, 1] ], 142 | [-1, 1, nn.Upsample, [None, 2, 'nearest'] ], 143 | [11, 1, Conv, [64, 1, 1] ], # route backbone P2 144 | [[-1, -2], 1, Concat, [1] ], 145 | [-1, 2, BottleneckCSPB, [64] ], 146 | [-1, 1, Conv, [64, 1, 1] ], # 111 147 | 148 | [[75, 88, 101], 1, Refine, [128, 3, 1]], 149 | [-1, 1, Conv, [128, 3, 1]], 150 | [-1, 1, nn.Upsample, [None, 2, 'bilinear']], 151 | [-1, 1, Conv, [128, 3, 1]], 152 | [111, 1, Conv, [128, 3, 1]], 153 | [[-1, -2], 1, Shortcut, [1]], 154 | [-1, 1, Conv, [128, 3, 1]], 155 | [-1, 1, nn.Conv2d, [4, 1]], # 119 156 | 157 | [[75, 63, 51], 1, Refine, [128, 3, 1]], 158 | [-1, 1, Conv, [128, 3, 1]], 159 | [-1, 1, nn.Upsample, [None, 2, 'bilinear']], 160 | [-1, 1, Conv, [128, 3, 1]], 161 | [111, 1, Conv, [128, 3, 1]], 162 | [[-1, -2], 1, Shortcut, [1]], 163 | [-1, 1, Conv, [128, 3, 1]], 164 | [-1, 1, nn.Conv2d, [1, 1]], # 127 165 | 166 | [[105, 119, 127], 1, MT, [nc, anchors, 980]], # Detect(P3, P4, P5) 167 | ] 168 | 169 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7-e6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7-e6 backbone 14 | backbone: 15 | # [from, number, module, args], 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [80, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, DownC, [160]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 29 | [-1, 1, Conv, [160, 1, 1]], # 12 30 | 31 | [-1, 1, DownC, [320]], # 13-P3/8 32 | [-1, 1, Conv, [128, 1, 1]], 33 | [-2, 1, Conv, [128, 1, 1]], 34 | [-1, 1, Conv, [128, 3, 1]], 35 | [-1, 1, Conv, [128, 3, 1]], 36 | [-1, 1, Conv, [128, 3, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 41 | [-1, 1, Conv, [320, 1, 1]], # 23 42 | 43 | [-1, 1, DownC, [640]], # 24-P4/16 44 | [-1, 1, Conv, [256, 1, 1]], 45 | [-2, 1, Conv, [256, 1, 1]], 46 | [-1, 1, Conv, [256, 3, 1]], 47 | [-1, 1, Conv, [256, 3, 1]], 48 | [-1, 1, Conv, [256, 3, 1]], 49 | [-1, 1, Conv, [256, 3, 1]], 50 | [-1, 1, Conv, [256, 3, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 53 | [-1, 1, Conv, [640, 1, 1]], # 34 54 | 55 | [-1, 1, DownC, [960]], # 35-P5/32 56 | [-1, 1, Conv, [384, 1, 1]], 57 | [-2, 1, Conv, [384, 1, 1]], 58 | [-1, 1, Conv, [384, 3, 1]], 59 | [-1, 1, Conv, [384, 3, 1]], 60 | [-1, 1, Conv, [384, 3, 1]], 61 | [-1, 1, Conv, [384, 3, 1]], 62 | [-1, 1, 
Conv, [384, 3, 1]], 63 | [-1, 1, Conv, [384, 3, 1]], 64 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 65 | [-1, 1, Conv, [960, 1, 1]], # 45 66 | 67 | [-1, 1, DownC, [1280]], # 46-P6/64 68 | [-1, 1, Conv, [512, 1, 1]], 69 | [-2, 1, Conv, [512, 1, 1]], 70 | [-1, 1, Conv, [512, 3, 1]], 71 | [-1, 1, Conv, [512, 3, 1]], 72 | [-1, 1, Conv, [512, 3, 1]], 73 | [-1, 1, Conv, [512, 3, 1]], 74 | [-1, 1, Conv, [512, 3, 1]], 75 | [-1, 1, Conv, [512, 3, 1]], 76 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 77 | [-1, 1, Conv, [1280, 1, 1]], # 56 78 | ] 79 | 80 | # yolov7-e6 head 81 | head: 82 | [[-1, 1, SPPCSPC, [640]], # 57 83 | 84 | [-1, 1, Conv, [480, 1, 1]], 85 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 86 | [45, 1, Conv, [480, 1, 1]], # route backbone P5 87 | [[-1, -2], 1, Concat, [1]], 88 | 89 | [-1, 1, Conv, [384, 1, 1]], 90 | [-2, 1, Conv, [384, 1, 1]], 91 | [-1, 1, Conv, [192, 3, 1]], 92 | [-1, 1, Conv, [192, 3, 1]], 93 | [-1, 1, Conv, [192, 3, 1]], 94 | [-1, 1, Conv, [192, 3, 1]], 95 | [-1, 1, Conv, [192, 3, 1]], 96 | [-1, 1, Conv, [192, 3, 1]], 97 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 98 | [-1, 1, Conv, [480, 1, 1]], # 71 99 | 100 | [-1, 1, Conv, [320, 1, 1]], 101 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 102 | [34, 1, Conv, [320, 1, 1]], # route backbone P4 103 | [[-1, -2], 1, Concat, [1]], 104 | 105 | [-1, 1, Conv, [256, 1, 1]], 106 | [-2, 1, Conv, [256, 1, 1]], 107 | [-1, 1, Conv, [128, 3, 1]], 108 | [-1, 1, Conv, [128, 3, 1]], 109 | [-1, 1, Conv, [128, 3, 1]], 110 | [-1, 1, Conv, [128, 3, 1]], 111 | [-1, 1, Conv, [128, 3, 1]], 112 | [-1, 1, Conv, [128, 3, 1]], 113 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 114 | [-1, 1, Conv, [320, 1, 1]], # 85 115 | 116 | [-1, 1, Conv, [160, 1, 1]], 117 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 118 | [23, 1, Conv, [160, 1, 1]], # route backbone P3 119 | [[-1, -2], 1, Concat, [1]], 120 | 121 | [-1, 1, Conv, [128, 1, 1]], 122 | [-2, 1, Conv, [128, 1, 1]], 123 | [-1, 1, Conv, [64, 3, 1]], 124 | [-1, 1, Conv, [64, 3, 1]], 125 | [-1, 1, Conv, [64, 3, 1]], 126 | [-1, 1, Conv, [64, 3, 1]], 127 | [-1, 1, Conv, [64, 3, 1]], 128 | [-1, 1, Conv, [64, 3, 1]], 129 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 130 | [-1, 1, Conv, [160, 1, 1]], # 99 131 | 132 | [-1, 1, DownC, [320]], 133 | [[-1, 85], 1, Concat, [1]], 134 | 135 | [-1, 1, Conv, [256, 1, 1]], 136 | [-2, 1, Conv, [256, 1, 1]], 137 | [-1, 1, Conv, [128, 3, 1]], 138 | [-1, 1, Conv, [128, 3, 1]], 139 | [-1, 1, Conv, [128, 3, 1]], 140 | [-1, 1, Conv, [128, 3, 1]], 141 | [-1, 1, Conv, [128, 3, 1]], 142 | [-1, 1, Conv, [128, 3, 1]], 143 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 144 | [-1, 1, Conv, [320, 1, 1]], # 111 145 | 146 | [-1, 1, DownC, [480]], 147 | [[-1, 71], 1, Concat, [1]], 148 | 149 | [-1, 1, Conv, [384, 1, 1]], 150 | [-2, 1, Conv, [384, 1, 1]], 151 | [-1, 1, Conv, [192, 3, 1]], 152 | [-1, 1, Conv, [192, 3, 1]], 153 | [-1, 1, Conv, [192, 3, 1]], 154 | [-1, 1, Conv, [192, 3, 1]], 155 | [-1, 1, Conv, [192, 3, 1]], 156 | [-1, 1, Conv, [192, 3, 1]], 157 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 158 | [-1, 1, Conv, [480, 1, 1]], # 123 159 | 160 | [-1, 1, DownC, [640]], 161 | [[-1, 57], 1, Concat, [1]], 162 | 163 | [-1, 1, Conv, [512, 1, 1]], 164 | [-2, 1, Conv, [512, 1, 1]], 165 | [-1, 1, Conv, [256, 3, 1]], 166 | [-1, 1, Conv, [256, 3, 1]], 167 | [-1, 1, Conv, [256, 3, 1]], 168 | [-1, 1, Conv, [256, 3, 1]], 169 | [-1, 1, Conv, [256, 3, 1]], 170 | [-1, 1, Conv, [256, 3, 1]], 171 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 172 | [-1, 1, 
Conv, [640, 1, 1]], # 135 173 | 174 | [99, 1, Conv, [320, 3, 1]], 175 | [111, 1, Conv, [640, 3, 1]], 176 | [123, 1, Conv, [960, 3, 1]], 177 | [135, 1, Conv, [1280, 3, 1]], 178 | 179 | [[136,137,138,139], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 180 | ] 181 | -------------------------------------------------------------------------------- /utils/add_nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import onnx 3 | from onnx import shape_inference 4 | try: 5 | import onnx_graphsurgeon as gs 6 | except Exception as e: 7 | print('Failed to import onnx_graphsurgeon: %s' % e) 8 | 9 | import logging 10 | 11 | LOGGER = logging.getLogger(__name__) 12 | 13 | class RegisterNMS(object): 14 | def __init__( 15 | self, 16 | onnx_model_path: str, 17 | precision: str = "fp32", 18 | ): 19 | 20 | self.graph = gs.import_onnx(onnx.load(onnx_model_path)) 21 | assert self.graph 22 | LOGGER.info("ONNX graph created successfully") 23 | # Fold constants via ONNX-GS that PyTorch2ONNX may have missed 24 | self.graph.fold_constants() 25 | self.precision = precision 26 | self.batch_size = 1 27 | def infer(self): 28 | """ 29 | Sanitize the graph by cleaning any unconnected nodes, do a topological re-sort, 30 | and fold constant input values. When possible, run shape inference on the 31 | ONNX graph to determine tensor shapes. 32 | """ 33 | for _ in range(3): 34 | count_before = len(self.graph.nodes) 35 | 36 | self.graph.cleanup().toposort() 37 | try: 38 | for node in self.graph.nodes: 39 | for o in node.outputs: 40 | o.shape = None 41 | model = gs.export_onnx(self.graph) 42 | model = shape_inference.infer_shapes(model) 43 | self.graph = gs.import_onnx(model) 44 | except Exception as e: 45 | LOGGER.info(f"Shape inference could not be performed at this time:\n{e}") 46 | try: 47 | self.graph.fold_constants(fold_shapes=True) 48 | except TypeError as e: 49 | LOGGER.error( 50 | "This version of ONNX GraphSurgeon does not support folding shapes, " 51 | f"please upgrade your onnx_graphsurgeon module. Error:\n{e}" 52 | ) 53 | raise 54 | 55 | count_after = len(self.graph.nodes) 56 | if count_before == count_after: 57 | # No new folding occurred in this iteration, so we can stop for now. 58 | break 59 | 60 | def save(self, output_path): 61 | """ 62 | Save the ONNX model to the given location. 63 | Args: 64 | output_path: Path pointing to the location where to write 65 | out the updated ONNX model. 66 | """ 67 | self.graph.cleanup().toposort() 68 | model = gs.export_onnx(self.graph) 69 | onnx.save(model, output_path) 70 | LOGGER.info(f"Saved ONNX model to {output_path}") 71 | 72 | def register_nms( 73 | self, 74 | *, 75 | score_thresh: float = 0.25, 76 | nms_thresh: float = 0.45, 77 | detections_per_img: int = 100, 78 | ): 79 | """ 80 | Register the ``EfficientNMS_TRT`` plugin node. 81 | NMS expects these shapes for its input tensors: 82 | - box_net: [batch_size, number_boxes, 4] 83 | - class_net: [batch_size, number_boxes, number_labels] 84 | Args: 85 | score_thresh (float): The scalar threshold for score (low scoring boxes are removed). 86 | nms_thresh (float): The scalar threshold for IOU (new boxes that have high IOU 87 | overlap with previously selected boxes are removed). 88 | detections_per_img (int): Number of best detections to keep after NMS. 
89 | """ 90 | 91 | self.infer() 92 | # Find the concat node at the end of the network 93 | op_inputs = self.graph.outputs 94 | op = "EfficientNMS_TRT" 95 | attrs = { 96 | "plugin_version": "1", 97 | "background_class": -1, # no background class 98 | "max_output_boxes": detections_per_img, 99 | "score_threshold": score_thresh, 100 | "iou_threshold": nms_thresh, 101 | "score_activation": False, 102 | "box_coding": 0, 103 | } 104 | 105 | if self.precision == "fp32": 106 | dtype_output = np.float32 107 | elif self.precision == "fp16": 108 | dtype_output = np.float16 109 | else: 110 | raise NotImplementedError(f"Currently not supports precision: {self.precision}") 111 | 112 | # NMS Outputs 113 | output_num_detections = gs.Variable( 114 | name="num_dets", 115 | dtype=np.int32, 116 | shape=[self.batch_size, 1], 117 | ) # A scalar indicating the number of valid detections per batch image. 118 | output_boxes = gs.Variable( 119 | name="det_boxes", 120 | dtype=dtype_output, 121 | shape=[self.batch_size, detections_per_img, 4], 122 | ) 123 | output_scores = gs.Variable( 124 | name="det_scores", 125 | dtype=dtype_output, 126 | shape=[self.batch_size, detections_per_img], 127 | ) 128 | output_labels = gs.Variable( 129 | name="det_classes", 130 | dtype=np.int32, 131 | shape=[self.batch_size, detections_per_img], 132 | ) 133 | 134 | op_outputs = [output_num_detections, output_boxes, output_scores, output_labels] 135 | 136 | # Create the NMS Plugin node with the selected inputs. The outputs of the node will also 137 | # become the final outputs of the graph. 138 | self.graph.layer(op=op, name="batched_nms", inputs=op_inputs, outputs=op_outputs, attrs=attrs) 139 | LOGGER.info(f"Created NMS plugin '{op}' with attributes: {attrs}") 140 | 141 | self.graph.outputs = op_outputs 142 | 143 | self.infer() 144 | 145 | def save(self, output_path): 146 | """ 147 | Save the ONNX model to the given location. 148 | Args: 149 | output_path: Path pointing to the location where to write 150 | out the updated ONNX model. 
151 | """ 152 | self.graph.cleanup().toposort() 153 | model = gs.export_onnx(self.graph) 154 | onnx.save(model, output_path) 155 | LOGGER.info(f"Saved ONNX model to {output_path}") 156 | -------------------------------------------------------------------------------- /cfg/training/yolov7-e6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7 backbone 14 | backbone: 15 | # [from, number, module, args], 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [80, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, DownC, [160]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 29 | [-1, 1, Conv, [160, 1, 1]], # 12 30 | 31 | [-1, 1, DownC, [320]], # 13-P3/8 32 | [-1, 1, Conv, [128, 1, 1]], 33 | [-2, 1, Conv, [128, 1, 1]], 34 | [-1, 1, Conv, [128, 3, 1]], 35 | [-1, 1, Conv, [128, 3, 1]], 36 | [-1, 1, Conv, [128, 3, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 41 | [-1, 1, Conv, [320, 1, 1]], # 23 42 | 43 | [-1, 1, DownC, [640]], # 24-P4/16 44 | [-1, 1, Conv, [256, 1, 1]], 45 | [-2, 1, Conv, [256, 1, 1]], 46 | [-1, 1, Conv, [256, 3, 1]], 47 | [-1, 1, Conv, [256, 3, 1]], 48 | [-1, 1, Conv, [256, 3, 1]], 49 | [-1, 1, Conv, [256, 3, 1]], 50 | [-1, 1, Conv, [256, 3, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 53 | [-1, 1, Conv, [640, 1, 1]], # 34 54 | 55 | [-1, 1, DownC, [960]], # 35-P5/32 56 | [-1, 1, Conv, [384, 1, 1]], 57 | [-2, 1, Conv, [384, 1, 1]], 58 | [-1, 1, Conv, [384, 3, 1]], 59 | [-1, 1, Conv, [384, 3, 1]], 60 | [-1, 1, Conv, [384, 3, 1]], 61 | [-1, 1, Conv, [384, 3, 1]], 62 | [-1, 1, Conv, [384, 3, 1]], 63 | [-1, 1, Conv, [384, 3, 1]], 64 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 65 | [-1, 1, Conv, [960, 1, 1]], # 45 66 | 67 | [-1, 1, DownC, [1280]], # 46-P6/64 68 | [-1, 1, Conv, [512, 1, 1]], 69 | [-2, 1, Conv, [512, 1, 1]], 70 | [-1, 1, Conv, [512, 3, 1]], 71 | [-1, 1, Conv, [512, 3, 1]], 72 | [-1, 1, Conv, [512, 3, 1]], 73 | [-1, 1, Conv, [512, 3, 1]], 74 | [-1, 1, Conv, [512, 3, 1]], 75 | [-1, 1, Conv, [512, 3, 1]], 76 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 77 | [-1, 1, Conv, [1280, 1, 1]], # 56 78 | ] 79 | 80 | # yolov7 head 81 | head: 82 | [[-1, 1, SPPCSPC, [640]], # 57 83 | 84 | [-1, 1, Conv, [480, 1, 1]], 85 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 86 | [45, 1, Conv, [480, 1, 1]], # route backbone P5 87 | [[-1, -2], 1, Concat, [1]], 88 | 89 | [-1, 1, Conv, [384, 1, 1]], 90 | [-2, 1, Conv, [384, 1, 1]], 91 | [-1, 1, Conv, [192, 3, 1]], 92 | [-1, 1, Conv, [192, 3, 1]], 93 | [-1, 1, Conv, [192, 3, 1]], 94 | [-1, 1, Conv, [192, 3, 1]], 95 | [-1, 1, Conv, [192, 3, 1]], 96 | [-1, 1, Conv, [192, 3, 1]], 97 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 98 | [-1, 1, Conv, [480, 1, 1]], # 71 99 | 100 | [-1, 1, Conv, [320, 1, 1]], 101 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 102 | [34, 1, Conv, [320, 1, 
1]], # route backbone P4 103 | [[-1, -2], 1, Concat, [1]], 104 | 105 | [-1, 1, Conv, [256, 1, 1]], 106 | [-2, 1, Conv, [256, 1, 1]], 107 | [-1, 1, Conv, [128, 3, 1]], 108 | [-1, 1, Conv, [128, 3, 1]], 109 | [-1, 1, Conv, [128, 3, 1]], 110 | [-1, 1, Conv, [128, 3, 1]], 111 | [-1, 1, Conv, [128, 3, 1]], 112 | [-1, 1, Conv, [128, 3, 1]], 113 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 114 | [-1, 1, Conv, [320, 1, 1]], # 85 115 | 116 | [-1, 1, Conv, [160, 1, 1]], 117 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 118 | [23, 1, Conv, [160, 1, 1]], # route backbone P3 119 | [[-1, -2], 1, Concat, [1]], 120 | 121 | [-1, 1, Conv, [128, 1, 1]], 122 | [-2, 1, Conv, [128, 1, 1]], 123 | [-1, 1, Conv, [64, 3, 1]], 124 | [-1, 1, Conv, [64, 3, 1]], 125 | [-1, 1, Conv, [64, 3, 1]], 126 | [-1, 1, Conv, [64, 3, 1]], 127 | [-1, 1, Conv, [64, 3, 1]], 128 | [-1, 1, Conv, [64, 3, 1]], 129 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 130 | [-1, 1, Conv, [160, 1, 1]], # 99 131 | 132 | [-1, 1, DownC, [320]], 133 | [[-1, 85], 1, Concat, [1]], 134 | 135 | [-1, 1, Conv, [256, 1, 1]], 136 | [-2, 1, Conv, [256, 1, 1]], 137 | [-1, 1, Conv, [128, 3, 1]], 138 | [-1, 1, Conv, [128, 3, 1]], 139 | [-1, 1, Conv, [128, 3, 1]], 140 | [-1, 1, Conv, [128, 3, 1]], 141 | [-1, 1, Conv, [128, 3, 1]], 142 | [-1, 1, Conv, [128, 3, 1]], 143 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 144 | [-1, 1, Conv, [320, 1, 1]], # 111 145 | 146 | [-1, 1, DownC, [480]], 147 | [[-1, 71], 1, Concat, [1]], 148 | 149 | [-1, 1, Conv, [384, 1, 1]], 150 | [-2, 1, Conv, [384, 1, 1]], 151 | [-1, 1, Conv, [192, 3, 1]], 152 | [-1, 1, Conv, [192, 3, 1]], 153 | [-1, 1, Conv, [192, 3, 1]], 154 | [-1, 1, Conv, [192, 3, 1]], 155 | [-1, 1, Conv, [192, 3, 1]], 156 | [-1, 1, Conv, [192, 3, 1]], 157 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 158 | [-1, 1, Conv, [480, 1, 1]], # 123 159 | 160 | [-1, 1, DownC, [640]], 161 | [[-1, 57], 1, Concat, [1]], 162 | 163 | [-1, 1, Conv, [512, 1, 1]], 164 | [-2, 1, Conv, [512, 1, 1]], 165 | [-1, 1, Conv, [256, 3, 1]], 166 | [-1, 1, Conv, [256, 3, 1]], 167 | [-1, 1, Conv, [256, 3, 1]], 168 | [-1, 1, Conv, [256, 3, 1]], 169 | [-1, 1, Conv, [256, 3, 1]], 170 | [-1, 1, Conv, [256, 3, 1]], 171 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 172 | [-1, 1, Conv, [640, 1, 1]], # 135 173 | 174 | [99, 1, Conv, [320, 3, 1]], 175 | [111, 1, Conv, [640, 3, 1]], 176 | [123, 1, Conv, [960, 3, 1]], 177 | [135, 1, Conv, [1280, 3, 1]], 178 | 179 | [99, 1, Conv, [320, 3, 1]], 180 | [85, 1, Conv, [640, 3, 1]], 181 | [71, 1, Conv, [960, 3, 1]], 182 | [57, 1, Conv, [1280, 3, 1]], 183 | 184 | [[136,137,138,139,140,141,142,143], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6) 185 | ] 186 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7-d6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7-d6 backbone 14 | backbone: 15 | # [from, number, module, args], 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [96, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, DownC, [192]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | 
[-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [-1, 1, Conv, [64, 3, 1]], 29 | [-1, 1, Conv, [64, 3, 1]], 30 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 31 | [-1, 1, Conv, [192, 1, 1]], # 14 32 | 33 | [-1, 1, DownC, [384]], # 15-P3/8 34 | [-1, 1, Conv, [128, 1, 1]], 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, Conv, [128, 3, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [-1, 1, Conv, [128, 3, 1]], 42 | [-1, 1, Conv, [128, 3, 1]], 43 | [-1, 1, Conv, [128, 3, 1]], 44 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 45 | [-1, 1, Conv, [384, 1, 1]], # 27 46 | 47 | [-1, 1, DownC, [768]], # 28-P4/16 48 | [-1, 1, Conv, [256, 1, 1]], 49 | [-2, 1, Conv, [256, 1, 1]], 50 | [-1, 1, Conv, [256, 3, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [-1, 1, Conv, [256, 3, 1]], 53 | [-1, 1, Conv, [256, 3, 1]], 54 | [-1, 1, Conv, [256, 3, 1]], 55 | [-1, 1, Conv, [256, 3, 1]], 56 | [-1, 1, Conv, [256, 3, 1]], 57 | [-1, 1, Conv, [256, 3, 1]], 58 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 59 | [-1, 1, Conv, [768, 1, 1]], # 40 60 | 61 | [-1, 1, DownC, [1152]], # 41-P5/32 62 | [-1, 1, Conv, [384, 1, 1]], 63 | [-2, 1, Conv, [384, 1, 1]], 64 | [-1, 1, Conv, [384, 3, 1]], 65 | [-1, 1, Conv, [384, 3, 1]], 66 | [-1, 1, Conv, [384, 3, 1]], 67 | [-1, 1, Conv, [384, 3, 1]], 68 | [-1, 1, Conv, [384, 3, 1]], 69 | [-1, 1, Conv, [384, 3, 1]], 70 | [-1, 1, Conv, [384, 3, 1]], 71 | [-1, 1, Conv, [384, 3, 1]], 72 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 73 | [-1, 1, Conv, [1152, 1, 1]], # 53 74 | 75 | [-1, 1, DownC, [1536]], # 54-P6/64 76 | [-1, 1, Conv, [512, 1, 1]], 77 | [-2, 1, Conv, [512, 1, 1]], 78 | [-1, 1, Conv, [512, 3, 1]], 79 | [-1, 1, Conv, [512, 3, 1]], 80 | [-1, 1, Conv, [512, 3, 1]], 81 | [-1, 1, Conv, [512, 3, 1]], 82 | [-1, 1, Conv, [512, 3, 1]], 83 | [-1, 1, Conv, [512, 3, 1]], 84 | [-1, 1, Conv, [512, 3, 1]], 85 | [-1, 1, Conv, [512, 3, 1]], 86 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 87 | [-1, 1, Conv, [1536, 1, 1]], # 66 88 | ] 89 | 90 | # yolov7-d6 head 91 | head: 92 | [[-1, 1, SPPCSPC, [768]], # 67 93 | 94 | [-1, 1, Conv, [576, 1, 1]], 95 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 96 | [53, 1, Conv, [576, 1, 1]], # route backbone P5 97 | [[-1, -2], 1, Concat, [1]], 98 | 99 | [-1, 1, Conv, [384, 1, 1]], 100 | [-2, 1, Conv, [384, 1, 1]], 101 | [-1, 1, Conv, [192, 3, 1]], 102 | [-1, 1, Conv, [192, 3, 1]], 103 | [-1, 1, Conv, [192, 3, 1]], 104 | [-1, 1, Conv, [192, 3, 1]], 105 | [-1, 1, Conv, [192, 3, 1]], 106 | [-1, 1, Conv, [192, 3, 1]], 107 | [-1, 1, Conv, [192, 3, 1]], 108 | [-1, 1, Conv, [192, 3, 1]], 109 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 110 | [-1, 1, Conv, [576, 1, 1]], # 83 111 | 112 | [-1, 1, Conv, [384, 1, 1]], 113 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 114 | [40, 1, Conv, [384, 1, 1]], # route backbone P4 115 | [[-1, -2], 1, Concat, [1]], 116 | 117 | [-1, 1, Conv, [256, 1, 1]], 118 | [-2, 1, Conv, [256, 1, 1]], 119 | [-1, 1, Conv, [128, 3, 1]], 120 | [-1, 1, Conv, [128, 3, 1]], 121 | [-1, 1, Conv, [128, 3, 1]], 122 | [-1, 1, Conv, [128, 3, 1]], 123 | [-1, 1, Conv, [128, 3, 1]], 124 | [-1, 1, Conv, [128, 3, 1]], 125 | [-1, 1, Conv, [128, 3, 1]], 126 | [-1, 1, Conv, [128, 3, 1]], 127 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 128 | [-1, 1, Conv, [384, 1, 1]], # 99 129 | 130 | [-1, 1, Conv, [192, 1, 1]], 131 | [-1, 1, 
nn.Upsample, [None, 2, 'nearest']], 132 | [27, 1, Conv, [192, 1, 1]], # route backbone P3 133 | [[-1, -2], 1, Concat, [1]], 134 | 135 | [-1, 1, Conv, [128, 1, 1]], 136 | [-2, 1, Conv, [128, 1, 1]], 137 | [-1, 1, Conv, [64, 3, 1]], 138 | [-1, 1, Conv, [64, 3, 1]], 139 | [-1, 1, Conv, [64, 3, 1]], 140 | [-1, 1, Conv, [64, 3, 1]], 141 | [-1, 1, Conv, [64, 3, 1]], 142 | [-1, 1, Conv, [64, 3, 1]], 143 | [-1, 1, Conv, [64, 3, 1]], 144 | [-1, 1, Conv, [64, 3, 1]], 145 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 146 | [-1, 1, Conv, [192, 1, 1]], # 115 147 | 148 | [-1, 1, DownC, [384]], 149 | [[-1, 99], 1, Concat, [1]], 150 | 151 | [-1, 1, Conv, [256, 1, 1]], 152 | [-2, 1, Conv, [256, 1, 1]], 153 | [-1, 1, Conv, [128, 3, 1]], 154 | [-1, 1, Conv, [128, 3, 1]], 155 | [-1, 1, Conv, [128, 3, 1]], 156 | [-1, 1, Conv, [128, 3, 1]], 157 | [-1, 1, Conv, [128, 3, 1]], 158 | [-1, 1, Conv, [128, 3, 1]], 159 | [-1, 1, Conv, [128, 3, 1]], 160 | [-1, 1, Conv, [128, 3, 1]], 161 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 162 | [-1, 1, Conv, [384, 1, 1]], # 129 163 | 164 | [-1, 1, DownC, [576]], 165 | [[-1, 83], 1, Concat, [1]], 166 | 167 | [-1, 1, Conv, [384, 1, 1]], 168 | [-2, 1, Conv, [384, 1, 1]], 169 | [-1, 1, Conv, [192, 3, 1]], 170 | [-1, 1, Conv, [192, 3, 1]], 171 | [-1, 1, Conv, [192, 3, 1]], 172 | [-1, 1, Conv, [192, 3, 1]], 173 | [-1, 1, Conv, [192, 3, 1]], 174 | [-1, 1, Conv, [192, 3, 1]], 175 | [-1, 1, Conv, [192, 3, 1]], 176 | [-1, 1, Conv, [192, 3, 1]], 177 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 178 | [-1, 1, Conv, [576, 1, 1]], # 143 179 | 180 | [-1, 1, DownC, [768]], 181 | [[-1, 67], 1, Concat, [1]], 182 | 183 | [-1, 1, Conv, [512, 1, 1]], 184 | [-2, 1, Conv, [512, 1, 1]], 185 | [-1, 1, Conv, [256, 3, 1]], 186 | [-1, 1, Conv, [256, 3, 1]], 187 | [-1, 1, Conv, [256, 3, 1]], 188 | [-1, 1, Conv, [256, 3, 1]], 189 | [-1, 1, Conv, [256, 3, 1]], 190 | [-1, 1, Conv, [256, 3, 1]], 191 | [-1, 1, Conv, [256, 3, 1]], 192 | [-1, 1, Conv, [256, 3, 1]], 193 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 194 | [-1, 1, Conv, [768, 1, 1]], # 157 195 | 196 | [115, 1, Conv, [384, 3, 1]], 197 | [129, 1, Conv, [768, 3, 1]], 198 | [143, 1, Conv, [1152, 3, 1]], 199 | [157, 1, Conv, [1536, 3, 1]], 200 | 201 | [[158,159,160,161], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 202 | ] 203 | -------------------------------------------------------------------------------- /cfg/training/yolov7-d6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7 backbone 14 | backbone: 15 | # [from, number, module, args], 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [96, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, DownC, [192]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [-1, 1, Conv, [64, 3, 1]], 29 | [-1, 1, Conv, [64, 3, 1]], 30 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 31 | [-1, 1, Conv, [192, 1, 1]], # 14 32 | 33 | [-1, 1, DownC, [384]], # 
15-P3/8 34 | [-1, 1, Conv, [128, 1, 1]], 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, Conv, [128, 3, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [-1, 1, Conv, [128, 3, 1]], 42 | [-1, 1, Conv, [128, 3, 1]], 43 | [-1, 1, Conv, [128, 3, 1]], 44 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 45 | [-1, 1, Conv, [384, 1, 1]], # 27 46 | 47 | [-1, 1, DownC, [768]], # 28-P4/16 48 | [-1, 1, Conv, [256, 1, 1]], 49 | [-2, 1, Conv, [256, 1, 1]], 50 | [-1, 1, Conv, [256, 3, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [-1, 1, Conv, [256, 3, 1]], 53 | [-1, 1, Conv, [256, 3, 1]], 54 | [-1, 1, Conv, [256, 3, 1]], 55 | [-1, 1, Conv, [256, 3, 1]], 56 | [-1, 1, Conv, [256, 3, 1]], 57 | [-1, 1, Conv, [256, 3, 1]], 58 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 59 | [-1, 1, Conv, [768, 1, 1]], # 40 60 | 61 | [-1, 1, DownC, [1152]], # 41-P5/32 62 | [-1, 1, Conv, [384, 1, 1]], 63 | [-2, 1, Conv, [384, 1, 1]], 64 | [-1, 1, Conv, [384, 3, 1]], 65 | [-1, 1, Conv, [384, 3, 1]], 66 | [-1, 1, Conv, [384, 3, 1]], 67 | [-1, 1, Conv, [384, 3, 1]], 68 | [-1, 1, Conv, [384, 3, 1]], 69 | [-1, 1, Conv, [384, 3, 1]], 70 | [-1, 1, Conv, [384, 3, 1]], 71 | [-1, 1, Conv, [384, 3, 1]], 72 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 73 | [-1, 1, Conv, [1152, 1, 1]], # 53 74 | 75 | [-1, 1, DownC, [1536]], # 54-P6/64 76 | [-1, 1, Conv, [512, 1, 1]], 77 | [-2, 1, Conv, [512, 1, 1]], 78 | [-1, 1, Conv, [512, 3, 1]], 79 | [-1, 1, Conv, [512, 3, 1]], 80 | [-1, 1, Conv, [512, 3, 1]], 81 | [-1, 1, Conv, [512, 3, 1]], 82 | [-1, 1, Conv, [512, 3, 1]], 83 | [-1, 1, Conv, [512, 3, 1]], 84 | [-1, 1, Conv, [512, 3, 1]], 85 | [-1, 1, Conv, [512, 3, 1]], 86 | [[-1, -3, -5, -7, -9, -10], 1, Concat, [1]], 87 | [-1, 1, Conv, [1536, 1, 1]], # 66 88 | ] 89 | 90 | # yolov7 head 91 | head: 92 | [[-1, 1, SPPCSPC, [768]], # 67 93 | 94 | [-1, 1, Conv, [576, 1, 1]], 95 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 96 | [53, 1, Conv, [576, 1, 1]], # route backbone P5 97 | [[-1, -2], 1, Concat, [1]], 98 | 99 | [-1, 1, Conv, [384, 1, 1]], 100 | [-2, 1, Conv, [384, 1, 1]], 101 | [-1, 1, Conv, [192, 3, 1]], 102 | [-1, 1, Conv, [192, 3, 1]], 103 | [-1, 1, Conv, [192, 3, 1]], 104 | [-1, 1, Conv, [192, 3, 1]], 105 | [-1, 1, Conv, [192, 3, 1]], 106 | [-1, 1, Conv, [192, 3, 1]], 107 | [-1, 1, Conv, [192, 3, 1]], 108 | [-1, 1, Conv, [192, 3, 1]], 109 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 110 | [-1, 1, Conv, [576, 1, 1]], # 83 111 | 112 | [-1, 1, Conv, [384, 1, 1]], 113 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 114 | [40, 1, Conv, [384, 1, 1]], # route backbone P4 115 | [[-1, -2], 1, Concat, [1]], 116 | 117 | [-1, 1, Conv, [256, 1, 1]], 118 | [-2, 1, Conv, [256, 1, 1]], 119 | [-1, 1, Conv, [128, 3, 1]], 120 | [-1, 1, Conv, [128, 3, 1]], 121 | [-1, 1, Conv, [128, 3, 1]], 122 | [-1, 1, Conv, [128, 3, 1]], 123 | [-1, 1, Conv, [128, 3, 1]], 124 | [-1, 1, Conv, [128, 3, 1]], 125 | [-1, 1, Conv, [128, 3, 1]], 126 | [-1, 1, Conv, [128, 3, 1]], 127 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 128 | [-1, 1, Conv, [384, 1, 1]], # 99 129 | 130 | [-1, 1, Conv, [192, 1, 1]], 131 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 132 | [27, 1, Conv, [192, 1, 1]], # route backbone P3 133 | [[-1, -2], 1, Concat, [1]], 134 | 135 | [-1, 1, Conv, [128, 1, 1]], 136 | [-2, 1, Conv, [128, 1, 1]], 137 | [-1, 1, Conv, [64, 3, 1]], 138 | [-1, 1, Conv, [64, 3, 1]], 139 | [-1, 1, Conv, [64, 3, 1]], 140 | [-1, 1, Conv, [64, 3, 1]], 141 | [-1, 1, Conv, 
[64, 3, 1]], 142 | [-1, 1, Conv, [64, 3, 1]], 143 | [-1, 1, Conv, [64, 3, 1]], 144 | [-1, 1, Conv, [64, 3, 1]], 145 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 146 | [-1, 1, Conv, [192, 1, 1]], # 115 147 | 148 | [-1, 1, DownC, [384]], 149 | [[-1, 99], 1, Concat, [1]], 150 | 151 | [-1, 1, Conv, [256, 1, 1]], 152 | [-2, 1, Conv, [256, 1, 1]], 153 | [-1, 1, Conv, [128, 3, 1]], 154 | [-1, 1, Conv, [128, 3, 1]], 155 | [-1, 1, Conv, [128, 3, 1]], 156 | [-1, 1, Conv, [128, 3, 1]], 157 | [-1, 1, Conv, [128, 3, 1]], 158 | [-1, 1, Conv, [128, 3, 1]], 159 | [-1, 1, Conv, [128, 3, 1]], 160 | [-1, 1, Conv, [128, 3, 1]], 161 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 162 | [-1, 1, Conv, [384, 1, 1]], # 129 163 | 164 | [-1, 1, DownC, [576]], 165 | [[-1, 83], 1, Concat, [1]], 166 | 167 | [-1, 1, Conv, [384, 1, 1]], 168 | [-2, 1, Conv, [384, 1, 1]], 169 | [-1, 1, Conv, [192, 3, 1]], 170 | [-1, 1, Conv, [192, 3, 1]], 171 | [-1, 1, Conv, [192, 3, 1]], 172 | [-1, 1, Conv, [192, 3, 1]], 173 | [-1, 1, Conv, [192, 3, 1]], 174 | [-1, 1, Conv, [192, 3, 1]], 175 | [-1, 1, Conv, [192, 3, 1]], 176 | [-1, 1, Conv, [192, 3, 1]], 177 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 178 | [-1, 1, Conv, [576, 1, 1]], # 143 179 | 180 | [-1, 1, DownC, [768]], 181 | [[-1, 67], 1, Concat, [1]], 182 | 183 | [-1, 1, Conv, [512, 1, 1]], 184 | [-2, 1, Conv, [512, 1, 1]], 185 | [-1, 1, Conv, [256, 3, 1]], 186 | [-1, 1, Conv, [256, 3, 1]], 187 | [-1, 1, Conv, [256, 3, 1]], 188 | [-1, 1, Conv, [256, 3, 1]], 189 | [-1, 1, Conv, [256, 3, 1]], 190 | [-1, 1, Conv, [256, 3, 1]], 191 | [-1, 1, Conv, [256, 3, 1]], 192 | [-1, 1, Conv, [256, 3, 1]], 193 | [[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10], 1, Concat, [1]], 194 | [-1, 1, Conv, [768, 1, 1]], # 157 195 | 196 | [115, 1, Conv, [384, 3, 1]], 197 | [129, 1, Conv, [768, 3, 1]], 198 | [143, 1, Conv, [1152, 3, 1]], 199 | [157, 1, Conv, [1536, 3, 1]], 200 | 201 | [115, 1, Conv, [384, 3, 1]], 202 | [99, 1, Conv, [768, 3, 1]], 203 | [83, 1, Conv, [1152, 3, 1]], 204 | [67, 1, Conv, [1536, 3, 1]], 205 | 206 | [[158,159,160,161,162,163,164,165], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6) 207 | ] 208 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Yolov7 Segmentation model with TensorRT 2 | **This repository implements the real-time instance segmentation algorithm Yolov7 with TensorRT.** 3 | 4 | ## Some remarks 5 | - The initial repository on which I built mine is __**WongKinYiu/yolov7**__ (https://github.com/WongKinYiu/yolov7); for additional information about the installation of Yolov7, refer to the original repository. 6 | - This project is built upon the excellent **detectron2** framework; you should install detectron2 first, and please check the official installation guide for more details (https://github.com/facebookresearch/detectron2.git). 7 | - For commands other than TensorRT and ONNX inference (e.g. detect.py), please refer to the initial repository. 8 | - If you face any problem during parsing, don't hesitate to open an issue. If there aren't any, don't hesitate to drop a :star: 9 | - Be aware that, in order to parse the model to ONNX and TensorRT, some original files have been slightly modified; don't forget to check the modifications if you come from the initial repository. 10 | 11 | 12 | ## Installation and dependencies 13 |
14 | 15 | 16 | - pip3 install -r requirements.txt 17 | - Install PyTorch (1.10.0) and TorchVision (0.11.1) 18 | ```bash 19 | pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 20 | 21 | # If another torch version is needed, pin it explicitly, e.g. torch==1.11.0+cu102 22 | ``` 23 | 24 | - Install CUDA (10.2) and cuDNN (8.0.0): https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&Distribution=WSL-Ubuntu&target_version=2.0&target_type=deb_local 25 | 26 | - For WSL-Ubuntu: 27 | ```bash 28 | sudo wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-wsl-ubuntu.pin 29 | sudo mv cuda-wsl-ubuntu.pin /etc/apt/preferences.d/cuda-repository-pin-600 30 | sudo wget https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda-repo-wsl-ubuntu-11-7-local_11.7.1-1_amd64.deb 32 | sudo dpkg -i cuda-repo-wsl-ubuntu-11-7-local_11.7.1-1_amd64.deb 33 | sudo cp /var/cuda-repo-wsl-ubuntu-11-7-local/cuda-96193861-keyring.gpg /usr/share/keyrings/ 34 | sudo apt-get update 35 | sudo apt-get -y install cuda 36 | ``` 37 | 38 | - Install TensorRT (8.0.1.6); if you are using an NVIDIA edge device, TensorRT should already be installed 39 | ```bash 40 | python3 -m pip install --upgrade setuptools pip 41 | python3 -m pip install nvidia-pyindex 42 | python3 -m pip install --upgrade nvidia-tensorrt 43 | 44 | # Verify the installation with: assert tensorrt.Builder(tensorrt.Logger()) 45 | ``` 46 | - Install ONNX and ONNXRuntime 47 | ```bash 48 | pip install onnxruntime-gpu 49 | pip install onnxruntime 50 | pip install numpy protobuf==4.21.5 51 | pip install onnx 52 | ``` 53 | - Install all the other packages needed to run the original SparseInst algorithm (this should already be covered if you have installed Detectron2); a quick sanity check is shown below 54 | 55 |
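Before moving on, a short sanity check confirms that the whole stack is importable. This is a minimal sketch, assuming the package versions listed above; the final `assert` is the TensorRT check suggested in the install notes:

```python
import torch
import onnx
import onnxruntime
import tensorrt

print("torch", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("onnx", onnx.__version__, "| onnxruntime", onnxruntime.__version__)
print("tensorrt", tensorrt.__version__)
assert tensorrt.Builder(tensorrt.Logger())  # fails if TensorRT cannot create a builder
```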
56 | 57 | 58 | ## Models and Results for the TensorRT and ONNX inference scripts: 59 | 60 | The inference speed for TensorRT is shown in the table below. Yolov7 running with TensorRT achieves more or less 3 times faster inference than Yolov7 running with PyTorch. Lowering the input size of the image can lead to decent real-time speed. 61 | The TensorRT and ONNX models are built upon the PyTorch weights Yolov7-mask.pt: 62 | 63 | ``` 64 | wget -c https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-mask.pt 65 | ``` 66 | 67 | *Note: All the computations have been done on an Nvidia Jetson TX2 (JetPack 4.6).* 68 | 69 |
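To read the frame rates below as per-frame latency, the conversion is simply latency_ms = 1000 / FPS; a quick sketch using the values from the table that follows:

```python
# Values taken from the benchmark table below (Jetson TX2, JetPack 4.6).
for input_size, fps in [(320, 14.0), (640, 5.0)]:
    print(f"{input_size}px input: {1000 / fps:.0f} ms per frame")
# 320px input: 71 ms per frame
# 640px input: 200 ms per frame
```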
70 | 71 | | Model | Input Size | Inference Speed | 72 | | :---: | :---: | :---: | 73 | | Yolov7 Segmentation TensorRT | 320 | 14.00 FPS | 74 | | Yolov7 Segmentation TensorRT | 640 | 5.00 FPS | 75 | 76 |
77 | 78 | Instance Segmentation with | Yolov7 TensorRT 79 | :-------------------------:|:-------------------------: 80 | ![](results/640_trt_cv2img_VP_0.jpg) | ![](results/640_trt_cv2img_VP_1.jpg) 81 | ![](results/640_trt_cv2img_VP_2.jpg) | ![](results/640_trt_cv2img_VP_3.jpg) 82 | ![](results/640_trt_cv2img_VP_4.jpg) | ![](results/640_trt_cv2img_VP_5.jpg) 83 | 84 | 85 | ## Building the ONNX model: 86 | 87 | To export the model from PyTorch to ONNX, run the following command. You can leave the arguments at their defaults. Please check that the config path and the model weights path are set up correctly. 88 | ``` 89 | mkdir onnx && python3 export_mask.py --weights ./yolov7-mask.pt --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --imgsz $imgsz --input $input_image_path 90 | ``` 91 | The ONNX file will be saved in the newly created onnx directory. The input that you give is inferred right after the export to verify that the export was successful; if you don't want to infer the image, just add --no_infer. 92 | 93 | ## Building the TensorRT model: 94 | 95 | To build the TensorRT engine from the ONNX model, run the following command. You can leave the arguments at their defaults. If you have any problem while parsing the model to TensorRT, don't hesitate to ask. The export is based on the TensorRT-For-YOLO-Series repository (https://github.com/Linaom1214/TensorRT-For-YOLO-Series). 96 | ``` 97 | git clone https://github.com/Linaom1214/TensorRT-For-YOLO-Series.git 98 | mkdir engine && python3 ./TensorRT-For-YOLO-Series/export.py -o $onnx_file_path -e ./engine/engine_name.engine -p fp16 99 | ``` 100 | 101 | ## Testing Yolov7 with PyTorch, TensorRT and ONNX: 102 | **TensorRT** 103 | 104 | To test the inference speed (FPS) of the TensorRT model, run the following command. 105 | 106 | ``` 107 | python3 segment_image.py --input $input_image_path --model $tensorrt_engine_path --onnx_model $onnx_model_path --imgsz $image_size 108 | ``` 109 | 110 | You can still find the ONNX inference in export_mask.py if you need it. 111 | 112 | 113 | **Notes:** 114 | - The **--input argument** can be either an image or a directory of images (directory/*) 115 | 116 | ## Visualizing Yolov7 with TensorRT: 117 | **TensorRT** 118 | 1. To visualize segmentation results on your **images or directory of images**, run the following commands: 119 | 120 | 121 | ``` 122 | python3 segment_image.py --input $input_image_path --model $tensorrt_engine_path --onnx_model $onnx_model_path --imgsz $image_size --save_image --save_path $result_directory_path 123 | 124 | python3 segment_image.py --input $image_directory_path/* --model $tensorrt_engine_path --onnx_model $onnx_model_path --imgsz $image_size --save_image --save_path $result_directory_path 125 | ``` 126 | 2. To visualize segmentation results on your **video**, run the following command: 127 | ``` 128 | python3 segment_video.py --input $input_video_path --model $tensorrt_engine_path --onnx_model $onnx_model_path --imgsz $image_size --save_video --save_path $result_directory_path/video_name.mp4 129 | ``` 130 | 131 | **Notes:** 132 | - If you don't specify --save_image or --save_video, the script only runs inference and does not save the outputs.
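As a complement to the scripts above, here is a minimal, self-contained sketch of how the raw engine throughput could be measured with the TensorRT Python API and pycuda (the same stack export_mask.py imports). This is not the repository's measurement code: `ENGINE_PATH` is a placeholder, the engine is assumed to have static input shapes (as built by the command above), and the input buffer is filled with random data instead of a letterboxed image.

```python
import time
import numpy as np
import tensorrt as trt
import pycuda.autoinit  # noqa: F401 -- initializes a CUDA context on import
import pycuda.driver as cuda

ENGINE_PATH = "./engine/engine_name.engine"  # placeholder, point this at your engine

logger = trt.Logger(trt.Logger.WARNING)
runtime = trt.Runtime(logger)
with open(ENGINE_PATH, "rb") as f:
    engine = runtime.deserialize_cuda_engine(f.read())
context = engine.create_execution_context()

# One pinned host buffer and one device buffer per binding (assumes static shapes).
host, dev, bindings = [], [], []
for i in range(engine.num_bindings):
    dtype = trt.nptype(engine.get_binding_dtype(i))
    h = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(i)), dtype)
    d = cuda.mem_alloc(h.nbytes)
    host.append(h)
    dev.append(d)
    bindings.append(int(d))
    if engine.binding_is_input(i):
        h[:] = np.random.rand(h.size).astype(dtype)  # dummy input, not a real image
        cuda.memcpy_htod(d, h)

n = 50
t0 = time.time()
for _ in range(n):
    context.execute_v2(bindings)  # synchronous inference
for i in range(engine.num_bindings):
    if not engine.binding_is_input(i):
        cuda.memcpy_dtoh(host[i], dev[i])
print(f"~{n / (time.time() - t0):.2f} FPS (compute only)")
```

Numbers from such a loop exclude pre- and post-processing, so they will be somewhat higher than the end-to-end FPS reported in the table above.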
133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # Auto-anchor utils 2 | 3 | import numpy as np 4 | import torch 5 | import yaml 6 | from scipy.cluster.vq import kmeans 7 | from tqdm import tqdm 8 | 9 | from utils.general import colorstr 10 | 11 | 12 | def check_anchor_order(m): 13 | # Check anchor order against stride order for YOLO Detect() module m, and correct if necessary 14 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 15 | da = a[-1] - a[0] # delta a 16 | ds = m.stride[-1] - m.stride[0] # delta s 17 | if da.sign() != ds.sign(): # anchor order and stride order disagree 18 | print('Reversing anchor order') 19 | m.anchors[:] = m.anchors.flip(0) 20 | m.anchor_grid[:] = m.anchor_grid.flip(0) 21 | 22 | 23 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 24 | # Check anchor fit to data, recompute if necessary 25 | prefix = colorstr('autoanchor: ') 26 | print(f'\n{prefix}Analyzing anchors... ', end='') 27 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 28 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 29 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 30 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 31 | 32 | def metric(k): # compute metric 33 | r = wh[:, None] / k[None] 34 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 35 | best = x.max(1)[0] # best_x 36 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 37 | bpr = (best > 1. / thr).float().mean() # best possible recall 38 | return bpr, aat 39 | 40 | anchors = m.anchor_grid.clone().cpu().view(-1, 2) # current anchors 41 | bpr, aat = metric(anchors) 42 | print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') 43 | if bpr < 0.98: # threshold to recompute 44 | print('. Attempting to improve anchors, please wait...') 45 | na = m.anchor_grid.numel() // 2 # number of anchors 46 | try: 47 | anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 48 | except Exception as e: 49 | print(f'{prefix}ERROR: {e}') 50 | new_bpr = metric(anchors)[0] 51 | if new_bpr > bpr: # replace anchors 52 | anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) 53 | m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid) # for inference 54 | check_anchor_order(m) 55 | m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 56 | print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.') 57 | else: 58 | print(f'{prefix}Original anchors better than new anchors. 
Proceeding with original anchors.') 59 | print('') # newline 60 | 61 | 62 | def kmean_anchors(path='./data/coco.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 63 | """ Creates kmeans-evolved anchors from training dataset 64 | 65 | Arguments: 66 | path: path to dataset *.yaml, or a loaded dataset 67 | n: number of anchors 68 | img_size: image size used for training 69 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 70 | gen: generations to evolve anchors using genetic algorithm 71 | verbose: print all results 72 | 73 | Return: 74 | k: kmeans evolved anchors 75 | 76 | Usage: 77 | from utils.autoanchor import *; _ = kmean_anchors() 78 | """ 79 | thr = 1. / thr 80 | prefix = colorstr('autoanchor: ') 81 | 82 | def metric(k, wh): # compute metrics 83 | r = wh[:, None] / k[None] 84 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 85 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 86 | return x, x.max(1)[0] # x, best_x 87 | 88 | def anchor_fitness(k): # mutation fitness 89 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 90 | return (best * (best > thr).float()).mean() # fitness 91 | 92 | def print_results(k): 93 | k = k[np.argsort(k.prod(1))] # sort small to large 94 | x, best = metric(k, wh0) 95 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 96 | print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr') 97 | print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' 98 | f'past_thr={x[x > thr].mean():.3f}-mean: ', end='') 99 | for i, x in enumerate(k): 100 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 101 | return k 102 | 103 | if isinstance(path, str): # *.yaml file 104 | with open(path) as f: 105 | data_dict = yaml.load(f, Loader=yaml.SafeLoader) # data dict 106 | from utils.datasets import LoadImagesAndLabels 107 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 108 | else: 109 | dataset = path # dataset 110 | 111 | # Get label wh 112 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 113 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 114 | 115 | # Filter 116 | i = (wh0 < 3.0).any(1).sum() 117 | if i: 118 | print(f'{prefix}WARNING: Extremely small objects found. 
{i} of {len(wh0)} labels are < 3 pixels in size.') 119 | wh = wh0[(wh0 >= 2.0).any(1)] # filter >= 2 pixels 120 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 121 | 122 | # Kmeans calculation 123 | print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') 124 | s = wh.std(0) # sigmas for whitening 125 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 126 | assert len(k) == n, f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}' 127 | k *= s 128 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 129 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 130 | k = print_results(k) 131 | 132 | # Plot 133 | # k, d = [None] * 20, [None] * 20 134 | # for i in tqdm(range(1, 21)): 135 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 136 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 137 | # ax = ax.ravel() 138 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 139 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 140 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 141 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 142 | # fig.savefig('wh.png', dpi=200) 143 | 144 | # Evolve 145 | npr = np.random 146 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, anchor shape, mutation prob, sigma 147 | pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar 148 | for _ in pbar: 149 | v = np.ones(sh) 150 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 151 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 152 | kg = (k.copy() * v).clip(min=2.0) 153 | fg = anchor_fitness(kg) 154 | if fg > f: 155 | f, k = fg, kg.copy() 156 | pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 157 | if verbose: 158 | print_results(k) 159 | 160 | return print_results(k) 161 | -------------------------------------------------------------------------------- /deploy/triton-inference-server/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv7 on Triton Inference Server 2 | 3 | Instructions to deploy YOLOv7 as a TensorRT engine to [Triton Inference Server](https://github.com/NVIDIA/triton-inference-server). 4 | 5 | Triton Inference Server takes care of model deployment with many out-of-the-box benefits, like a GRPC and HTTP interface, automatic scheduling on multiple GPUs, shared memory (even on GPU), dynamic server-side batching, health metrics and memory resource management. 6 | 7 | There are no additional dependencies needed to run this deployment, except a working Docker daemon with GPU support. 8 | 9 | ## Export TensorRT 10 | 11 | See https://github.com/WongKinYiu/yolov7#export for more info. 
12 | 13 | ```bash 14 | # PyTorch Yolov7 -> ONNX with grid, EfficientNMS plugin and dynamic batch size 15 | python export.py --weights ./yolov7.pt --grid --end2end --dynamic-batch --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 16 | # ONNX -> TensorRT with trtexec and docker 17 | docker run -it --rm --gpus=all nvcr.io/nvidia/tensorrt:22.06-py3 18 | # Copy onnx -> container: docker cp yolov7.onnx <container-id>:/workspace/ 19 | # Export with FP16 precision, min batch 1, opt batch 8 and max batch 8 20 | ./tensorrt/bin/trtexec --onnx=yolov7.onnx --minShapes=images:1x3x640x640 --optShapes=images:8x3x640x640 --maxShapes=images:8x3x640x640 --fp16 --workspace=4096 --saveEngine=yolov7-fp16-1x8x8.engine --timingCacheFile=timing.cache 21 | # Test engine 22 | ./tensorrt/bin/trtexec --loadEngine=yolov7-fp16-1x8x8.engine 23 | # Copy engine -> host: docker cp <container-id>:/workspace/yolov7-fp16-1x8x8.engine . 24 | ``` 25 | 26 | Example output of the test on an RTX 3090. 27 | 28 | ``` 29 | [I] === Performance summary === 30 | [I] Throughput: 73.4985 qps 31 | [I] Latency: min = 14.8578 ms, max = 15.8344 ms, mean = 15.07 ms, median = 15.0422 ms, percentile(99%) = 15.7443 ms 32 | [I] End-to-End Host Latency: min = 25.8715 ms, max = 28.4102 ms, mean = 26.672 ms, median = 26.6082 ms, percentile(99%) = 27.8314 ms 33 | [I] Enqueue Time: min = 0.793701 ms, max = 1.47144 ms, mean = 1.2008 ms, median = 1.28644 ms, percentile(99%) = 1.38965 ms 34 | [I] H2D Latency: min = 1.50073 ms, max = 1.52454 ms, mean = 1.51225 ms, median = 1.51404 ms, percentile(99%) = 1.51941 ms 35 | [I] GPU Compute Time: min = 13.3386 ms, max = 14.3186 ms, mean = 13.5448 ms, median = 13.5178 ms, percentile(99%) = 14.2151 ms 36 | [I] D2H Latency: min = 0.00878906 ms, max = 0.0172729 ms, mean = 0.0128844 ms, median = 0.0125732 ms, percentile(99%) = 0.0166016 ms 37 | [I] Total Host Walltime: 3.04768 s 38 | [I] Total GPU Compute Time: 3.03404 s 39 | [I] Explanations of the performance metrics are printed in the verbose logs. 40 | ``` 41 | Note: 73.5 qps x batch 8 = 588 FPS @ ~15 ms latency. 42 | 43 | ## Model Repository 44 | 45 | See [Triton Model Repository Documentation](https://github.com/triton-inference-server/server/blob/main/docs/model_repository.md#model-repository) for more info. 46 | 47 | ```bash 48 | # Create folder structure 49 | mkdir -p triton-deploy/models/yolov7/1/ 50 | touch triton-deploy/models/yolov7/config.pbtxt 51 | # Place model 52 | mv yolov7-fp16-1x8x8.engine triton-deploy/models/yolov7/1/model.plan 53 | ``` 54 | 55 | ## Model Configuration 56 | 57 | See [Triton Model Configuration Documentation](https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#model-configuration) for more info. 
58 | 59 | Minimal configuration for `triton-deploy/models/yolov7/config.pbtxt`: 60 | 61 | ``` 62 | name: "yolov7" 63 | platform: "tensorrt_plan" 64 | max_batch_size: 8 65 | dynamic_batching { } 66 | ``` 67 | 68 | Example repository: 69 | 70 | ```bash 71 | $ tree triton-deploy/ 72 | triton-deploy/ 73 | └── models 74 | └── yolov7 75 | ├── 1 76 | │   └── model.plan 77 | └── config.pbtxt 78 | 79 | 3 directories, 2 files 80 | ``` 81 | 82 | ## Start Triton Inference Server 83 | 84 | ``` 85 | docker run --gpus all --rm --ipc=host --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p8000:8000 -p8001:8001 -p8002:8002 -v$(pwd)/triton-deploy/models:/models nvcr.io/nvidia/tritonserver:22.06-py3 tritonserver --model-repository=/models --strict-model-config=false --log-verbose 1 86 | ``` 87 | 88 | In the log you should see: 89 | 90 | ``` 91 | +--------+---------+--------+ 92 | | Model | Version | Status | 93 | +--------+---------+--------+ 94 | | yolov7 | 1 | READY | 95 | +--------+---------+--------+ 96 | ``` 97 | 98 | ## Performance with Model Analyzer 99 | 100 | See [Triton Model Analyzer Documentation](https://github.com/triton-inference-server/server/blob/main/docs/model_analyzer.md#model-analyzer) for more info. 101 | 102 | Performance numbers @ RTX 3090 + AMD Ryzen 9 5950X 103 | 104 | Example test for 16 concurrent clients using shared memory, each with batch size 1 requests: 105 | 106 | ```bash 107 | docker run -it --ipc=host --net=host nvcr.io/nvidia/tritonserver:22.06-py3-sdk /bin/bash 108 | 109 | ./install/bin/perf_analyzer -m yolov7 -u 127.0.0.1:8001 -i grpc --shared-memory system --concurrency-range 16 110 | 111 | # Result (truncated) 112 | Concurrency: 16, throughput: 590.119 infer/sec, latency 27080 usec 113 | ``` 114 | 115 | Throughput for 16 clients with batch size 1 is the same as for a single thread running the engine at batch size 16 locally, thanks to the Triton [Dynamic Batching Strategy](https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#dynamic-batcher). The result without dynamic batching (disabled in the model configuration) is considerably worse: 116 | 117 | ```bash 118 | # Result (truncated) 119 | Concurrency: 16, throughput: 335.587 infer/sec, latency 47616 usec 120 | ``` 121 | 122 | ## How to run model in your code 123 | 124 | An example client can be found in client.py. It can run dummy inputs, images and videos. 125 | 126 | ```bash 127 | pip3 install tritonclient[all] opencv-python 128 | python3 client.py image data/dog.jpg 129 | ``` 130 | 131 | ![exemplary output result](data/dog_result.jpg) 132 | 133 | ``` 134 | $ python3 client.py --help 135 | usage: client.py [-h] [-m MODEL] [--width WIDTH] [--height HEIGHT] [-u URL] [-o OUT] [-f FPS] [-i] [-v] [-t CLIENT_TIMEOUT] [-s] [-r ROOT_CERTIFICATES] [-p PRIVATE_KEY] [-x CERTIFICATE_CHAIN] {dummy,image,video} [input] 136 | 137 | positional arguments: 138 | {dummy,image,video} Run mode. 'dummy' will send an emtpy buffer to the server to test if inference works. 'image' will process an image. 'video' will process a video. 
139 | input Input file to load from in image or video mode 140 | 141 | optional arguments: 142 | -h, --help show this help message and exit 143 | -m MODEL, --model MODEL 144 | Inference model name, default yolov7 145 | --width WIDTH Inference model input width, default 640 146 | --height HEIGHT Inference model input height, default 640 147 | -u URL, --url URL Inference server URL, default localhost:8001 148 | -o OUT, --out OUT Write output into file instead of displaying it 149 | -f FPS, --fps FPS Video output fps, default 24.0 FPS 150 | -i, --model-info Print model status, configuration and statistics 151 | -v, --verbose Enable verbose client output 152 | -t CLIENT_TIMEOUT, --client-timeout CLIENT_TIMEOUT 153 | Client timeout in seconds, default is no timeout 154 | -s, --ssl Enable SSL encrypted channel to the server 155 | -r ROOT_CERTIFICATES, --root-certificates ROOT_CERTIFICATES 156 | File holding PEM-encoded root certificates, default is none 157 | -p PRIVATE_KEY, --private-key PRIVATE_KEY 158 | File holding PEM-encoded private key, default is none 159 | -x CERTIFICATE_CHAIN, --certificate-chain CERTIFICATE_CHAIN 160 | File holding PEM-encoded certificate chain, default is none 161 | ``` 162 | -------------------------------------------------------------------------------- /export_mask.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import time 4 | 5 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 6 | import tensorrt as trt 7 | import pycuda.autoinit 8 | import pycuda.driver as cuda 9 | import numpy as np 10 | 11 | 12 | 13 | 14 | import onnxruntime as ort 15 | import os 16 | import torch 17 | import torch.backends.cudnn as cudnn 18 | import cv2 19 | import onnx 20 | import yaml 21 | from torchvision import transforms 22 | import tqdm 23 | 24 | from utils.datasets import letterbox 25 | from utils.general import non_max_suppression_mask_conf 26 | from detectron2.modeling.poolers import ROIPooler 27 | from detectron2.structures import Boxes 28 | from detectron2.utils.memory import retry_if_cuda_oom 29 | from detectron2.layers import paste_masks_in_image 30 | from utils.general import set_logging 31 | from models.experimental import attempt_load 32 | 33 | 34 | 35 | 36 | def PostProcess(img, hyp, model, inf_out, attn, bases, sem_output): 37 | bases = torch.cat([bases, sem_output], dim=1) 38 | nb, _, height, width = img.shape 39 | names = model.names 40 | pooler_scale = model.pooler_scale 41 | pooler = ROIPooler(output_size=hyp['mask_resolution'], scales=(pooler_scale,), sampling_ratio=1, pooler_type='ROIAlignV2', canonical_level=2) 42 | 43 | output, output_mask = non_max_suppression_mask_conf(inf_out, attn, bases, pooler, hyp, conf_thres=0.25, iou_thres=0.65, merge=False, mask_iou=None) 44 | 45 | pred, pred_masks = output[0], output_mask[0] 46 | base = bases[0] 47 | bboxes = Boxes(pred[:, :4]) 48 | original_pred_masks = pred_masks.view(-1, hyp['mask_resolution'], hyp['mask_resolution']) 49 | pred_masks = retry_if_cuda_oom(paste_masks_in_image)( original_pred_masks, bboxes, (height, width), threshold=0.5) 50 | pred_masks_np = pred_masks.detach().cpu().numpy() 51 | pred_cls = pred[:, 5].detach().cpu().numpy() 52 | pred_conf = pred[:, 4].detach().cpu().numpy() 53 | nimg = img[0].permute(1, 2, 0) * 255 54 | nimg = nimg.cpu().numpy().astype(np.uint8) 55 | nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR) 56 | nbboxes = 
bboxes.tensor.detach().cpu().numpy().astype(int) 57 | pnimg = nimg.copy() 58 | 59 | 60 | for one_mask, bbox, cls, conf in zip(pred_masks_np, nbboxes, pred_cls, pred_conf): 61 | if conf < 0.25: 62 | continue 63 | color = [np.random.randint(255), np.random.randint(255), np.random.randint(255)] 64 | 65 | 66 | pnimg[one_mask] = pnimg[one_mask] * 0.5 + np.array(color, dtype=np.uint8) * 0.5 67 | pnimg = cv2.rectangle(pnimg, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2) 68 | 69 | return pnimg 70 | if __name__ == '__main__': 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument('--weights', type=str, default='./yolor-csp-c.pt', help='weights path') 73 | parser.add_argument('--onnx_name', type=str, default='./yolov7_mask.onnx', help='onnx filename') 74 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 75 | parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes') 76 | parser.add_argument('--dynamic-batch', action='store_true', help='dynamic batch onnx for tensorrt and onnx-runtime') 77 | parser.add_argument('--grid', action='store_true', help='export Detect() layer grid') 78 | parser.add_argument('--end2end', action='store_true', help='export end2end onnx') 79 | parser.add_argument('--max-wh', type=int, default=None, help='None for tensorrt nms, int value for onnx-runtime nms') 80 | parser.add_argument('--topk-all', type=int, default=100, help='topk objects for every image') 81 | parser.add_argument('--iou-thres', type=float, default=0.45, help='iou threshold for NMS') 82 | parser.add_argument('--conf-thres', type=float, default=0.25, help='conf threshold for NMS') 83 | parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 84 | parser.add_argument('--simplify', action='store_true', help='simplify onnx model') 85 | parser.add_argument('--include-nms', action='store_true', help='export end2end onnx') 86 | parser.add_argument('--fp16', action='store_true', help='CoreML FP16 half-precision export') 87 | parser.add_argument('--int8', action='store_true', help='CoreML INT8 quantization') 88 | parser.add_argument("--input", nargs="+", help="A file or directory of your input data ") 89 | parser.add_argument('--imgsz', type=int, default=320, help='image size') # height, width 90 | parser.add_argument('--no_infer', action='store_true', help='skip the ONNX Runtime inference check after export') 91 | 92 | opt = parser.parse_args() 93 | opt.dynamic = opt.dynamic and not opt.end2end 94 | opt.dynamic = False if opt.dynamic_batch else opt.dynamic 95 | set_logging() 96 | t = time.time() 97 | 98 | 99 | device = torch.device("cpu") 100 | 101 | 102 | with open('data/hyp.scratch.mask.yaml') as f: 103 | hyp = yaml.load(f, Loader=yaml.FullLoader) 104 | 105 | weights = opt.weights 106 | model = attempt_load(weights, map_location=device) 107 | _ = model.eval() 108 | 109 | 110 | time1 = time.time() 111 | loop = 1 112 | for i in range(loop): 113 | image = cv2.imread(opt.input[0]) # 504x378 image 114 | image = letterbox(image, (opt.imgsz,opt.imgsz), stride=64, auto=True)[0] 115 | image_ = image.copy() 116 | image = transforms.ToTensor()(image) 117 | image = torch.tensor(np.array([image.numpy()])) 118 | image = image.to(device) 119 | img = image 120 | y = model(image) 121 | 122 | try: 123 | import onnx 124 | 125 | print('\nStarting ONNX export with onnx %s...' 
% onnx.__version__) 126 | f = "./onnx/" + opt.onnx_name 127 | model.eval() 128 | output_names = ['output'] 129 | dynamic_axes = None 130 | if opt.grid: 131 | model.model[-1].concat = True 132 | torch.onnx.export(model, image, f, verbose=True, opset_version=13, input_names=['images'], 133 | output_names=output_names, 134 | dynamic_axes=dynamic_axes) 135 | 136 | # Checks 137 | onnx_model = onnx.load(f) # load onnx model 138 | onnx.checker.check_model(onnx_model) # check onnx model 139 | onnx.save(onnx_model, f) 140 | print('ONNX export success, saved as %s' % f) 141 | except Exception as e: 142 | print('ONNX export failure: %s' % e) 143 | 144 | # Finish 145 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t)) 146 | 147 | 148 | if not(opt.no_infer): 149 | f = "./onnx/" + opt.onnx_name 150 | image_path = opt.input 151 | 152 | iteration = 0 153 | start_time_all = time.time() 154 | w = f 155 | providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if torch.cuda.is_available() else ['CPUExecutionProvider'] 156 | session = ort.InferenceSession(w, providers=providers) 157 | 158 | model_onnx = onnx.load(w) 159 | input_shapes = [[d.dim_value for d in _input.type.tensor_type.shape.dim] for _input in model_onnx.graph.input] 160 | output_shapes = [[d.dim_value for d in _output.type.tensor_type.shape.dim] for _output in model_onnx.graph.output] 161 | 162 | 163 | 164 | outname = [i.name for i in session.get_outputs()] 165 | 166 | inname = [i.name for i in session.get_inputs()] 167 | time_use_trt_only = 0 168 | time_use_trt_ = 0 169 | for img_path in tqdm.tqdm(image_path): 170 | start_time = time.time() 171 | image = cv2.imread(img_path) 172 | image = letterbox(image, (opt.imgsz, opt.imgsz), stride=64, auto=True)[0] 173 | image_letter = image.copy() 174 | image_ = image.copy() 175 | image = transforms.ToTensor()(image) 176 | image = torch.tensor(np.array([image.numpy()])) # add a batch dimension: HWC image -> 1xCxHxW tensor 177 | img = np.array(image) 178 | 179 | img = np.ascontiguousarray(img, dtype=np.float32) 180 | inp = {inname[0]:img} 181 | outputs = session.run(outname, inp) # run the session once and reuse all seven outputs 182 | output, output1, output2 = outputs[0], outputs[1], outputs[2] 183 | output3, output4 = outputs[3], outputs[4] 184 | output5, output6 = outputs[5], outputs[6] 185 | 186 | 187 | 188 | inf_out, train_out = torch.tensor(output), [torch.tensor(output2),torch.tensor(output3),torch.tensor(output4)] 189 | attn, mask_iou, bases, sem_output = torch.tensor(output1), None, torch.tensor(output5), torch.tensor(output6) 190 | img = torch.tensor(img) 191 | pnimg = PostProcess(img, hyp, model, inf_out, attn, bases, sem_output) 192 | 193 | 194 | save_path = "./result_onnx" 195 | cv2.imwrite(save_path + str(int(opt.imgsz)) + "_" + str(iteration) + ".jpg", pnimg) # one file per input image 196 | iteration+=1 197 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | # Model validation metrics 2 | 3 | from pathlib import Path 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import torch 8 | 9 | from . 
import general 10 | 11 | 12 | def fitness(x): 13 | # Model fitness as a weighted combination of metrics 14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] 15 | return (x[:, :4] * w).sum(1) 16 | 17 | 18 | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=()): 19 | """ Compute the average precision, given the recall and precision curves. 20 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 21 | # Arguments 22 | tp: True positives (nparray, nx1 or nx10). 23 | conf: Objectness value from 0-1 (nparray). 24 | pred_cls: Predicted object classes (nparray). 25 | target_cls: True object classes (nparray). 26 | plot: Plot precision-recall curve at mAP@0.5 27 | save_dir: Plot save directory 28 | # Returns 29 | The average precision as computed in py-faster-rcnn. 30 | """ 31 | 32 | # Sort by objectness 33 | i = np.argsort(-conf) 34 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 35 | 36 | # Find unique classes 37 | unique_classes = np.unique(target_cls) 38 | nc = unique_classes.shape[0] # number of classes, number of detections 39 | 40 | # Create Precision-Recall curve and compute AP for each class 41 | px, py = np.linspace(0, 1, 1000), [] # for plotting 42 | ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) 43 | for ci, c in enumerate(unique_classes): 44 | i = pred_cls == c 45 | n_l = (target_cls == c).sum() # number of labels 46 | n_p = i.sum() # number of predictions 47 | 48 | if n_p == 0 or n_l == 0: 49 | continue 50 | else: 51 | # Accumulate FPs and TPs 52 | fpc = (1 - tp[i]).cumsum(0) 53 | tpc = tp[i].cumsum(0) 54 | 55 | # Recall 56 | recall = tpc / (n_l + 1e-16) # recall curve 57 | r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases 58 | 59 | # Precision 60 | precision = tpc / (tpc + fpc) # precision curve 61 | p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score 62 | 63 | # AP from recall-precision curve 64 | for j in range(tp.shape[1]): 65 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) 66 | if plot and j == 0: 67 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 68 | 69 | # Compute F1 (harmonic mean of precision and recall) 70 | f1 = 2 * p * r / (p + r + 1e-16) 71 | if plot: 72 | plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) 73 | plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') 74 | plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') 75 | plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') 76 | 77 | i = f1.mean(0).argmax() # max F1 index 78 | return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32') 79 | 80 | 81 | def compute_ap(recall, precision): 82 | """ Compute the average precision, given the recall and precision curves 83 | # Arguments 84 | recall: The recall curve (list) 85 | precision: The precision curve (list) 86 | # Returns 87 | Average precision, precision curve, recall curve 88 | """ 89 | 90 | # Append sentinel values to beginning and end 91 | mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01])) 92 | mpre = np.concatenate(([1.], precision, [0.])) 93 | 94 | # Compute the precision envelope 95 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) 96 | 97 | # Integrate area under curve 98 | method = 'interp' # methods: 'continuous', 'interp' 99 | if method == 'interp': 100 | x = np.linspace(0, 1, 101) # 101-point interp (COCO) 101 | ap = np.trapz(np.interp(x, mrec, 
mpre), x) # integrate 102 | else: # 'continuous' 103 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes 104 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve 105 | 106 | return ap, mpre, mrec 107 | 108 | 109 | class ConfusionMatrix: 110 | # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix 111 | def __init__(self, nc, conf=0.25, iou_thres=0.45): 112 | self.matrix = np.zeros((nc + 1, nc + 1)) 113 | self.nc = nc # number of classes 114 | self.conf = conf 115 | self.iou_thres = iou_thres 116 | 117 | def process_batch(self, detections, labels): 118 | """ 119 | Update the confusion matrix for a batch of detections and labels. 120 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 121 | Arguments: 122 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class 123 | labels (Array[M, 5]), class, x1, y1, x2, y2 124 | Returns: 125 | None, updates confusion matrix accordingly 126 | """ 127 | detections = detections[detections[:, 4] > self.conf] 128 | gt_classes = labels[:, 0].int() 129 | detection_classes = detections[:, 5].int() 130 | iou = general.box_iou(labels[:, 1:], detections[:, :4]) 131 | 132 | x = torch.where(iou > self.iou_thres) 133 | if x[0].shape[0]: 134 | matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() 135 | if x[0].shape[0] > 1: 136 | matches = matches[matches[:, 2].argsort()[::-1]] 137 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]] 138 | matches = matches[matches[:, 2].argsort()[::-1]] 139 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]] 140 | else: 141 | matches = np.zeros((0, 3)) 142 | 143 | n = matches.shape[0] > 0 144 | m0, m1, _ = matches.transpose().astype(np.int16) 145 | for i, gc in enumerate(gt_classes): 146 | j = m0 == i 147 | if n and sum(j) == 1: 148 | self.matrix[gc, detection_classes[m1[j]]] += 1 # correct 149 | else: 150 | self.matrix[self.nc, gc] += 1 # background FP 151 | 152 | if n: 153 | for i, dc in enumerate(detection_classes): 154 | if not any(m1 == i): 155 | self.matrix[dc, self.nc] += 1 # background FN 156 | 157 | def get_matrix(self): # renamed: the attribute self.matrix set in __init__ shadowed a method named matrix() 158 | return self.matrix 159 | 160 | def plot(self, save_dir='', names=()): 161 | try: 162 | import seaborn as sn 163 | 164 | array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize 165 | array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) 166 | 167 | fig = plt.figure(figsize=(12, 9), tight_layout=True) 168 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size 169 | labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels 170 | sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True, 171 | xticklabels=names + ['background FP'] if labels else "auto", 172 | yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1)) 173 | fig.axes[0].set_xlabel('True') 174 | fig.axes[0].set_ylabel('Predicted') 175 | fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) 176 | except Exception as e: 177 | print('WARNING: ConfusionMatrix plot failure: %s' % e) 178 | 179 | def print(self): 180 | for i in range(self.nc + 1): 181 | print(' '.join(map(str, self.matrix[i]))) 182 | 183 | 184 | # Plots ---------------------------------------------------------------------------------------------------------------- 185 | 186 | def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()): 187 | # Precision-recall curve 188 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 
189 | py = np.stack(py, axis=1) 190 | 191 | if 0 < len(names) < 21: # display per-class legend if < 21 classes 192 | for i, y in enumerate(py.T): 193 | ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) 194 | else: 195 | ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) 196 | 197 | ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) 198 | ax.set_xlabel('Recall') 199 | ax.set_ylabel('Precision') 200 | ax.set_xlim(0, 1) 201 | ax.set_ylim(0, 1) 202 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 203 | fig.savefig(Path(save_dir), dpi=250) 204 | 205 | 206 | def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence', ylabel='Metric'): 207 | # Metric-confidence curve 208 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 209 | 210 | if 0 < len(names) < 21: # display per-class legend if < 21 classes 211 | for i, y in enumerate(py): 212 | ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric) 213 | else: 214 | ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric) 215 | 216 | y = py.mean(0) 217 | ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') 218 | ax.set_xlabel(xlabel) 219 | ax.set_ylabel(ylabel) 220 | ax.set_xlim(0, 1) 221 | ax.set_ylim(0, 1) 222 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 223 | fig.savefig(Path(save_dir), dpi=250) 224 | -------------------------------------------------------------------------------- /cfg/deploy/yolov7-e6e.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7-e6e backbone 14 | backbone: 15 | # [from, number, module, args], 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [80, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, DownC, [160]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 29 | [-1, 1, Conv, [160, 1, 1]], # 12 30 | [-11, 1, Conv, [64, 1, 1]], 31 | [-12, 1, Conv, [64, 1, 1]], 32 | [-1, 1, Conv, [64, 3, 1]], 33 | [-1, 1, Conv, [64, 3, 1]], 34 | [-1, 1, Conv, [64, 3, 1]], 35 | [-1, 1, Conv, [64, 3, 1]], 36 | [-1, 1, Conv, [64, 3, 1]], 37 | [-1, 1, Conv, [64, 3, 1]], 38 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 39 | [-1, 1, Conv, [160, 1, 1]], # 22 40 | [[-1, -11], 1, Shortcut, [1]], # 23 41 | 42 | [-1, 1, DownC, [320]], # 24-P3/8 43 | [-1, 1, Conv, [128, 1, 1]], 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, Conv, [128, 3, 1]], 46 | [-1, 1, Conv, [128, 3, 1]], 47 | [-1, 1, Conv, [128, 3, 1]], 48 | [-1, 1, Conv, [128, 3, 1]], 49 | [-1, 1, Conv, [128, 3, 1]], 50 | [-1, 1, Conv, [128, 3, 1]], 51 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 52 | [-1, 1, Conv, [320, 1, 1]], # 34 53 | [-11, 1, Conv, [128, 1, 1]], 54 | [-12, 1, Conv, [128, 1, 1]], 55 | [-1, 1, Conv, [128, 3, 1]], 56 | [-1, 1, Conv, [128, 3, 1]], 57 | [-1, 1, Conv, [128, 3, 1]], 58 | [-1, 1, Conv, [128, 3, 1]], 
59 | [-1, 1, Conv, [128, 3, 1]], 60 | [-1, 1, Conv, [128, 3, 1]], 61 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 62 | [-1, 1, Conv, [320, 1, 1]], # 44 63 | [[-1, -11], 1, Shortcut, [1]], # 45 64 | 65 | [-1, 1, DownC, [640]], # 46-P4/16 66 | [-1, 1, Conv, [256, 1, 1]], 67 | [-2, 1, Conv, [256, 1, 1]], 68 | [-1, 1, Conv, [256, 3, 1]], 69 | [-1, 1, Conv, [256, 3, 1]], 70 | [-1, 1, Conv, [256, 3, 1]], 71 | [-1, 1, Conv, [256, 3, 1]], 72 | [-1, 1, Conv, [256, 3, 1]], 73 | [-1, 1, Conv, [256, 3, 1]], 74 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 75 | [-1, 1, Conv, [640, 1, 1]], # 56 76 | [-11, 1, Conv, [256, 1, 1]], 77 | [-12, 1, Conv, [256, 1, 1]], 78 | [-1, 1, Conv, [256, 3, 1]], 79 | [-1, 1, Conv, [256, 3, 1]], 80 | [-1, 1, Conv, [256, 3, 1]], 81 | [-1, 1, Conv, [256, 3, 1]], 82 | [-1, 1, Conv, [256, 3, 1]], 83 | [-1, 1, Conv, [256, 3, 1]], 84 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 85 | [-1, 1, Conv, [640, 1, 1]], # 66 86 | [[-1, -11], 1, Shortcut, [1]], # 67 87 | 88 | [-1, 1, DownC, [960]], # 68-P5/32 89 | [-1, 1, Conv, [384, 1, 1]], 90 | [-2, 1, Conv, [384, 1, 1]], 91 | [-1, 1, Conv, [384, 3, 1]], 92 | [-1, 1, Conv, [384, 3, 1]], 93 | [-1, 1, Conv, [384, 3, 1]], 94 | [-1, 1, Conv, [384, 3, 1]], 95 | [-1, 1, Conv, [384, 3, 1]], 96 | [-1, 1, Conv, [384, 3, 1]], 97 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 98 | [-1, 1, Conv, [960, 1, 1]], # 78 99 | [-11, 1, Conv, [384, 1, 1]], 100 | [-12, 1, Conv, [384, 1, 1]], 101 | [-1, 1, Conv, [384, 3, 1]], 102 | [-1, 1, Conv, [384, 3, 1]], 103 | [-1, 1, Conv, [384, 3, 1]], 104 | [-1, 1, Conv, [384, 3, 1]], 105 | [-1, 1, Conv, [384, 3, 1]], 106 | [-1, 1, Conv, [384, 3, 1]], 107 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 108 | [-1, 1, Conv, [960, 1, 1]], # 88 109 | [[-1, -11], 1, Shortcut, [1]], # 89 110 | 111 | [-1, 1, DownC, [1280]], # 90-P6/64 112 | [-1, 1, Conv, [512, 1, 1]], 113 | [-2, 1, Conv, [512, 1, 1]], 114 | [-1, 1, Conv, [512, 3, 1]], 115 | [-1, 1, Conv, [512, 3, 1]], 116 | [-1, 1, Conv, [512, 3, 1]], 117 | [-1, 1, Conv, [512, 3, 1]], 118 | [-1, 1, Conv, [512, 3, 1]], 119 | [-1, 1, Conv, [512, 3, 1]], 120 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 121 | [-1, 1, Conv, [1280, 1, 1]], # 100 122 | [-11, 1, Conv, [512, 1, 1]], 123 | [-12, 1, Conv, [512, 1, 1]], 124 | [-1, 1, Conv, [512, 3, 1]], 125 | [-1, 1, Conv, [512, 3, 1]], 126 | [-1, 1, Conv, [512, 3, 1]], 127 | [-1, 1, Conv, [512, 3, 1]], 128 | [-1, 1, Conv, [512, 3, 1]], 129 | [-1, 1, Conv, [512, 3, 1]], 130 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 131 | [-1, 1, Conv, [1280, 1, 1]], # 110 132 | [[-1, -11], 1, Shortcut, [1]], # 111 133 | ] 134 | 135 | # yolov7-e6e head 136 | head: 137 | [[-1, 1, SPPCSPC, [640]], # 112 138 | 139 | [-1, 1, Conv, [480, 1, 1]], 140 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 141 | [89, 1, Conv, [480, 1, 1]], # route backbone P5 142 | [[-1, -2], 1, Concat, [1]], 143 | 144 | [-1, 1, Conv, [384, 1, 1]], 145 | [-2, 1, Conv, [384, 1, 1]], 146 | [-1, 1, Conv, [192, 3, 1]], 147 | [-1, 1, Conv, [192, 3, 1]], 148 | [-1, 1, Conv, [192, 3, 1]], 149 | [-1, 1, Conv, [192, 3, 1]], 150 | [-1, 1, Conv, [192, 3, 1]], 151 | [-1, 1, Conv, [192, 3, 1]], 152 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 153 | [-1, 1, Conv, [480, 1, 1]], # 126 154 | [-11, 1, Conv, [384, 1, 1]], 155 | [-12, 1, Conv, [384, 1, 1]], 156 | [-1, 1, Conv, [192, 3, 1]], 157 | [-1, 1, Conv, [192, 3, 1]], 158 | [-1, 1, Conv, [192, 3, 1]], 159 | [-1, 1, Conv, [192, 3, 1]], 160 | [-1, 1, Conv, [192, 3, 1]], 161 | [-1, 1, Conv, [192, 3, 1]], 162 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 163 | 
[-1, 1, Conv, [480, 1, 1]], # 136 164 | [[-1, -11], 1, Shortcut, [1]], # 137 165 | 166 | [-1, 1, Conv, [320, 1, 1]], 167 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 168 | [67, 1, Conv, [320, 1, 1]], # route backbone P4 169 | [[-1, -2], 1, Concat, [1]], 170 | 171 | [-1, 1, Conv, [256, 1, 1]], 172 | [-2, 1, Conv, [256, 1, 1]], 173 | [-1, 1, Conv, [128, 3, 1]], 174 | [-1, 1, Conv, [128, 3, 1]], 175 | [-1, 1, Conv, [128, 3, 1]], 176 | [-1, 1, Conv, [128, 3, 1]], 177 | [-1, 1, Conv, [128, 3, 1]], 178 | [-1, 1, Conv, [128, 3, 1]], 179 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 180 | [-1, 1, Conv, [320, 1, 1]], # 151 181 | [-11, 1, Conv, [256, 1, 1]], 182 | [-12, 1, Conv, [256, 1, 1]], 183 | [-1, 1, Conv, [128, 3, 1]], 184 | [-1, 1, Conv, [128, 3, 1]], 185 | [-1, 1, Conv, [128, 3, 1]], 186 | [-1, 1, Conv, [128, 3, 1]], 187 | [-1, 1, Conv, [128, 3, 1]], 188 | [-1, 1, Conv, [128, 3, 1]], 189 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 190 | [-1, 1, Conv, [320, 1, 1]], # 161 191 | [[-1, -11], 1, Shortcut, [1]], # 162 192 | 193 | [-1, 1, Conv, [160, 1, 1]], 194 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 195 | [45, 1, Conv, [160, 1, 1]], # route backbone P3 196 | [[-1, -2], 1, Concat, [1]], 197 | 198 | [-1, 1, Conv, [128, 1, 1]], 199 | [-2, 1, Conv, [128, 1, 1]], 200 | [-1, 1, Conv, [64, 3, 1]], 201 | [-1, 1, Conv, [64, 3, 1]], 202 | [-1, 1, Conv, [64, 3, 1]], 203 | [-1, 1, Conv, [64, 3, 1]], 204 | [-1, 1, Conv, [64, 3, 1]], 205 | [-1, 1, Conv, [64, 3, 1]], 206 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 207 | [-1, 1, Conv, [160, 1, 1]], # 176 208 | [-11, 1, Conv, [128, 1, 1]], 209 | [-12, 1, Conv, [128, 1, 1]], 210 | [-1, 1, Conv, [64, 3, 1]], 211 | [-1, 1, Conv, [64, 3, 1]], 212 | [-1, 1, Conv, [64, 3, 1]], 213 | [-1, 1, Conv, [64, 3, 1]], 214 | [-1, 1, Conv, [64, 3, 1]], 215 | [-1, 1, Conv, [64, 3, 1]], 216 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 217 | [-1, 1, Conv, [160, 1, 1]], # 186 218 | [[-1, -11], 1, Shortcut, [1]], # 187 219 | 220 | [-1, 1, DownC, [320]], 221 | [[-1, 162], 1, Concat, [1]], 222 | 223 | [-1, 1, Conv, [256, 1, 1]], 224 | [-2, 1, Conv, [256, 1, 1]], 225 | [-1, 1, Conv, [128, 3, 1]], 226 | [-1, 1, Conv, [128, 3, 1]], 227 | [-1, 1, Conv, [128, 3, 1]], 228 | [-1, 1, Conv, [128, 3, 1]], 229 | [-1, 1, Conv, [128, 3, 1]], 230 | [-1, 1, Conv, [128, 3, 1]], 231 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 232 | [-1, 1, Conv, [320, 1, 1]], # 199 233 | [-11, 1, Conv, [256, 1, 1]], 234 | [-12, 1, Conv, [256, 1, 1]], 235 | [-1, 1, Conv, [128, 3, 1]], 236 | [-1, 1, Conv, [128, 3, 1]], 237 | [-1, 1, Conv, [128, 3, 1]], 238 | [-1, 1, Conv, [128, 3, 1]], 239 | [-1, 1, Conv, [128, 3, 1]], 240 | [-1, 1, Conv, [128, 3, 1]], 241 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 242 | [-1, 1, Conv, [320, 1, 1]], # 209 243 | [[-1, -11], 1, Shortcut, [1]], # 210 244 | 245 | [-1, 1, DownC, [480]], 246 | [[-1, 137], 1, Concat, [1]], 247 | 248 | [-1, 1, Conv, [384, 1, 1]], 249 | [-2, 1, Conv, [384, 1, 1]], 250 | [-1, 1, Conv, [192, 3, 1]], 251 | [-1, 1, Conv, [192, 3, 1]], 252 | [-1, 1, Conv, [192, 3, 1]], 253 | [-1, 1, Conv, [192, 3, 1]], 254 | [-1, 1, Conv, [192, 3, 1]], 255 | [-1, 1, Conv, [192, 3, 1]], 256 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 257 | [-1, 1, Conv, [480, 1, 1]], # 222 258 | [-11, 1, Conv, [384, 1, 1]], 259 | [-12, 1, Conv, [384, 1, 1]], 260 | [-1, 1, Conv, [192, 3, 1]], 261 | [-1, 1, Conv, [192, 3, 1]], 262 | [-1, 1, Conv, [192, 3, 1]], 263 | [-1, 1, Conv, [192, 3, 1]], 264 | [-1, 1, Conv, [192, 3, 1]], 265 
| [-1, 1, Conv, [192, 3, 1]], 266 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 267 | [-1, 1, Conv, [480, 1, 1]], # 232 268 | [[-1, -11], 1, Shortcut, [1]], # 233 269 | 270 | [-1, 1, DownC, [640]], 271 | [[-1, 112], 1, Concat, [1]], 272 | 273 | [-1, 1, Conv, [512, 1, 1]], 274 | [-2, 1, Conv, [512, 1, 1]], 275 | [-1, 1, Conv, [256, 3, 1]], 276 | [-1, 1, Conv, [256, 3, 1]], 277 | [-1, 1, Conv, [256, 3, 1]], 278 | [-1, 1, Conv, [256, 3, 1]], 279 | [-1, 1, Conv, [256, 3, 1]], 280 | [-1, 1, Conv, [256, 3, 1]], 281 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 282 | [-1, 1, Conv, [640, 1, 1]], # 245 283 | [-11, 1, Conv, [512, 1, 1]], 284 | [-12, 1, Conv, [512, 1, 1]], 285 | [-1, 1, Conv, [256, 3, 1]], 286 | [-1, 1, Conv, [256, 3, 1]], 287 | [-1, 1, Conv, [256, 3, 1]], 288 | [-1, 1, Conv, [256, 3, 1]], 289 | [-1, 1, Conv, [256, 3, 1]], 290 | [-1, 1, Conv, [256, 3, 1]], 291 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 292 | [-1, 1, Conv, [640, 1, 1]], # 255 293 | [[-1, -11], 1, Shortcut, [1]], # 256 294 | 295 | [187, 1, Conv, [320, 3, 1]], 296 | [210, 1, Conv, [640, 3, 1]], 297 | [233, 1, Conv, [960, 3, 1]], 298 | [256, 1, Conv, [1280, 3, 1]], 299 | 300 | [[257,258,259,260], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 301 | ] 302 | -------------------------------------------------------------------------------- /cfg/training/yolov7-e6e.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [ 19,27, 44,40, 38,94 ] # P3/8 9 | - [ 96,68, 86,152, 180,137 ] # P4/16 10 | - [ 140,301, 303,264, 238,542 ] # P5/32 11 | - [ 436,615, 739,380, 925,792 ] # P6/64 12 | 13 | # yolov7 backbone 14 | backbone: 15 | # [from, number, module, args], 16 | [[-1, 1, ReOrg, []], # 0 17 | [-1, 1, Conv, [80, 3, 1]], # 1-P1/2 18 | 19 | [-1, 1, DownC, [160]], # 2-P2/4 20 | [-1, 1, Conv, [64, 1, 1]], 21 | [-2, 1, Conv, [64, 1, 1]], 22 | [-1, 1, Conv, [64, 3, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [-1, 1, Conv, [64, 3, 1]], 28 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 29 | [-1, 1, Conv, [160, 1, 1]], # 12 30 | [-11, 1, Conv, [64, 1, 1]], 31 | [-12, 1, Conv, [64, 1, 1]], 32 | [-1, 1, Conv, [64, 3, 1]], 33 | [-1, 1, Conv, [64, 3, 1]], 34 | [-1, 1, Conv, [64, 3, 1]], 35 | [-1, 1, Conv, [64, 3, 1]], 36 | [-1, 1, Conv, [64, 3, 1]], 37 | [-1, 1, Conv, [64, 3, 1]], 38 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 39 | [-1, 1, Conv, [160, 1, 1]], # 22 40 | [[-1, -11], 1, Shortcut, [1]], # 23 41 | 42 | [-1, 1, DownC, [320]], # 24-P3/8 43 | [-1, 1, Conv, [128, 1, 1]], 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, Conv, [128, 3, 1]], 46 | [-1, 1, Conv, [128, 3, 1]], 47 | [-1, 1, Conv, [128, 3, 1]], 48 | [-1, 1, Conv, [128, 3, 1]], 49 | [-1, 1, Conv, [128, 3, 1]], 50 | [-1, 1, Conv, [128, 3, 1]], 51 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 52 | [-1, 1, Conv, [320, 1, 1]], # 34 53 | [-11, 1, Conv, [128, 1, 1]], 54 | [-12, 1, Conv, [128, 1, 1]], 55 | [-1, 1, Conv, [128, 3, 1]], 56 | [-1, 1, Conv, [128, 3, 1]], 57 | [-1, 1, Conv, [128, 3, 1]], 58 | [-1, 1, Conv, [128, 3, 1]], 59 | [-1, 1, Conv, [128, 3, 1]], 60 | [-1, 1, Conv, [128, 3, 1]], 61 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 62 | [-1, 1, Conv, [320, 1, 1]], # 44 63 | [[-1, -11], 1, Shortcut, [1]], # 45 64 | 65 | [-1, 1, DownC, [640]], # 46-P4/16 66 
| [-1, 1, Conv, [256, 1, 1]], 67 | [-2, 1, Conv, [256, 1, 1]], 68 | [-1, 1, Conv, [256, 3, 1]], 69 | [-1, 1, Conv, [256, 3, 1]], 70 | [-1, 1, Conv, [256, 3, 1]], 71 | [-1, 1, Conv, [256, 3, 1]], 72 | [-1, 1, Conv, [256, 3, 1]], 73 | [-1, 1, Conv, [256, 3, 1]], 74 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 75 | [-1, 1, Conv, [640, 1, 1]], # 56 76 | [-11, 1, Conv, [256, 1, 1]], 77 | [-12, 1, Conv, [256, 1, 1]], 78 | [-1, 1, Conv, [256, 3, 1]], 79 | [-1, 1, Conv, [256, 3, 1]], 80 | [-1, 1, Conv, [256, 3, 1]], 81 | [-1, 1, Conv, [256, 3, 1]], 82 | [-1, 1, Conv, [256, 3, 1]], 83 | [-1, 1, Conv, [256, 3, 1]], 84 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 85 | [-1, 1, Conv, [640, 1, 1]], # 66 86 | [[-1, -11], 1, Shortcut, [1]], # 67 87 | 88 | [-1, 1, DownC, [960]], # 68-P5/32 89 | [-1, 1, Conv, [384, 1, 1]], 90 | [-2, 1, Conv, [384, 1, 1]], 91 | [-1, 1, Conv, [384, 3, 1]], 92 | [-1, 1, Conv, [384, 3, 1]], 93 | [-1, 1, Conv, [384, 3, 1]], 94 | [-1, 1, Conv, [384, 3, 1]], 95 | [-1, 1, Conv, [384, 3, 1]], 96 | [-1, 1, Conv, [384, 3, 1]], 97 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 98 | [-1, 1, Conv, [960, 1, 1]], # 78 99 | [-11, 1, Conv, [384, 1, 1]], 100 | [-12, 1, Conv, [384, 1, 1]], 101 | [-1, 1, Conv, [384, 3, 1]], 102 | [-1, 1, Conv, [384, 3, 1]], 103 | [-1, 1, Conv, [384, 3, 1]], 104 | [-1, 1, Conv, [384, 3, 1]], 105 | [-1, 1, Conv, [384, 3, 1]], 106 | [-1, 1, Conv, [384, 3, 1]], 107 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 108 | [-1, 1, Conv, [960, 1, 1]], # 88 109 | [[-1, -11], 1, Shortcut, [1]], # 89 110 | 111 | [-1, 1, DownC, [1280]], # 90-P6/64 112 | [-1, 1, Conv, [512, 1, 1]], 113 | [-2, 1, Conv, [512, 1, 1]], 114 | [-1, 1, Conv, [512, 3, 1]], 115 | [-1, 1, Conv, [512, 3, 1]], 116 | [-1, 1, Conv, [512, 3, 1]], 117 | [-1, 1, Conv, [512, 3, 1]], 118 | [-1, 1, Conv, [512, 3, 1]], 119 | [-1, 1, Conv, [512, 3, 1]], 120 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 121 | [-1, 1, Conv, [1280, 1, 1]], # 100 122 | [-11, 1, Conv, [512, 1, 1]], 123 | [-12, 1, Conv, [512, 1, 1]], 124 | [-1, 1, Conv, [512, 3, 1]], 125 | [-1, 1, Conv, [512, 3, 1]], 126 | [-1, 1, Conv, [512, 3, 1]], 127 | [-1, 1, Conv, [512, 3, 1]], 128 | [-1, 1, Conv, [512, 3, 1]], 129 | [-1, 1, Conv, [512, 3, 1]], 130 | [[-1, -3, -5, -7, -8], 1, Concat, [1]], 131 | [-1, 1, Conv, [1280, 1, 1]], # 110 132 | [[-1, -11], 1, Shortcut, [1]], # 111 133 | ] 134 | 135 | # yolov7 head 136 | head: 137 | [[-1, 1, SPPCSPC, [640]], # 112 138 | 139 | [-1, 1, Conv, [480, 1, 1]], 140 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 141 | [89, 1, Conv, [480, 1, 1]], # route backbone P5 142 | [[-1, -2], 1, Concat, [1]], 143 | 144 | [-1, 1, Conv, [384, 1, 1]], 145 | [-2, 1, Conv, [384, 1, 1]], 146 | [-1, 1, Conv, [192, 3, 1]], 147 | [-1, 1, Conv, [192, 3, 1]], 148 | [-1, 1, Conv, [192, 3, 1]], 149 | [-1, 1, Conv, [192, 3, 1]], 150 | [-1, 1, Conv, [192, 3, 1]], 151 | [-1, 1, Conv, [192, 3, 1]], 152 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 153 | [-1, 1, Conv, [480, 1, 1]], # 126 154 | [-11, 1, Conv, [384, 1, 1]], 155 | [-12, 1, Conv, [384, 1, 1]], 156 | [-1, 1, Conv, [192, 3, 1]], 157 | [-1, 1, Conv, [192, 3, 1]], 158 | [-1, 1, Conv, [192, 3, 1]], 159 | [-1, 1, Conv, [192, 3, 1]], 160 | [-1, 1, Conv, [192, 3, 1]], 161 | [-1, 1, Conv, [192, 3, 1]], 162 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 163 | [-1, 1, Conv, [480, 1, 1]], # 136 164 | [[-1, -11], 1, Shortcut, [1]], # 137 165 | 166 | [-1, 1, Conv, [320, 1, 1]], 167 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 168 | [67, 1, Conv, [320, 1, 1]], # route backbone P4 169 | [[-1, -2], 1, 
Concat, [1]], 170 | 171 | [-1, 1, Conv, [256, 1, 1]], 172 | [-2, 1, Conv, [256, 1, 1]], 173 | [-1, 1, Conv, [128, 3, 1]], 174 | [-1, 1, Conv, [128, 3, 1]], 175 | [-1, 1, Conv, [128, 3, 1]], 176 | [-1, 1, Conv, [128, 3, 1]], 177 | [-1, 1, Conv, [128, 3, 1]], 178 | [-1, 1, Conv, [128, 3, 1]], 179 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 180 | [-1, 1, Conv, [320, 1, 1]], # 151 181 | [-11, 1, Conv, [256, 1, 1]], 182 | [-12, 1, Conv, [256, 1, 1]], 183 | [-1, 1, Conv, [128, 3, 1]], 184 | [-1, 1, Conv, [128, 3, 1]], 185 | [-1, 1, Conv, [128, 3, 1]], 186 | [-1, 1, Conv, [128, 3, 1]], 187 | [-1, 1, Conv, [128, 3, 1]], 188 | [-1, 1, Conv, [128, 3, 1]], 189 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 190 | [-1, 1, Conv, [320, 1, 1]], # 161 191 | [[-1, -11], 1, Shortcut, [1]], # 162 192 | 193 | [-1, 1, Conv, [160, 1, 1]], 194 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 195 | [45, 1, Conv, [160, 1, 1]], # route backbone P3 196 | [[-1, -2], 1, Concat, [1]], 197 | 198 | [-1, 1, Conv, [128, 1, 1]], 199 | [-2, 1, Conv, [128, 1, 1]], 200 | [-1, 1, Conv, [64, 3, 1]], 201 | [-1, 1, Conv, [64, 3, 1]], 202 | [-1, 1, Conv, [64, 3, 1]], 203 | [-1, 1, Conv, [64, 3, 1]], 204 | [-1, 1, Conv, [64, 3, 1]], 205 | [-1, 1, Conv, [64, 3, 1]], 206 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 207 | [-1, 1, Conv, [160, 1, 1]], # 176 208 | [-11, 1, Conv, [128, 1, 1]], 209 | [-12, 1, Conv, [128, 1, 1]], 210 | [-1, 1, Conv, [64, 3, 1]], 211 | [-1, 1, Conv, [64, 3, 1]], 212 | [-1, 1, Conv, [64, 3, 1]], 213 | [-1, 1, Conv, [64, 3, 1]], 214 | [-1, 1, Conv, [64, 3, 1]], 215 | [-1, 1, Conv, [64, 3, 1]], 216 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 217 | [-1, 1, Conv, [160, 1, 1]], # 186 218 | [[-1, -11], 1, Shortcut, [1]], # 187 219 | 220 | [-1, 1, DownC, [320]], 221 | [[-1, 162], 1, Concat, [1]], 222 | 223 | [-1, 1, Conv, [256, 1, 1]], 224 | [-2, 1, Conv, [256, 1, 1]], 225 | [-1, 1, Conv, [128, 3, 1]], 226 | [-1, 1, Conv, [128, 3, 1]], 227 | [-1, 1, Conv, [128, 3, 1]], 228 | [-1, 1, Conv, [128, 3, 1]], 229 | [-1, 1, Conv, [128, 3, 1]], 230 | [-1, 1, Conv, [128, 3, 1]], 231 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 232 | [-1, 1, Conv, [320, 1, 1]], # 199 233 | [-11, 1, Conv, [256, 1, 1]], 234 | [-12, 1, Conv, [256, 1, 1]], 235 | [-1, 1, Conv, [128, 3, 1]], 236 | [-1, 1, Conv, [128, 3, 1]], 237 | [-1, 1, Conv, [128, 3, 1]], 238 | [-1, 1, Conv, [128, 3, 1]], 239 | [-1, 1, Conv, [128, 3, 1]], 240 | [-1, 1, Conv, [128, 3, 1]], 241 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 242 | [-1, 1, Conv, [320, 1, 1]], # 209 243 | [[-1, -11], 1, Shortcut, [1]], # 210 244 | 245 | [-1, 1, DownC, [480]], 246 | [[-1, 137], 1, Concat, [1]], 247 | 248 | [-1, 1, Conv, [384, 1, 1]], 249 | [-2, 1, Conv, [384, 1, 1]], 250 | [-1, 1, Conv, [192, 3, 1]], 251 | [-1, 1, Conv, [192, 3, 1]], 252 | [-1, 1, Conv, [192, 3, 1]], 253 | [-1, 1, Conv, [192, 3, 1]], 254 | [-1, 1, Conv, [192, 3, 1]], 255 | [-1, 1, Conv, [192, 3, 1]], 256 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 257 | [-1, 1, Conv, [480, 1, 1]], # 222 258 | [-11, 1, Conv, [384, 1, 1]], 259 | [-12, 1, Conv, [384, 1, 1]], 260 | [-1, 1, Conv, [192, 3, 1]], 261 | [-1, 1, Conv, [192, 3, 1]], 262 | [-1, 1, Conv, [192, 3, 1]], 263 | [-1, 1, Conv, [192, 3, 1]], 264 | [-1, 1, Conv, [192, 3, 1]], 265 | [-1, 1, Conv, [192, 3, 1]], 266 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 267 | [-1, 1, Conv, [480, 1, 1]], # 232 268 | [[-1, -11], 1, Shortcut, [1]], # 233 269 | 270 | [-1, 1, DownC, [640]], 271 | [[-1, 112], 1, Concat, [1]], 
272 | 273 | [-1, 1, Conv, [512, 1, 1]], 274 | [-2, 1, Conv, [512, 1, 1]], 275 | [-1, 1, Conv, [256, 3, 1]], 276 | [-1, 1, Conv, [256, 3, 1]], 277 | [-1, 1, Conv, [256, 3, 1]], 278 | [-1, 1, Conv, [256, 3, 1]], 279 | [-1, 1, Conv, [256, 3, 1]], 280 | [-1, 1, Conv, [256, 3, 1]], 281 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 282 | [-1, 1, Conv, [640, 1, 1]], # 245 283 | [-11, 1, Conv, [512, 1, 1]], 284 | [-12, 1, Conv, [512, 1, 1]], 285 | [-1, 1, Conv, [256, 3, 1]], 286 | [-1, 1, Conv, [256, 3, 1]], 287 | [-1, 1, Conv, [256, 3, 1]], 288 | [-1, 1, Conv, [256, 3, 1]], 289 | [-1, 1, Conv, [256, 3, 1]], 290 | [-1, 1, Conv, [256, 3, 1]], 291 | [[-1, -2, -3, -4, -5, -6, -7, -8], 1, Concat, [1]], 292 | [-1, 1, Conv, [640, 1, 1]], # 255 293 | [[-1, -11], 1, Shortcut, [1]], # 256 294 | 295 | [187, 1, Conv, [320, 3, 1]], 296 | [210, 1, Conv, [640, 3, 1]], 297 | [233, 1, Conv, [960, 3, 1]], 298 | [256, 1, Conv, [1280, 3, 1]], 299 | 300 | [186, 1, Conv, [320, 3, 1]], 301 | [161, 1, Conv, [640, 3, 1]], 302 | [136, 1, Conv, [960, 3, 1]], 303 | [112, 1, Conv, [1280, 3, 1]], 304 | 305 | [[257,258,259,260,261,262,263,264], 1, IAuxDetect, [nc, anchors]], # Detect(P3, P4, P5, P6) 306 | ] 307 | -------------------------------------------------------------------------------- /segment_video.py: -------------------------------------------------------------------------------- 1 | import tensorrt as trt 2 | import pycuda.autoinit 3 | import pycuda.driver as cuda 4 | import torch.backends.cudnn as cudnn 5 | import numpy as np 6 | import cv2 7 | import argparse 8 | import time 9 | import onnxruntime as ort 10 | import onnx 11 | import os 12 | import torch 13 | import yaml 14 | import tqdm 15 | import glob 16 | from PIL import Image 17 | from skimage.io._plugins.pil_plugin import ndarray_to_pil, pil_to_ndarray 18 | from utils.datasets import LoadStreams, LoadImages 19 | from utils.datasets import letterbox 20 | from torchvision import transforms 21 | from models.experimental import attempt_load 22 | from utils.general import non_max_suppression_mask_conf 23 | 24 | 25 | from detectron2.modeling.poolers import ROIPooler 26 | from detectron2.structures import Boxes 27 | from detectron2.utils.memory import retry_if_cuda_oom 28 | from detectron2.layers import paste_masks_in_image 29 | 30 | class HostDeviceMem(object): 31 | def __init__(self, host_mem, device_mem): 32 | self.host = host_mem 33 | self.device = device_mem 34 | 35 | def __str__(self): 36 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 37 | 38 | def __repr__(self): 39 | return self.__str__() 40 | 41 | class BaseEngine(object): 42 | def __init__(self, engine_path, imgsz=(320,320)): 43 | self.imgsz = imgsz 44 | self.mean = None 45 | self.std = None 46 | self.class_names = [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 47 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 48 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 49 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 50 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 51 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 52 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 53 | 
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 54 | 'hair drier', 'toothbrush' ] 55 | 56 | f = args.model_onnx # matches the --model_onnx argument defined below 57 | model_onnx = onnx.load(f) 58 | self.input_shapes = [[d.dim_value for d in _input.type.tensor_type.shape.dim] for _input in model_onnx.graph.input] 59 | 60 | 61 | logger = trt.Logger(trt.Logger.WARNING) 62 | trt.init_libnvinfer_plugins(logger,'') 63 | runtime = trt.Runtime(logger) 64 | with open(engine_path, "rb") as f: 65 | serialized_engine = f.read() 66 | engine = runtime.deserialize_cuda_engine(serialized_engine) 67 | self.context = engine.create_execution_context() 68 | self.inputs, self.outputs, self.bindings = [], [], [] 69 | self.stream = cuda.Stream() 70 | for binding in engine: 71 | size = trt.volume(engine.get_binding_shape(binding)) 72 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 73 | host_mem = cuda.pagelocked_empty(size, dtype) 74 | device_mem = cuda.mem_alloc(host_mem.nbytes) 75 | self.bindings.append(int(device_mem)) 76 | if engine.binding_is_input(binding): 77 | self.inputs.append(HostDeviceMem(host_mem, device_mem)) 78 | else: 79 | self.outputs.append(HostDeviceMem(host_mem, device_mem)) 80 | def PreProcess(self, image_path): 81 | image = cv2.imread(image_path) 82 | real_image = image.copy() 83 | img = letterbox(image, self.imgsz, stride=64, auto=True)[0] 84 | if (np.shape(image)[0:2] != tuple(self.input_shapes[0][2:4])): # not the same shape as the input of the onnx model; dynamic shapes are not implemented yet 85 | image = cv2.resize(image, (self.input_shapes[0][3], self.input_shapes[0][2])) # cv2.resize expects (width, height) 86 | img = transforms.ToTensor()(img) 87 | img = torch.unsqueeze(img, 0) 88 | return img, real_image 89 | def PostProcess(self, img, hyp, inf_out, attn, bases, sem_output, real_image): 90 | bases = torch.cat([bases, sem_output], dim=1) 91 | nb, _, height, width = img.shape 92 | pooler_scale = 0.25 #model.pooler_scale 93 | pooler = ROIPooler(output_size=hyp['mask_resolution'], scales=(pooler_scale,), sampling_ratio=1, pooler_type='ROIAlignV2', canonical_level=2) 94 | 95 | output, output_mask = non_max_suppression_mask_conf(inf_out, attn, bases, pooler, hyp, conf_thres=0.25, iou_thres=0.65, merge=False, mask_iou=None) 96 | 97 | pred, pred_masks = output[0], output_mask[0] 98 | if pred is not None: 99 | bboxes = Boxes(pred[:, :4]) 100 | original_pred_masks = pred_masks.view(-1, hyp['mask_resolution'], hyp['mask_resolution']) 101 | pred_masks = retry_if_cuda_oom(paste_masks_in_image)( original_pred_masks, bboxes, (height, width), threshold=0.5) 102 | pred_masks_np = pred_masks.detach().cpu().numpy() 103 | pred_conf = pred[:, 4].detach().cpu().numpy() 104 | nimg = img[0].permute(1, 2, 0) * 255 105 | nimg = nimg.cpu().numpy().astype(np.uint8) 106 | nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR) 107 | pnimg = nimg.copy() 108 | nimg[:,:] = nimg[:,:]*0 109 | cnimg = nimg.copy() 110 | ite = 0 111 | for one_mask, conf in zip(pred_masks_np, pred_conf): 112 | cnimg[:,:] = cnimg[:,:]*0 113 | if conf < 0.25: 114 | continue 115 | color = [0,255,0] 116 | pnimg[one_mask] = pnimg[one_mask] * 0.5 + np.array(color, dtype=np.uint8) * 0.5 117 | cnimg[one_mask] = cnimg[one_mask]*0 + 255 118 | nimg[one_mask] = nimg[one_mask]*0 + 255 119 | ite +=1 120 | else: 121 | nimg = img[0].permute(1, 2, 0) * 255 122 | nimg = nimg.cpu().numpy().astype(np.uint8) 123 | nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR) 124 | pnimg = nimg.copy() 125 | 126 | return pnimg, nimg, real_image 127 | 128 | def infer(self, img): 129 | img = np.ascontiguousarray(img, 
dtype=np.float32) 130 | self.inputs[0].host = img 131 | for inp in self.inputs: 132 | cuda.memcpy_htod_async(inp.device, inp.host, self.stream) 133 | self.context.execute_async_v2( 134 | bindings=self.bindings, 135 | stream_handle=self.stream.handle) 136 | for out in self.outputs: 137 | cuda.memcpy_dtoh_async(out.host, out.device, self.stream) 138 | self.stream.synchronize() 139 | data = [out.host for out in self.outputs] 140 | return data 141 | 142 | def inference(self, dataset, conf=0.25): 143 | with open('data/hyp.scratch.mask.yaml') as f: 144 | hyp = yaml.load(f, Loader=yaml.FullLoader) 145 | vid_writer = None 146 | vid_path = None 147 | img_path_alone = "data/horses.jpg" 148 | img, real_image = self.PreProcess(img_path_alone) 149 | for _ in range(5): 150 | output = self.infer(img) # dry runs to warm up the engine 151 | iteration = 0 152 | 153 | for path, im0s, vid_cap in dataset: 154 | real_image = im0s.copy() 155 | img = letterbox(im0s, self.imgsz, stride=64, auto=True)[0] 156 | if (np.shape(img)[0:2] != tuple(self.input_shapes[0][2:4])): # not the same shape as the input of the onnx model; dynamic shapes are not implemented yet 157 | print("/!\ Shape of the input " + str(np.shape(img)[0:2]) + " different from the input size of the ONNX model " + str(self.input_shapes[0][2:4]) + ", have to resize the image.") 158 | img = cv2.resize(img, (self.input_shapes[0][3], self.input_shapes[0][2])) # cv2.resize expects (width, height) 159 | img = transforms.ToTensor()(img) 160 | img = torch.unsqueeze(img, 0) 161 | 162 | output = self.infer(img) 163 | 164 | for i in range(len(output)): 165 | output[i] = torch.tensor(output[i]) 166 | inf_out = torch.reshape((output[5]), (1, len((output[5]))//85,85)) 167 | attn = torch.reshape((output[6]), (1, (len((output[6]))//980),980)) 168 | bases = torch.reshape((output[0]), (1, 4, ((len(output[0])//(self.imgsz[0]//4))//4), (self.imgsz[0]//4))) 169 | sem_output = torch.reshape((output[1]), (1, 1, (len(output[1])//(self.imgsz[0]//4)), (self.imgsz[0]//4))) 170 | 171 | pnimg, nimg, real_image = self.PostProcess(img, hyp, inf_out, attn, bases, sem_output, real_image) 172 | 173 | if args.save_video: 174 | 175 | if vid_path != args.save_path: # new video 176 | vid_path = args.save_path 177 | if isinstance(vid_writer, cv2.VideoWriter): 178 | vid_writer.release() # release previous video writer 179 | if vid_cap: # video 180 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 181 | w = int(self.imgsz[0]) 182 | h = int(self.imgsz[1]) 183 | vid_writer = cv2.VideoWriter(str(args.save_path), cv2.VideoWriter_fourcc(*'mp4v'), fps, (h, w)) 184 | vid_writer.write(pnimg) 185 | iteration += 1 186 | 187 | def get_parser(): 188 | parser = argparse.ArgumentParser( 189 | description="YOLOv7-mask TensorRT segmentation demo") 190 | parser.add_argument( 191 | "--input", 192 | default="data/horses.jpg", 193 | nargs="+", 194 | help="A file or directory of your input data " 195 | "If not given, will show output in an OpenCV window.", 196 | ) 197 | parser.add_argument( 198 | "--model", 199 | default='./engineyolov7-mask.engine', 200 | help="A file or directory of your TensorRT engine ", 201 | ) 202 | parser.add_argument( 203 | "--model_onnx", 204 | default='onnx/yolov7-mask.onnx', 205 | help="A file or directory of your ONNX model ", 206 | ) 207 | parser.add_argument( 208 | "--imgsz", 209 | default=640, 210 | type=int, 211 | help="Inference image size ", 212 | ) 213 | parser.add_argument( 214 | "--save_video", 215 | action="store_true", 216 | ) 217 | parser.add_argument( 218 | "--save_path", 219 | help="A file path for the output video ", 220 | ) 221 | 222 | 
return parser 223 | 224 | args = get_parser().parse_args() 225 | arg_input = args.input 226 | if (args.save_path is None and args.save_video): 227 | print("You need an output path to save the video: mkdir results && --save_path results/name_video.mp4") 228 | exit(1) 229 | dataset = LoadImages(arg_input[0], img_size=args.imgsz, stride=64) 230 | pred = BaseEngine(engine_path=args.model, imgsz=(args.imgsz,args.imgsz)) 231 | pred.inference(dataset) 232 | 233 | 234 | --------------------------------------------------------------------------------