├── .gitignore ├── Dockerfile_base ├── Dockerfile_torchserve_tensorrt ├── LICENSE ├── README.md ├── config └── config.properties ├── data ├── argoverse_hd.yaml ├── coco.yaml ├── coco128.yaml ├── hyp.finetune.yaml ├── hyp.scratch.yaml ├── images │ ├── 1.jpg │ ├── FDDB.png │ ├── Widerface.jpg │ ├── bad_image.jpg │ ├── bus.jpg │ ├── head.jpg │ ├── head_down.jpg │ ├── head_small.jpg │ ├── mask.jpg │ ├── test.jpg │ ├── test_down.jpg │ └── zidane.jpg ├── retinaface2yolo.py ├── scripts │ ├── get_argoverse_hd.sh │ ├── get_coco.sh │ └── get_voc.sh ├── train2yolo.py ├── val2yolo.py ├── voc.yaml └── widerface.yaml ├── detect_face.py ├── docker ├── .env ├── Dockerfile ├── Dockerfile_model └── docker-compose.yml ├── go.mod ├── go ├── readme.md ├── results.go ├── service.go ├── service_test.go ├── test │ ├── perf_test.go │ └── zidane.jpg └── util.go ├── hubconf.py ├── models ├── __init__.py ├── common.py ├── experimental.py ├── export.py ├── yolo.py ├── yolo4train.py ├── yolov5l.yaml ├── yolov5l6.yaml ├── yolov5m.yaml ├── yolov5m6.yaml ├── yolov5n-0.5.yaml ├── yolov5n.yaml ├── yolov5n6.yaml ├── yolov5s.yaml └── yolov5s6.yaml ├── requirements.txt ├── scripts └── build_model.sh ├── test.py ├── test_widerface.py ├── torchserve ├── __init__.py ├── api.py ├── client.py ├── grpc │ ├── inference_pb2.py │ └── inference_pb2_grpc.py ├── handler.py ├── model_repack.py ├── qpstest.py ├── readme_torchserve_depricated.md ├── resize_client │ ├── client.py │ ├── client_utils.py │ ├── client_utils_old.py │ └── handler (copy).py └── run.sh ├── train.py ├── utils ├── __init__.py ├── activations.py ├── autoanchor.py ├── aws │ ├── __init__.py │ ├── mime.sh │ ├── resume.py │ └── userdata.sh ├── datasets.py ├── face_datasets.py ├── general.py ├── google_app_engine │ ├── Dockerfile │ ├── additional_requirements.txt │ └── app.yaml ├── google_utils.py ├── infer_utils.py ├── loss.py ├── metrics.py ├── plots.py ├── torch_utils.py └── wandb_logging │ ├── __init__.py │ ├── log_dataset.py │ └── wandb_utils.py └── weights ├── download_weights.sh └── yolov5s-face.pth /.gitignore: -------------------------------------------------------------------------------- 1 | logs/ 2 | *.pt 3 | #*.pth 4 | *.onnx 5 | *.trt 6 | *.torch2trt 7 | torch2trt/ 8 | result.jpg 9 | __pycache__/ 10 | torchserve/model_store/ 11 | *.pyc 12 | -------------------------------------------------------------------------------- /Dockerfile_base: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/tensorrt:20.12-py3 2 | # include ubuntu20.04, cuda, cudnn, tensorRT, pytorch 3 | 4 | COPY requirements.txt requirements.txt 5 | 6 | # setup torch2trt 7 | RUN python3 -m pip install pip --upgrade 8 | RUN python3 -m pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple 9 | 10 | # setup java-11 11 | RUN wget download.op.uzoo.cn/openjdk-11+28_linux-x64_bin.tar.gz 12 | RUN tar xvf openjdk-11+28_linux-x64_bin.tar.gz -C /usr/local/ 13 | RUN ln -s /usr/local/jdk-11 /usr/java11 14 | RUN ln -s /usr/java11/bin/java /usr/local/bin/java 15 | 16 | RUN echo export JAVA_HOME=/usr/java11 >> /etc/profile 17 | RUN echo export JRE_HOME=$JAVA_HOME/jre >> /etc/profile 18 | RUN echo export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib >> /etc/profile 19 | RUN echo export PATH=${JAVA_HOME}/bin:$PATH >> /etc/profile 20 | RUN rm openjdk-11+28_linux-x64_bin.tar.gz 21 | -------------------------------------------------------------------------------- /Dockerfile_torchserve_tensorrt: 
-------------------------------------------------------------------------------- 1 | FROM base 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y netcat 5 | 6 | COPY weights/yolov5s-face.pth weights/yolov5s-face.pth 7 | 8 | COPY torchserve/ torchserve/ 9 | 10 | COPY models models 11 | 12 | COPY utils utils 13 | 14 | COPY torch2trt torch2trt 15 | #RUN git clone https://github.com/NVIDIA-AI-IOT/torch2trt 16 | 17 | RUN cd torch2trt && python3 setup.py install && cd .. && rm -rf torch2trt 18 | 19 | RUN python3 ./torchserve/model_repack.py --trt 1 # requires a GPU; converts the model to TensorRT format, takes about one minute 20 | 21 | EXPOSE 8080 8081 8082 7070 7071 22 | 23 | RUN chmod +x ./torchserve/run.sh 24 | 25 | ENTRYPOINT ["./torchserve/run.sh"] 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Face Detection Server with Torchserve and TensorRT (torch2trt) 3 | 4 | Serves the yolov5-face model with Torchserve and a TensorRT backend (recommended), reaching roughly 11 ms latency and 700 queries per second (QPS) on a T4 GPU server. 5 | 6 | For the traditional Torchserve pipeline with a JIT TorchScript backend (higher latency and lower throughput), see **torchserve/readme_torchserve_depricated.md**. 7 | 8 | This repo is adapted from https://github.com/deepcam-cn/yolov5-face (Warning: GNU LICENSE). It adds: 9 | 1. Torchserve as the inference server. 10 | 2. Acceleration with TensorRT via the torch2trt toolkit, giving roughly 10x lower latency and 2x higher throughput. As far as I know, this is the first demo showing how to serve a TensorRT model on Torchserve. 11 | 3. Docker packaging and logging. 12 | 13 | Torchserve is a performant, flexible and easy-to-use tool for serving PyTorch eager-mode and TorchScripted models: https://github.com/pytorch/serve. 14 | 15 | TensorRT is a library developed by NVIDIA for faster inference on NVIDIA graphics processing units (GPUs). It can give around 4 to 5 times faster inference for many real-time services and embedded applications. https://github.com/NVIDIA/TensorRT 16 | 17 | "torch2trt" is a PyTorch-to-TensorRT converter that uses the TensorRT Python API. It keeps the model's inputs and outputs in Torch Tensor format. https://github.com/NVIDIA-AI-IOT/torch2trt 18 | 19 | ## Use TensorRT with Torchserve together in a few lines of code 20 | 21 | Torchserve can serve a torch2trt model pretty well, simply by rewriting the handler like this: 22 | ``` 23 | from torch2trt import TRTModule 24 | 25 | class Yolov5FaceHandler(BaseHandler): 26 | def initialize(self, context): 27 | serialized_file = context.manifest["model"]["serializedFile"] 28 | if serialized_file.split(".")[-1] == "torch2trt": # if serializedFile ends with .torch2trt instead of .pt 29 | self._load_torchscript_model = self._load_torch2trt_model # overwrite the model-loading function 30 | super().initialize(context) 31 | 32 | def _load_torch2trt_model(self, torch2trt_path): 33 | logger.info("Loading torch2trt model") 34 | model_trt = TRTModule() 35 | model_trt.load_state_dict(torch.load(torch2trt_path)) 36 | return model_trt 37 | ``` 38 | 39 | See https://github.com/pytorch/serve/issues/1243 for discussion. 40 | 41 | ## why choose yolov5face as the face detection model 42 | 1. Currently (2021-09-04) a SOTA face-detection model on the WiderFace benchmark, with a good balance between speed and accuracy. 43 | 2. Based on PyTorch: easy to finetune, and easy to build an inference server for via Torchserve.
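## converting the model to torch2trt (sketch)

The `.torch2trt` file loaded by the handler above is produced by `torchserve/model_repack.py`. Below is a minimal, simplified sketch of that conversion step, not the exact code from the script; it assumes a CUDA GPU is available, and the weight path, input resolution and fp16 flag are illustrative:

```python
import torch
from torch2trt import torch2trt  # https://github.com/NVIDIA-AI-IOT/torch2trt

from models.experimental import attempt_load

device = torch.device("cuda")
# load the yolov5s-face checkpoint (path is illustrative; see "Download model file" below)
model = attempt_load("weights/yolov5s-face.pt", map_location=device)

# dummy input at the serving resolution (the pipeline resizes/pads images to 320x320)
x = torch.zeros(1, 3, 320, 320, device=device)

# convert with torch2trt; fp16_mode trades a little accuracy for extra speed on T4-class GPUs
model_trt = torch2trt(model, [x], fp16_mode=True, max_batch_size=1)

# Torchserve later loads this state_dict into a TRTModule, as shown in the handler above
torch.save(model_trt.state_dict(), "weights/yolov5s-face.torch2trt")
```

The saved file is what model_repack.py packs into the .mar archive, so the handler only ever loads a ready-made TensorRT engine wrapped in a Torch module.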
44 | 45 | ## model local test and visualization 46 | ` 47 | python torchserve/qpstest.py --mode 1 --vis 1 --image data/images/test.jpg 48 | ` 49 | 50 | ## Torchserve pipeline 51 | 1. Decode the image from JPEG, then resize and pad it to a lower resolution (320×320) for acceleration. 52 | 2. Run batched inference with the TensorRT backend. 53 | 3. Map the face coordinates back to the original resolution and return the result in JSON format. 54 | 55 | ## Interface protocol 56 | 57 | Input: JPEG data stream. 58 | Output: JSON (bounding box, confidence, 5 landmarks), for example: 59 | ```json 60 | [ 61 | { 62 | "xywh_ratio": [0.7689772367477417, 0.25734335581461587, 0.11677041053771975, 0.26296865675184466], 63 | "conf": 0.8641895651817322, 64 | "landmarks_ratio": [0.754405927658081, 0.22680193583170574, 0.8030961990356446, 0.23478228251139324, 0.7799828529357911, 0.2754765404595269, 0.7510656356811524, 0.31618389553493925, 0.7911150932312012, 0.32295591566297743] 65 | }, 66 | { 67 | "xywh_ratio": [0.4645264148712158, 0.47456512451171873, 0.12120456695556636, 0.29619462754991316], 68 | "conf": 0.7263935804367065, 69 | "landmarks_ratio": [0.4809267997741699, 0.44996253119574653, 0.5082815647125244, 0.4542162577311198, 0.5047649383544922, 0.5095860799153645, 0.4696146011352539, 0.5512683444552952, 0.4905359745025635, 0.5559690687391493] 70 | } 71 | ] 72 | ``` 73 | Every image contains N faces, and every face includes 3 keys (a pixel-coordinate conversion sketch follows the list): 74 | - xywh_ratio: the face center coordinates, width and height, as ratios of the image size. 75 | - conf: the face detection confidence, from 0 to 1. 76 | - landmarks_ratio: the 5 face landmark coordinates (x, y pairs), as ratios of the image size. 77 | 78 |
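Since the ratios are relative to the original image, converting a detection to pixel coordinates only needs the original image width and height. A small illustrative Python helper (hypothetical, not part of the repo's code; the Go client in go/util.go performs the same conversion):

```python
def xywh_ratio_to_box(xywh_ratio, img_w, img_h):
    """Convert one face's xywh_ratio into a pixel box (x1, y1, x2, y2)."""
    cx, cy, w, h = xywh_ratio
    cx, cy, w, h = cx * img_w, cy * img_h, w * img_w, h * img_h
    return int(cx - w / 2), int(cy - h / 2), int(cx + w / 2), int(cy + h / 2)


def landmarks_ratio_to_points(landmarks_ratio, img_w, img_h):
    """Convert landmarks_ratio into 5 (x, y) pixel points."""
    xs = [int(v * img_w) for v in landmarks_ratio[0::2]]
    ys = [int(v * img_h) for v in landmarks_ratio[1::2]]
    return list(zip(xs, ys))


# first face of the example above, on a 1280x720 image
print(xywh_ratio_to_box([0.7689, 0.2573, 0.1167, 0.2629], 1280, 720))  # (909, 90, 1058, 279)
```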
79 | # QuickStart with Docker 80 | 81 | Follow the instructions below to deploy yolov5face: 82 | 83 | 1. cd yolov5face/docker 84 | 2. docker-compose up -d 85 | 86 | Configurations 87 | The yolov5face configuration is actually the Torchserve configuration. The configuration file is located at: 88 | yolov5face/config/config.properties 89 | 90 | The configuration items are the IP addresses and ports that the service is bound to. 91 | 92 | The Torchserve worker number is currently hard-coded to 4. 93 | 94 | 95 | Bottlenecks: 96 | Each yolov5face Torchserve instance consumes about 2.5 GB of memory on average, so system memory is a bottleneck. 97 | 98 | 99 | # Install Manually without Docker 100 | 101 | ### install dependencies 102 | ``` 103 | pip install -r requirements.txt 104 | ``` 105 | Install the Java 11 dependency: https://www.ubuntu18.com/ubuntu-install-openjdk-11/ 106 | On a cloud server, if the CUDA version differs from CUDA 10.2, manually edit the PyTorch version in requirements.txt: 107 | https://pytorch.org/get-started/locally/ 108 | 109 | ### Download model file 110 | Download the 50 MB **yolov5s** model file from https://drive.google.com/file/d/1zxaHeLDyID9YU4-hqK7KNepXIwbTkRIO/view?usp=sharing 111 | and unzip it to 112 | ``` 113 | weights/yolov5s-face.pt 114 | ``` 115 | 116 | ### install TensorRT without docker 117 | Download TensorRT-7 (compatible with the torch2trt tool as of 2021; TensorRT-8 may also be compatible by now): 118 | https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html 119 | Installing from the tar.gz package is recommended, as it is compatible with conda environments: 120 | https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html#installing-tar 121 | Be sure to pick the correct Ubuntu version, CUDA version, and cuDNN version. 122 | 123 | ### install torch2trt 124 | ``` 125 | cd ~/ 126 | git clone https://github.com/NVIDIA-AI-IOT/torch2trt 127 | cd torch2trt 128 | python setup.py install 129 | ``` 130 | 131 | ## Start Server and Register Model 132 | 1. Pack the model and Python code into Torchserve's .mar format, backed by TensorRT: 133 | ``` 134 | python ./torchserve/model_repack.py --trt 1 135 | ``` 136 | This will generate the file "./torchserve/model_store/trt_fd1.mar". 137 | - start the server 138 | ``` 139 | torchserve --start --ncs --model-store ./torchserve/model_store/ 140 | ``` 141 | - register the model on localhost 142 | ``` 143 | curl -X POST "127.0.0.1:8081/models?url=trt_fd1.mar&batch_size=1&max_batch_delay=2&initial_workers=4&model_name=fd1" 144 | ``` 145 | Note that 146 | 1) url=trt_fd1.mar 147 | 2) batch_size=1 148 | 3) initial_workers should be set to the number of CPU cores on your server (e.g. initial_workers=2 for 2 cores), 149 | and each worker requires about 3 GB of system memory (3 * 2 GB for 2 workers). 150 | 151 | 152 | 153 | # docker install and run 154 | ### git clone torch2trt 155 | ``` 156 | git clone https://github.com/NVIDIA-AI-IOT/torch2trt --branch v0.3.0 157 | ``` 158 | ### pull the nvidia tensorrt7 docker image (includes ubuntu20.04, cuda, cudnn, tensorrt7.2.2); this takes a while 159 | ``` 160 | docker pull nvcr.io/nvidia/tensorrt:20.12-py3 161 | ``` 162 | ### install nvidia-container-runtime 163 | Why this step? docker build needs the GPU and torch2trt to convert the model; see https://stackoverflow.com/questions/59691207/docker-build-with-nvidia-runtime 164 | 1. Install nvidia-container-runtime: 165 | ``` 166 | sudo apt-get install nvidia-container-runtime 167 | ``` 168 | 2. Edit/create **/etc/docker/daemon.json** with the content: 169 | ``` 170 | { 171 | "runtimes": { 172 | "nvidia": { 173 | "path": "/usr/bin/nvidia-container-runtime", 174 | "runtimeArgs": [] 175 | } 176 | }, 177 | "default-runtime": "nvidia" 178 | } 179 | ``` 180 | 3. Restart the docker daemon: 181 | ``` 182 | sudo systemctl restart docker 183 | ``` 184 | 185 | ### build the environment docker image 186 | ``` 187 | docker build -f Dockerfile_base --tag base --progress=plain . 188 | ``` 189 | ### build the final docker image 190 | ``` 191 | docker build -f Dockerfile_torchserve_tensorrt --tag ts_trt --progress=plain . 192 | ``` 193 | ### run 194 | ``` 195 | docker run --gpus all -it --rm --name test -p 8080:8080 -p 8081:8081 -p 8082:8082 -p 7070:7070 -p 7071:7071 ts_trt 196 | ``` 197 | This runs torchserve/run.sh inside the image, which will: 198 | 1. start Torchserve, and 199 | 2. register the model. 200 | Success log: 201 | {"status": "Model \"fd1\" Version: 1.0 registered with 4 initial workers"} 202 | 203 |
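### query the server from python (sketch)

Besides curl (see the TEST section below), the registered model can be queried with a few lines of Python. A minimal client sketch, assuming the server is reachable on localhost and the model is registered as fd1:

```python
import requests  # pip install requests

URL = "http://127.0.0.1:8080/predictions/fd1"

with open("data/images/zidane.jpg", "rb") as f:
    img_bytes = f.read()

# send the raw JPEG bytes, equivalent to `curl URL -T zidane.jpg`
resp = requests.post(URL, data=img_bytes, timeout=5)
resp.raise_for_status()

# list of {"xywh_ratio": [...], "conf": ..., "landmarks_ratio": [...]}, as described in the interface protocol
for face in resp.json():
    print(f"conf={face['conf']:.3f} xywh_ratio={face['xywh_ratio']}")
```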
204 | # TEST 205 | QPS test 206 | ``` 207 | python torchserve/qpstest.py --mode 3 208 | ``` 209 | visualization 210 | ``` 211 | python torchserve/qpstest.py --mode 3 --vis 1 212 | ``` 213 | ### API example 214 | ` 215 | python torchserve/api.py 216 | ` 217 | or 218 | ` 219 | curl 127.0.0.1:8080/predictions/fd1 -T ./data/images/zidane.jpg 220 | ` 221 | ### QPS test on torchserve 222 | ``` 223 | bash ./torchserve/run.sh 224 | python torchserve/qpstest.py --mode 3 225 | ``` 226 | QPS test on a local torchscript model 227 | ``` 228 | python ./torchserve/model_repack.py --trt 0 229 | python torchserve/qpstest.py --mode 1 230 | ``` 231 | QPS test on a local tensorrt model 232 | ``` 233 | python ./torchserve/model_repack.py --trt 1 234 | python torchserve/qpstest.py --mode 2 235 | ``` 236 | 237 | 238 | -------------------------------------------------------------------------------- /config/config.properties: -------------------------------------------------------------------------------- 1 | # Inference API binding address. Default: http://127.0.0.1:8080 2 | inference_address=http://0.0.0.0:8080 3 | # Management API binding address. Default: http://127.0.0.1:8081 4 | management_address=http://0.0.0.0:8081 5 | # Metrics API binding address. Default: http://127.0.0.1:8082 6 | metrics_address=http://0.0.0.0:8082 7 | # Maximum number of GPUs that TorchServe can use for inference. Default: all available GPUs in the system. If the value is larger than the number of available GPUs, TorchServe adjusts it to the real GPU count. 8 | # number_of_gpu=10 -------------------------------------------------------------------------------- /data/argoverse_hd.yaml: -------------------------------------------------------------------------------- 1 | # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ 2 | # Train command: python train.py --data argoverse_hd.yaml 3 | # Default dataset location is next to /yolov5: 4 | # /parent_folder 5 | # /argoverse 6 | # /yolov5 7 | 8 | 9 | # download command/URL (optional) 10 | download: bash data/scripts/get_argoverse_hd.sh 11 | 12 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 13 | train: ../argoverse/Argoverse-1.1/images/train/ # 39384 images 14 | val: ../argoverse/Argoverse-1.1/images/val/ # 15062 images 15 | test: ../argoverse/Argoverse-1.1/images/test/ # Submit to: https://eval.ai/web/challenges/challenge-page/800/overview 16 | 17 | # number of classes 18 | nc: 8 19 | 20 | # class names 21 | names: [ 'person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign' ] 22 | -------------------------------------------------------------------------------- /data/coco.yaml: -------------------------------------------------------------------------------- 1 | # COCO 2017 dataset http://cocodataset.org 2 | # Train command: python train.py --data coco.yaml 3 | # Default dataset location is next to /yolov5: 4 | # /parent_folder 5 | # /coco 6 | # /yolov5 7 | 8 | 9 | # download command/URL (optional) 10 | download: bash data/scripts/get_coco.sh 11 | 12 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 13 | train: ../coco/train2017.txt # 118287 images 14 | val: ../coco/val2017.txt # 5000 images 15 | test: ../coco/test-dev2017.txt # 20288 of
40670 images, submit to https://competitions.codalab.org/competitions/20794 16 | 17 | # number of classes 18 | nc: 80 19 | 20 | # class names 21 | names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 22 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 23 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 24 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 25 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 26 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 27 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 28 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 29 | 'hair drier', 'toothbrush' ] 30 | 31 | # Print classes 32 | # with open('data/coco.yaml') as f: 33 | # d = yaml.load(f, Loader=yaml.FullLoader) # dict 34 | # for i, x in enumerate(d['names']): 35 | # print(i, x) 36 | -------------------------------------------------------------------------------- /data/coco128.yaml: -------------------------------------------------------------------------------- 1 | # COCO 2017 dataset http://cocodataset.org - first 128 training images 2 | # Train command: python train.py --data coco128.yaml 3 | # Default dataset location is next to /yolov5: 4 | # /parent_folder 5 | # /coco128 6 | # /yolov5 7 | 8 | 9 | # download command/URL (optional) 10 | download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip 11 | 12 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 13 | train: ../coco128/images/train2017/ # 128 images 14 | val: ../coco128/images/train2017/ # 128 images 15 | 16 | # number of classes 17 | nc: 80 18 | 19 | # class names 20 | names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 21 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 22 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 23 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 24 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 25 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 26 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 27 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 28 | 'hair drier', 'toothbrush' ] 29 | -------------------------------------------------------------------------------- /data/hyp.finetune.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for VOC finetuning 2 | # python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | # Hyperparameter Evolution Results 7 | # Generations: 306 8 | # P R mAP.5 mAP.5:.95 box obj cls 9 | # 
Metrics: 0.6 0.936 0.896 0.684 0.0115 0.00805 0.00146 10 | 11 | lr0: 0.0032 12 | lrf: 0.12 13 | momentum: 0.843 14 | weight_decay: 0.00036 15 | warmup_epochs: 2.0 16 | warmup_momentum: 0.5 17 | warmup_bias_lr: 0.05 18 | box: 0.0296 19 | cls: 0.243 20 | cls_pw: 0.631 21 | obj: 0.301 22 | obj_pw: 0.911 23 | iou_t: 0.2 24 | anchor_t: 2.91 25 | # anchors: 3.63 26 | fl_gamma: 0.0 27 | hsv_h: 0.0138 28 | hsv_s: 0.664 29 | hsv_v: 0.464 30 | degrees: 0.373 31 | translate: 0.245 32 | scale: 0.898 33 | shear: 0.602 34 | perspective: 0.0 35 | flipud: 0.00856 36 | fliplr: 0.5 37 | mosaic: 1.0 38 | mixup: 0.243 39 | -------------------------------------------------------------------------------- /data/hyp.scratch.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for COCO training from scratch 2 | # python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.5 # cls loss gain 15 | landmark: 0.005 # landmark loss gain 16 | cls_pw: 1.0 # cls BCELoss positive_weight 17 | obj: 1.0 # obj loss gain (scale with pixels) 18 | obj_pw: 1.0 # obj BCELoss positive_weight 19 | iou_t: 0.20 # IoU training threshold 20 | anchor_t: 4.0 # anchor-multiple threshold 21 | # anchors: 3 # anchors per output layer (0 to ignore) 22 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 23 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 24 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 25 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 26 | degrees: 0.0 # image rotation (+/- deg) 27 | translate: 0.1 # image translation (+/- fraction) 28 | scale: 0.5 # image scale (+/- gain) 29 | shear: 0.5 # image shear (+/- deg) 30 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 31 | flipud: 0.0 # image flip up-down (probability) 32 | fliplr: 0.5 # image flip left-right (probability) 33 | mosaic: 0.5 # image mosaic (probability) 34 | mixup: 0.0 # image mixup (probability) 35 | -------------------------------------------------------------------------------- /data/images/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/data/images/1.jpg -------------------------------------------------------------------------------- /data/images/FDDB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/data/images/FDDB.png -------------------------------------------------------------------------------- /data/images/Widerface.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/data/images/Widerface.jpg 
-------------------------------------------------------------------------------- /data/images/bad_image.jpg: -------------------------------------------------------------------------------- 1 | bad_image 2 | -------------------------------------------------------------------------------- /data/images/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/data/images/bus.jpg -------------------------------------------------------------------------------- /data/images/head.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/data/images/head.jpg -------------------------------------------------------------------------------- /data/images/head_down.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/data/images/head_down.jpg -------------------------------------------------------------------------------- /data/images/head_small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/data/images/head_small.jpg -------------------------------------------------------------------------------- /data/images/mask.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/data/images/mask.jpg -------------------------------------------------------------------------------- /data/images/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/data/images/test.jpg -------------------------------------------------------------------------------- /data/images/test_down.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/data/images/test_down.jpg -------------------------------------------------------------------------------- /data/images/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/data/images/zidane.jpg -------------------------------------------------------------------------------- /data/retinaface2yolo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import torch 5 | import torch.utils.data as data 6 | import cv2 7 | import numpy as np 8 | 9 | class WiderFaceDetection(data.Dataset): 10 | def __init__(self, txt_path, preproc=None): 11 | self.preproc = preproc 12 | self.imgs_path = [] 13 | self.words = [] 14 | f = open(txt_path,'r') 15 | lines = f.readlines() 16 | isFirst = True 17 | labels = [] 18 | for line in lines: 19 | line = line.rstrip() 20 | if line.startswith('#'): 21 | if isFirst is True: 22 | isFirst = False 23 | 
else: 24 | labels_copy = labels.copy() 25 | self.words.append(labels_copy) 26 | labels.clear() 27 | path = line[2:] 28 | path = txt_path.replace('label.txt','images/') + path 29 | self.imgs_path.append(path) 30 | else: 31 | line = line.split(' ') 32 | label = [float(x) for x in line] 33 | labels.append(label) 34 | 35 | self.words.append(labels) 36 | 37 | def __len__(self): 38 | return len(self.imgs_path) 39 | 40 | def __getitem__(self, index): 41 | img = cv2.imread(self.imgs_path[index]) 42 | height, width, _ = img.shape 43 | 44 | labels = self.words[index] 45 | annotations = np.zeros((0, 15)) 46 | if len(labels) == 0: 47 | return annotations 48 | for idx, label in enumerate(labels): 49 | annotation = np.zeros((1, 15)) 50 | # bbox 51 | annotation[0, 0] = label[0] # x1 52 | annotation[0, 1] = label[1] # y1 53 | annotation[0, 2] = label[0] + label[2] # x2 54 | annotation[0, 3] = label[1] + label[3] # y2 55 | 56 | # landmarks 57 | annotation[0, 4] = label[4] # l0_x 58 | annotation[0, 5] = label[5] # l0_y 59 | annotation[0, 6] = label[7] # l1_x 60 | annotation[0, 7] = label[8] # l1_y 61 | annotation[0, 8] = label[10] # l2_x 62 | annotation[0, 9] = label[11] # l2_y 63 | annotation[0, 10] = label[13] # l3_x 64 | annotation[0, 11] = label[14] # l3_y 65 | annotation[0, 12] = label[16] # l4_x 66 | annotation[0, 13] = label[17] # l4_y 67 | if (annotation[0, 4]<0): 68 | annotation[0, 14] = -1 69 | else: 70 | annotation[0, 14] = 1 71 | 72 | annotations = np.append(annotations, annotation, axis=0) 73 | target = np.array(annotations) 74 | if self.preproc is not None: 75 | img, target = self.preproc(img, target) 76 | 77 | return torch.from_numpy(img), target 78 | 79 | def detection_collate(batch): 80 | """Custom collate fn for dealing with batches of images that have a different 81 | number of associated object annotations (bounding boxes). 
82 | 83 | Arguments: 84 | batch: (tuple) A tuple of tensor images and lists of annotations 85 | 86 | Return: 87 | A tuple containing: 88 | 1) (tensor) batch of images stacked on their 0 dim 89 | 2) (list of tensors) annotations for a given image are stacked on 0 dim 90 | """ 91 | targets = [] 92 | imgs = [] 93 | for _, sample in enumerate(batch): 94 | for _, tup in enumerate(sample): 95 | if torch.is_tensor(tup): 96 | imgs.append(tup) 97 | elif isinstance(tup, type(np.empty(0))): 98 | annos = torch.from_numpy(tup).float() 99 | targets.append(annos) 100 | 101 | return (torch.stack(imgs, 0), targets) 102 | 103 | save_path = '/ssd_1t/derron/yolov5-face/data/widerface/train' 104 | aa=WiderFaceDetection("/ssd_1t/derron/yolov5-face/data/widerface/widerface/train/label.txt") 105 | for i in range(len(aa.imgs_path)): 106 | print(i, aa.imgs_path[i]) 107 | img = cv2.imread(aa.imgs_path[i]) 108 | base_img = os.path.basename(aa.imgs_path[i]) 109 | base_txt = os.path.basename(aa.imgs_path[i])[:-4] +".txt" 110 | save_img_path = os.path.join(save_path, base_img) 111 | save_txt_path = os.path.join(save_path, base_txt) 112 | with open(save_txt_path, "w") as f: 113 | height, width, _ = img.shape 114 | labels = aa.words[i] 115 | annotations = np.zeros((0, 14)) 116 | if len(labels) == 0: 117 | continue 118 | for idx, label in enumerate(labels): 119 | annotation = np.zeros((1, 14)) 120 | # bbox 121 | label[0] = max(0, label[0]) 122 | label[1] = max(0, label[1]) 123 | label[2] = min(width - 1, label[2]) 124 | label[3] = min(height - 1, label[3]) 125 | annotation[0, 0] = (label[0] + label[2] / 2) / width # cx 126 | annotation[0, 1] = (label[1] + label[3] / 2) / height # cy 127 | annotation[0, 2] = label[2] / width # w 128 | annotation[0, 3] = label[3] / height # h 129 | #if (label[2] -label[0]) < 8 or (label[3] - label[1]) < 8: 130 | # img[int(label[1]):int(label[3]), int(label[0]):int(label[2])] = 127 131 | # continue 132 | # landmarks 133 | annotation[0, 4] = label[4] / width # l0_x 134 | annotation[0, 5] = label[5] / height # l0_y 135 | annotation[0, 6] = label[7] / width # l1_x 136 | annotation[0, 7] = label[8] / height # l1_y 137 | annotation[0, 8] = label[10] / width # l2_x 138 | annotation[0, 9] = label[11] / height # l2_y 139 | annotation[0, 10] = label[13] / width # l3_x 140 | annotation[0, 11] = label[14] / height # l3_y 141 | annotation[0, 12] = label[16] / width # l4_x 142 | annotation[0, 13] = label[17] / height # l4_y 143 | str_label="0 " 144 | for i in range(len(annotation[0])): 145 | str_label =str_label+" "+str(annotation[0][i]) 146 | str_label = str_label.replace('[', '').replace(']', '') 147 | str_label = str_label.replace(',', '') + '\n' 148 | f.write(str_label) 149 | cv2.imwrite(save_img_path, img) 150 | 151 | -------------------------------------------------------------------------------- /data/scripts/get_argoverse_hd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ 3 | # Download command: bash data/scripts/get_argoverse_hd.sh 4 | # Train command: python train.py --data argoverse_hd.yaml 5 | # Default dataset location is next to /yolov5: 6 | # /parent_folder 7 | # /argoverse 8 | # /yolov5 9 | 10 | # Download/unzip images 11 | d='../argoverse/' # unzip directory 12 | mkdir $d 13 | url=https://argoverse-hd.s3.us-east-2.amazonaws.com/ 14 | f=Argoverse-HD-Full.zip 15 | curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &# download, unzip, 
remove in background 16 | wait # finish background tasks 17 | 18 | cd ../argoverse/Argoverse-1.1/ 19 | ln -s tracking images 20 | 21 | cd ../Argoverse-HD/annotations/ 22 | 23 | python3 - "$@" <train.txt 91 | cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt 92 | 93 | python3 - "$@" < 0 && s.maxHeight > 0 { 114 | return width > s.maxWidth || height > s.maxHeight 115 | } else { 116 | return false 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /go/service_test.go: -------------------------------------------------------------------------------- 1 | package yolov5face_test 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "gitlab.gtvorg.tk/pannixilin/yolov5face/go" 8 | ) 9 | 10 | func Example() { 11 | var ( 12 | result *yolov5face.Result 13 | service *yolov5face.Service 14 | imgFile string 15 | err error 16 | ) 17 | 18 | imgFile = "test/zidane.jpg" 19 | 20 | // service only need to be created once 21 | service = yolov5face.NewHttpService( 22 | "http://localhost:8080/predictions/fd1", 23 | time.Millisecond * time.Duration(5000), 24 | 1920, 25 | 1080, 26 | ) 27 | if !service.IsAlive() { 28 | fmt.Println("service is not availabe") 29 | return 30 | } 31 | 32 | if result, err = service.DetectHeadFromFile(imgFile); err != nil { 33 | fmt.Println(err) 34 | return 35 | } 36 | 37 | fmt.Printf("image file: %s\n", imgFile) 38 | fmt.Printf("image width: %d\n", result.Width) 39 | fmt.Printf("image height: %d\n", result.Height) 40 | for i, h := range result.Heads { 41 | fmt.Printf( 42 | "head[%d]: Box[x:%d, y:%d, x:%d, y:%d], Score: %f\n", 43 | i, h.Box[0], h.Box[1], h.Box[2], h.Box[3], h.Score) 44 | } 45 | 46 | // Output: 47 | // image file: test/zidane.jpg 48 | // image width: 1280 49 | // image height: 720 50 | // head[0]: Box[x:910, y:91, x:1058, y:279], Score: 0.864714 51 | // head[1]: Box[x:518, y:236, x:672, y:448], Score: 0.726488 52 | } 53 | -------------------------------------------------------------------------------- /go/test/perf_test.go: -------------------------------------------------------------------------------- 1 | package unitests 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "testing" 7 | "time" 8 | 9 | "gitlab.gtvorg.tk/pannixilin/yolov5face/go" 10 | ) 11 | 12 | func BenchmarkPerf(b *testing.B) { 13 | var ( 14 | result *yolov5face.Result 15 | service *yolov5face.Service 16 | imgByte []byte 17 | file string 18 | err error 19 | ) 20 | 21 | file = os.Args[len(os.Args) - 1] 22 | 23 | service = yolov5face.NewHttpService( 24 | "http://localhost:8080/predictions/fd1", 25 | time.Millisecond * time.Duration(50), 26 | 1920, 27 | 1080, 28 | ) 29 | if !service.IsAlive() { 30 | b.Error("service is not availabe") 31 | b.FailNow() 32 | } 33 | 34 | if imgByte, err = ioutil.ReadFile(file); err != nil { 35 | b.Error(err) 36 | } 37 | 38 | test := func () { 39 | if result, err = service.DetectHeadFromByte(imgByte); err != nil { 40 | b.Log(err) 41 | } 42 | _ = result 43 | } 44 | 45 | b.ResetTimer() 46 | b.RunParallel(func(pb *testing.PB) { 47 | for pb.Next() { 48 | test() 49 | } 50 | }) 51 | } 52 | -------------------------------------------------------------------------------- /go/test/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/go/test/zidane.jpg -------------------------------------------------------------------------------- /go/util.go: 
-------------------------------------------------------------------------------- 1 | package yolov5face 2 | 3 | import ( 4 | "image" 5 | _ "image/gif" 6 | _ "image/jpeg" 7 | _ "image/png" 8 | "io" 9 | ) 10 | 11 | func HttpResult2Result(width int, height int, httpResults []HttpResult) *Result { 12 | var res = Result{Width: width, Height: height} 13 | for _, v := range httpResults { 14 | centr_x := int(float32(width) * v.Xywh_ratio[0]) 15 | centr_y := int(float32(height) * v.Xywh_ratio[1]) 16 | head_width := int(float32(width) * v.Xywh_ratio[2]) 17 | head_height := int(float32(height) * v.Xywh_ratio[3]) 18 | upLeft_x := centr_x - head_width / 2 19 | upLeft_y := centr_y - head_height / 2 20 | bottomRight_x := centr_x + head_width / 2 21 | bottomRight_y := centr_y + head_height / 2 22 | h := Head{ 23 | Box: [4]int{upLeft_x, upLeft_y, bottomRight_x, bottomRight_y}, 24 | Score: v.Conf, 25 | } 26 | res.Heads = append(res.Heads, h) 27 | } 28 | return &res 29 | } 30 | 31 | func GetImageSize(img io.Reader) (width int, height int, err error) { 32 | if image, _, err := image.DecodeConfig(img); err == nil { 33 | width = image.Width 34 | height = image.Height 35 | } else { 36 | width = 0 37 | height = 0 38 | } 39 | return 40 | } 41 | 42 | // unify the width and height to the size of a horizontal image 43 | func unifyImageWidthHeight(width int, height int) (width_unified int, height_unified int) { 44 | if width >= height { 45 | width_unified = width 46 | height_unified = height 47 | } else { 48 | width_unified = height 49 | height_unified = width 50 | } 51 | return 52 | } 53 | -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | """File for accessing YOLOv5 via PyTorch Hub https://pytorch.org/hub/ 2 | 3 | Usage: 4 | import torch 5 | model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, channels=3, classes=80) 6 | """ 7 | 8 | from pathlib import Path 9 | 10 | import torch 11 | 12 | from models.yolo import Model 13 | from utils.general import set_logging 14 | from utils.google_utils import attempt_download 15 | 16 | dependencies = ['torch', 'yaml'] 17 | set_logging() 18 | 19 | 20 | def create(name, pretrained, channels, classes, autoshape): 21 | """Creates a specified YOLOv5 model 22 | 23 | Arguments: 24 | name (str): name of model, i.e. 
'yolov5s' 25 | pretrained (bool): load pretrained weights into the model 26 | channels (int): number of input channels 27 | classes (int): number of model classes 28 | 29 | Returns: 30 | pytorch model 31 | """ 32 | config = Path(__file__).parent / 'models' / f'{name}.yaml' # model.yaml path 33 | try: 34 | model = Model(config, channels, classes) 35 | if pretrained: 36 | fname = f'{name}.pt' # checkpoint filename 37 | attempt_download(fname) # download if not found locally 38 | ckpt = torch.load(fname, map_location=torch.device('cpu')) # load 39 | state_dict = ckpt['model'].float().state_dict() # to FP32 40 | state_dict = {k: v for k, v in state_dict.items() if model.state_dict()[k].shape == v.shape} # filter 41 | model.load_state_dict(state_dict, strict=False) # load 42 | if len(ckpt['model'].names) == classes: 43 | model.names = ckpt['model'].names # set class names attribute 44 | if autoshape: 45 | model = model.autoshape() # for file/URI/PIL/cv2/np inputs and NMS 46 | return model 47 | 48 | except Exception as e: 49 | help_url = 'https://github.com/ultralytics/yolov5/issues/36' 50 | s = 'Cache maybe be out of date, try force_reload=True. See %s for help.' % help_url 51 | raise Exception(s) from e 52 | 53 | 54 | def yolov5s(pretrained=False, channels=3, classes=80, autoshape=True): 55 | """YOLOv5-small model from https://github.com/ultralytics/yolov5 56 | 57 | Arguments: 58 | pretrained (bool): load pretrained weights into the model, default=False 59 | channels (int): number of input channels, default=3 60 | classes (int): number of model classes, default=80 61 | 62 | Returns: 63 | pytorch model 64 | """ 65 | return create('yolov5s', pretrained, channels, classes, autoshape) 66 | 67 | 68 | def yolov5m(pretrained=False, channels=3, classes=80, autoshape=True): 69 | """YOLOv5-medium model from https://github.com/ultralytics/yolov5 70 | 71 | Arguments: 72 | pretrained (bool): load pretrained weights into the model, default=False 73 | channels (int): number of input channels, default=3 74 | classes (int): number of model classes, default=80 75 | 76 | Returns: 77 | pytorch model 78 | """ 79 | return create('yolov5m', pretrained, channels, classes, autoshape) 80 | 81 | 82 | def yolov5l(pretrained=False, channels=3, classes=80, autoshape=True): 83 | """YOLOv5-large model from https://github.com/ultralytics/yolov5 84 | 85 | Arguments: 86 | pretrained (bool): load pretrained weights into the model, default=False 87 | channels (int): number of input channels, default=3 88 | classes (int): number of model classes, default=80 89 | 90 | Returns: 91 | pytorch model 92 | """ 93 | return create('yolov5l', pretrained, channels, classes, autoshape) 94 | 95 | 96 | def yolov5x(pretrained=False, channels=3, classes=80, autoshape=True): 97 | """YOLOv5-xlarge model from https://github.com/ultralytics/yolov5 98 | 99 | Arguments: 100 | pretrained (bool): load pretrained weights into the model, default=False 101 | channels (int): number of input channels, default=3 102 | classes (int): number of model classes, default=80 103 | 104 | Returns: 105 | pytorch model 106 | """ 107 | return create('yolov5x', pretrained, channels, classes, autoshape) 108 | 109 | 110 | def custom(path_or_model='path/to/model.pt', autoshape=True): 111 | """YOLOv5-custom model from https://github.com/ultralytics/yolov5 112 | 113 | Arguments (3 options): 114 | path_or_model (str): 'path/to/model.pt' 115 | path_or_model (dict): torch.load('path/to/model.pt') 116 | path_or_model (nn.Module): torch.load('path/to/model.pt')['model'] 117 | 118 | 
Returns: 119 | pytorch model 120 | """ 121 | model = torch.load(path_or_model) if isinstance(path_or_model, str) else path_or_model # load checkpoint 122 | if isinstance(model, dict): 123 | model = model['model'] # load model 124 | 125 | hub_model = Model(model.yaml).to(next(model.parameters()).device) # create 126 | hub_model.load_state_dict(model.float().state_dict()) # load state_dict 127 | hub_model.names = model.names # class names 128 | return hub_model.autoshape() if autoshape else hub_model 129 | 130 | 131 | if __name__ == '__main__': 132 | model = create(name='yolov5s', pretrained=True, channels=3, classes=80, autoshape=True) # pretrained example 133 | # model = custom(path_or_model='path/to/model.pt') # custom example 134 | 135 | # Verify inference 136 | from PIL import Image 137 | 138 | imgs = [Image.open(x) for x in Path('data/images').glob('*.jpg')] 139 | results = model(imgs) 140 | results.show() 141 | results.print() 142 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/models/__init__.py -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from models.common import Conv, DWConv 8 | from utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | 25 | class Sum(nn.Module): 26 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 27 | def __init__(self, n, weight=False): # n: number of inputs 28 | super(Sum, self).__init__() 29 | self.weight = weight # apply weights boolean 30 | self.iter = range(n - 1) # iter object 31 | if weight: 32 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 33 | 34 | def forward(self, x): 35 | y = x[0] # no weight 36 | if self.weight: 37 | w = torch.sigmoid(self.w) * 2 38 | for i in self.iter: 39 | y = y + x[i + 1] * w[i] 40 | else: 41 | for i in self.iter: 42 | y = y + x[i + 1] 43 | return y 44 | 45 | 46 | class GhostConv(nn.Module): 47 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 48 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 49 | super(GhostConv, self).__init__() 50 | c_ = c2 // 2 # hidden channels 51 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 52 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 53 | 54 | def forward(self, x): 55 | y = self.cv1(x) 56 | return torch.cat([y, self.cv2(y)], 1) 57 | 58 | 59 | class GhostBottleneck(nn.Module): 60 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 61 | def __init__(self, c1, c2, k, s): 62 | super(GhostBottleneck, self).__init__() 63 | c_ = c2 // 2 64 | 
self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 65 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 66 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 67 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 68 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 69 | 70 | def forward(self, x): 71 | return self.conv(x) + self.shortcut(x) 72 | 73 | 74 | class MixConv2d(nn.Module): 75 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 76 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 77 | super(MixConv2d, self).__init__() 78 | groups = len(k) 79 | if equal_ch: # equal c_ per group 80 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 81 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 82 | else: # equal weight.numel() per group 83 | b = [c2] + [0] * groups 84 | a = np.eye(groups + 1, groups, k=-1) 85 | a -= np.roll(a, 1, axis=1) 86 | a *= np.array(k) ** 2 87 | a[0] = 1 88 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 89 | 90 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 91 | self.bn = nn.BatchNorm2d(c2) 92 | self.act = nn.LeakyReLU(0.1, inplace=True) 93 | 94 | def forward(self, x): 95 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 96 | 97 | 98 | class Ensemble(nn.ModuleList): 99 | # Ensemble of models 100 | def __init__(self): 101 | super(Ensemble, self).__init__() 102 | 103 | def forward(self, x, augment=False): 104 | y = [] 105 | for module in self: 106 | y.append(module(x, augment)[0]) 107 | # y = torch.stack(y).max(0)[0] # max ensemble 108 | # y = torch.stack(y).mean(0) # mean ensemble 109 | y = torch.cat(y, 1) # nms ensemble 110 | return y, None # inference, train output 111 | 112 | 113 | def attempt_load(weights, map_location=None, fp16=0): 114 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 115 | model = Ensemble() 116 | for w in weights if isinstance(weights, list) else [weights]: 117 | attempt_download(w) 118 | dict1 = torch.load(w, map_location=map_location)['model'] 119 | if fp16: 120 | dict1 = dict1.fuse().half().eval() 121 | else: 122 | dict1 = dict1.fuse().float().eval() 123 | model.append(dict1) # load FP32 model 124 | 125 | # Compatibility updates 126 | for m in model.modules(): 127 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: 128 | m.inplace = True # pytorch 1.7.0 compatibility 129 | elif type(m) is Conv: 130 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 131 | 132 | if len(model) == 1: 133 | return model[-1] # return model 134 | else: 135 | print('Ensemble created with %s\n' % weights) 136 | for k in ['names', 'stride']: 137 | setattr(model, k, getattr(model[-1], k)) 138 | return model # return ensemble 139 | -------------------------------------------------------------------------------- /models/export.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental import attempt_load 18 | from 
utils.activations import Hardswish, SiLU 19 | from utils.general import set_logging, check_img_size 20 | import onnx 21 | 22 | if __name__ == '__main__': 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ 25 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 26 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 27 | opt = parser.parse_args() 28 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 29 | print(opt) 30 | set_logging() 31 | t = time.time() 32 | 33 | # Load PyTorch model 34 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model 35 | model.eval() 36 | labels = model.names 37 | 38 | # Checks 39 | gs = int(max(model.stride)) # grid size (max stride) 40 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples 41 | 42 | # Input 43 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection 44 | 45 | # Update model 46 | for k, m in model.named_modules(): 47 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 48 | if isinstance(m, models.common.Conv): # assign export-friendly activations 49 | if isinstance(m.act, nn.Hardswish): 50 | m.act = Hardswish() 51 | elif isinstance(m.act, nn.SiLU): 52 | m.act = SiLU() 53 | # elif isinstance(m, models.yolo.Detect): 54 | # m.forward = m.forward_export # assign forward (optional) 55 | if isinstance(m, models.common.ShuffleV2Block):#shufflenet block nn.SiLU 56 | for i in range(len(m.branch1)): 57 | if isinstance(m.branch1[i], nn.SiLU): 58 | m.branch1[i] = SiLU() 59 | for i in range(len(m.branch2)): 60 | if isinstance(m.branch2[i], nn.SiLU): 61 | m.branch2[i] = SiLU() 62 | model.model[-1].export = True # set Detect() layer export=True 63 | y = model(img) # dry run 64 | 65 | # ONNX export 66 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 67 | f = opt.weights.replace('.pt', '.onnx') # filename 68 | model.fuse() # only for ONNX 69 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['data'], 70 | output_names=['stride_' + str(int(x)) for x in model.stride]) 71 | 72 | # Checks 73 | onnx_model = onnx.load(f) # load onnx model 74 | onnx.checker.check_model(onnx_model) # check onnx model 75 | # print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 76 | print('ONNX export success, saved as %s' % f) 77 | # Finish 78 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' 
% (time.time() - t)) 79 | -------------------------------------------------------------------------------- /models/yolo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import math 4 | import sys 5 | from copy import deepcopy 6 | from pathlib import Path 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | logger = logging.getLogger(__name__) 13 | 14 | from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, C3, ShuffleV2Block, Concat, NMS, autoShape, StemBlock 15 | from models.experimental import MixConv2d, CrossConv 16 | from utils.autoanchor import check_anchor_order 17 | from utils.general import make_divisible, check_file, set_logging 18 | from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ 19 | select_device, copy_attr 20 | 21 | try: 22 | import thop # for FLOPS computation 23 | except ImportError: 24 | thop = None 25 | 26 | class Detect(nn.Module): 27 | stride = None # strides computed during build 28 | export = False # onnx export 29 | 30 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 31 | super(Detect, self).__init__() 32 | self.nc = nc # number of classes 33 | #self.no = nc + 5 # number of outputs per anchor 34 | self.no = nc + 5 + 10 # number of outputs per anchor 35 | 36 | self.nl = len(anchors) # number of detection layers 37 | self.na = len(anchors[0]) // 2 # number of anchors 38 | self.grid = [torch.zeros(1)] * self.nl # init grid 39 | #a = torch.tensor(anchors).float().view(self.nl, -1, 2) 40 | #self.register_buffer('anchors', a) # shape(nl,na,2) 41 | #self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 42 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 43 | 44 | 45 | def forward(self, x): 46 | for i in range(self.nl): 47 | x[i] = self.m[i](x[i]) # conv 48 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 49 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 50 | return x 51 | 52 | 53 | class Model(nn.Module): 54 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes 55 | super(Model, self).__init__() 56 | if isinstance(cfg, dict): 57 | self.yaml = cfg # model dict 58 | else: # is *.yaml 59 | import yaml # for torch hub 60 | self.yaml_file = Path(cfg).name 61 | with open(cfg) as f: 62 | self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict 63 | 64 | # Define model 65 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 66 | if nc and nc != self.yaml['nc']: 67 | logger.info('Overriding model.yaml nc=%g with nc=%g' % (self.yaml['nc'], nc)) 68 | self.yaml['nc'] = nc # override yaml value 69 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 70 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 71 | # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 72 | 73 | # Build strides, anchors 74 | m = self.model[-1] # Detect() 75 | if isinstance(m, Detect): 76 | s = 128 # 2x min stride 77 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 78 | m.anchors /= m.stride.view(-1, 1, 1) 79 | check_anchor_order(m) 80 | self.stride = m.stride 81 | self._initialize_biases() # only run once 82 | # print('Strides: %s' % 
m.stride.tolist()) 83 | 84 | # Init weights, biases 85 | initialize_weights(self) 86 | self.info() 87 | logger.info('') 88 | 89 | 90 | def forward(self, x): 91 | y = [] 92 | for m in self.model: 93 | if m.f != -1: # if not from previous layer 94 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 95 | x = m(x) # run 96 | y.append(x if m.i in self.save else None) # save output 97 | return x 98 | 99 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 100 | # https://arxiv.org/abs/1708.02002 section 3.3 101 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 102 | m = self.model[-1] # Detect() module 103 | for mi, s in zip(m.m, m.stride): # from 104 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 105 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 106 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 107 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 108 | 109 | def _print_biases(self): 110 | m = self.model[-1] # Detect() module 111 | for mi in m.m: # from 112 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 113 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 114 | 115 | # def _print_weights(self): 116 | # for m in self.model.modules(): 117 | # if type(m) is Bottleneck: 118 | # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 119 | 120 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 121 | print('Fusing layers... ') 122 | for m in self.model.modules(): 123 | if type(m) is Conv and hasattr(m, 'bn'): 124 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 125 | delattr(m, 'bn') # remove batchnorm 126 | m.forward = m.fuseforward # update forward 127 | self.info() 128 | return self 129 | 130 | def nms(self, mode=True): # add or remove NMS module 131 | present = type(self.model[-1]) is NMS # last layer is NMS 132 | if mode and not present: 133 | print('Adding NMS... ') 134 | m = NMS() # module 135 | m.f = -1 # from 136 | m.i = self.model[-1].i + 1 # index 137 | self.model.add_module(name='%s' % m.i, module=m) # add 138 | self.eval() 139 | elif not mode and present: 140 | print('Removing NMS... ') 141 | self.model = self.model[:-1] # remove 142 | return self 143 | 144 | def autoshape(self): # add autoShape module 145 | print('Adding autoShape... 
') 146 | m = autoShape(self) # wrap model 147 | copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes 148 | return m 149 | 150 | def info(self, verbose=False, img_size=640): # print model information 151 | model_info(self, verbose, img_size) 152 | 153 | 154 | def parse_model(d, ch): # model_dict, input_channels(3) 155 | logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 156 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 157 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 158 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 159 | 160 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 161 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 162 | m = eval(m) if isinstance(m, str) else m # eval strings 163 | for j, a in enumerate(args): 164 | try: 165 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 166 | except: 167 | pass 168 | 169 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 170 | if m in [Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3, ShuffleV2Block, StemBlock]: 171 | c1, c2 = ch[f], args[0] 172 | 173 | # Normal 174 | # if i > 0 and args[0] != no: # channel expansion factor 175 | # ex = 1.75 # exponential (default 2.0) 176 | # e = math.log(c2 / ch[1]) / math.log(2) 177 | # c2 = int(ch[1] * ex ** e) 178 | # if m != Focus: 179 | 180 | c2 = make_divisible(c2 * gw, 8) if c2 != no else c2 181 | 182 | # Experimental 183 | # if i > 0 and args[0] != no: # channel expansion factor 184 | # ex = 1 + gw # exponential (default 2.0) 185 | # ch1 = 32 # ch[1] 186 | # e = math.log(c2 / ch1) / math.log(2) # level 1-n 187 | # c2 = int(ch1 * ex ** e) 188 | # if m != Focus: 189 | # c2 = make_divisible(c2, 8) if c2 != no else c2 190 | 191 | args = [c1, c2, *args[1:]] 192 | if m in [BottleneckCSP, C3]: 193 | args.insert(2, n) 194 | n = 1 195 | elif m is nn.BatchNorm2d: 196 | args = [ch[f]] 197 | elif m is Concat: 198 | c2 = sum([ch[-1 if x == -1 else x + 1] for x in f]) 199 | elif m is Detect: 200 | args.append([ch[x + 1] for x in f]) 201 | if isinstance(args[1], int): # number of anchors 202 | args[1] = [list(range(args[1] * 2))] * len(f) 203 | else: 204 | c2 = ch[f] 205 | 206 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 207 | t = str(m)[8:-2].replace('__main__.', '') # module type 208 | np = sum([x.numel() for x in m_.parameters()]) # number params 209 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 210 | logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print 211 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 212 | layers.append(m_) 213 | ch.append(c2) 214 | return nn.Sequential(*layers), sorted(save) 215 | 216 | 217 | from thop import profile 218 | from thop import clever_format 219 | if __name__ == '__main__': 220 | parser = argparse.ArgumentParser() 221 | parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') 222 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 223 | opt = parser.parse_args() 224 | opt.cfg = check_file(opt.cfg) # check file 225 | set_logging() 226 | device = select_device(opt.device) 227 | 228 | # Create model 229 | model = Model(opt.cfg).to(device) 230 | stride = model.stride.max() 231 | if stride == 32: 232 | input = torch.Tensor(1, 3, 480, 640).to(device) 233 | else: 234 | input = torch.Tensor(1, 3, 512, 640).to(device) 235 | model.train() 236 | print(model) 237 | flops, params = profile(model, inputs=(input, )) 238 | flops, params = clever_format([flops, params], "%.3f") 239 | print('Flops:', flops, ',Params:' ,params) 240 | -------------------------------------------------------------------------------- /models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4,5, 8,10, 13,16] # P3/8 9 | - [23,29, 43,55, 73,105] # P4/16 10 | - [146,217, 231,300, 335,433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2 16 | [-1, 3, C3, [128]], 17 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 18 | [-1, 9, C3, [256]], 19 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16 20 | [-1, 9, C3, [512]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32 22 | [-1, 1, SPP, [1024, [3,5,7]]], 23 | [-1, 3, C3, [1024, False]], # 8 24 | ] 25 | 26 | # YOLOv5 head 27 | head: 28 | [[-1, 1, Conv, [512, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 31 | [-1, 3, C3, [512, False]], # 12 32 | 33 | [-1, 1, Conv, [256, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 36 | [-1, 3, C3, [256, False]], # 16 (P3/8-small) 37 | 38 | [-1, 1, Conv, [256, 3, 2]], 39 | [[-1, 13], 1, Concat, [1]], # cat head P4 40 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium) 41 | 42 | [-1, 1, Conv, [512, 3, 2]], 43 | [[-1, 9], 1, Concat, [1]], # cat head P5 44 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large) 45 | 46 | [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 47 | ] 48 | -------------------------------------------------------------------------------- /models/yolov5l6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [6,7, 9,11, 13,16] # P3/8 9 | - [18,23, 26,33, 37,47] # P4/16 10 | - [54,67, 77,104, 112,154] # P5/32 11 | - [174,238, 258,355, 445,568] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [ [ -1, 1, StemBlock, [ 64, 3, 2 ] ], # 0-P1/2 17 | [ -1, 3, C3, [ 128 ] ], 18 | [ -1, 1, Conv, [ 256, 3, 2 ] ], # 2-P3/8 19 | [ -1, 9, C3, [ 256 ] ], 20 | [ -1, 1, Conv, [ 512, 3, 2 ] ], # 4-P4/16 21 | [ -1, 9, C3, [ 512 ] ], 22 | [ -1, 1, Conv, [ 768, 3, 2 ] ], # 6-P5/32 23 | [ -1, 3, C3, [ 768 ] ], 24 | [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 8-P6/64 25 | [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], 26 | [ -1, 3, C3, [ 1024, False ] ], # 10 27 | ] 28 | 29 | # YOLOv5 head 30 | head: 31 | [ [ -1, 1, Conv, [ 768, 1, 1 ] ], 32 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 33 | [ [ -1, 7 ], 1, Concat, [ 1 ] ], # cat backbone P5 34 | [ -1, 3, C3, [ 768, False ] ], # 14 35 | 36 | [ -1, 1, Conv, [ 512, 1, 1 ] ], 37 
| [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 38 | [ [ -1, 5 ], 1, Concat, [ 1 ] ], # cat backbone P4 39 | [ -1, 3, C3, [ 512, False ] ], # 18 40 | 41 | [ -1, 1, Conv, [ 256, 1, 1 ] ], 42 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 43 | [ [ -1, 3 ], 1, Concat, [ 1 ] ], # cat backbone P3 44 | [ -1, 3, C3, [ 256, False ] ], # 22 (P3/8-small) 45 | 46 | [ -1, 1, Conv, [ 256, 3, 2 ] ], 47 | [ [ -1, 19 ], 1, Concat, [ 1 ] ], # cat head P4 48 | [ -1, 3, C3, [ 512, False ] ], # 25 (P4/16-medium) 49 | 50 | [ -1, 1, Conv, [ 512, 3, 2 ] ], 51 | [ [ -1, 15 ], 1, Concat, [ 1 ] ], # cat head P5 52 | [ -1, 3, C3, [ 768, False ] ], # 28 (P5/32-large) 53 | 54 | [ -1, 1, Conv, [ 768, 3, 2 ] ], 55 | [ [ -1, 11 ], 1, Concat, [ 1 ] ], # cat head P6 56 | [ -1, 3, C3, [ 1024, False ] ], # 31 (P6/64-xlarge) 57 | 58 | [ [ 22, 25, 28, 31 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) 59 | ] 60 | 61 | -------------------------------------------------------------------------------- /models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4,5, 8,10, 13,16] # P3/8 9 | - [23,29, 43,55, 73,105] # P4/16 10 | - [146,217, 231,300, 335,433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2 16 | [-1, 3, C3, [128]], 17 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 18 | [-1, 9, C3, [256]], 19 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16 20 | [-1, 9, C3, [512]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32 22 | [-1, 1, SPP, [1024, [3,5,7]]], 23 | [-1, 3, C3, [1024, False]], # 8 24 | ] 25 | 26 | # YOLOv5 head 27 | head: 28 | [[-1, 1, Conv, [512, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 31 | [-1, 3, C3, [512, False]], # 12 32 | 33 | [-1, 1, Conv, [256, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 36 | [-1, 3, C3, [256, False]], # 16 (P3/8-small) 37 | 38 | [-1, 1, Conv, [256, 3, 2]], 39 | [[-1, 13], 1, Concat, [1]], # cat head P4 40 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium) 41 | 42 | [-1, 1, Conv, [512, 3, 2]], 43 | [[-1, 9], 1, Concat, [1]], # cat head P5 44 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large) 45 | 46 | [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 47 | ] 48 | -------------------------------------------------------------------------------- /models/yolov5m6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [6,7, 9,11, 13,16] # P3/8 9 | - [18,23, 26,33, 37,47] # P4/16 10 | - [54,67, 77,104, 112,154] # P5/32 11 | - [174,238, 258,355, 445,568] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [ [ -1, 1, StemBlock, [ 64, 3, 2 ] ], # 0-P1/2 17 | [ -1, 3, C3, [ 128 ] ], 18 | [ -1, 1, Conv, [ 256, 3, 2 ] ], # 2-P3/8 19 | [ -1, 9, C3, [ 256 ] ], 20 | [ -1, 1, Conv, [ 512, 3, 2 ] ], # 4-P4/16 21 | [ -1, 9, C3, [ 512 ] ], 22 | [ -1, 1, Conv, [ 768, 3, 2 ] ], # 6-P5/32 23 | [ -1, 3, C3, [ 768 ] ], 24 | [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 8-P6/64 25 | [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], 26 | [ -1, 3, C3, [ 
1024, False ] ], # 10 27 | ] 28 | 29 | # YOLOv5 head 30 | head: 31 | [ [ -1, 1, Conv, [ 768, 1, 1 ] ], 32 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 33 | [ [ -1, 7 ], 1, Concat, [ 1 ] ], # cat backbone P5 34 | [ -1, 3, C3, [ 768, False ] ], # 14 35 | 36 | [ -1, 1, Conv, [ 512, 1, 1 ] ], 37 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 38 | [ [ -1, 5 ], 1, Concat, [ 1 ] ], # cat backbone P4 39 | [ -1, 3, C3, [ 512, False ] ], # 18 40 | 41 | [ -1, 1, Conv, [ 256, 1, 1 ] ], 42 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 43 | [ [ -1, 3 ], 1, Concat, [ 1 ] ], # cat backbone P3 44 | [ -1, 3, C3, [ 256, False ] ], # 22 (P3/8-small) 45 | 46 | [ -1, 1, Conv, [ 256, 3, 2 ] ], 47 | [ [ -1, 19 ], 1, Concat, [ 1 ] ], # cat head P4 48 | [ -1, 3, C3, [ 512, False ] ], # 25 (P4/16-medium) 49 | 50 | [ -1, 1, Conv, [ 512, 3, 2 ] ], 51 | [ [ -1, 15 ], 1, Concat, [ 1 ] ], # cat head P5 52 | [ -1, 3, C3, [ 768, False ] ], # 28 (P5/32-large) 53 | 54 | [ -1, 1, Conv, [ 768, 3, 2 ] ], 55 | [ [ -1, 11 ], 1, Concat, [ 1 ] ], # cat head P6 56 | [ -1, 3, C3, [ 1024, False ] ], # 31 (P6/64-xlarge) 57 | 58 | [ [ 22, 25, 28, 31 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) 59 | ] 60 | 61 | -------------------------------------------------------------------------------- /models/yolov5n-0.5.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 0.5 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4,5, 8,10, 13,16] # P3/8 9 | - [23,29, 43,55, 73,105] # P4/16 10 | - [146,217, 231,300, 335,433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4 16 | [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8 17 | [-1, 3, ShuffleV2Block, [128, 1]], # 2 18 | [-1, 1, ShuffleV2Block, [256, 2]], # 3-P4/16 19 | [-1, 7, ShuffleV2Block, [256, 1]], # 4 20 | [-1, 1, ShuffleV2Block, [512, 2]], # 5-P5/32 21 | [-1, 3, ShuffleV2Block, [512, 1]], # 6 22 | ] 23 | 24 | # YOLOv5 head 25 | head: 26 | [[-1, 1, Conv, [128, 1, 1]], 27 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 28 | [[-1, 4], 1, Concat, [1]], # cat backbone P4 29 | [-1, 1, C3, [128, False]], # 10 30 | 31 | [-1, 1, Conv, [128, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [[-1, 2], 1, Concat, [1]], # cat backbone P3 34 | [-1, 1, C3, [128, False]], # 14 (P3/8-small) 35 | 36 | [-1, 1, Conv, [128, 3, 2]], 37 | [[-1, 11], 1, Concat, [1]], # cat head P4 38 | [-1, 1, C3, [128, False]], # 17 (P4/16-medium) 39 | 40 | [-1, 1, Conv, [128, 3, 2]], 41 | [[-1, 7], 1, Concat, [1]], # cat head P5 42 | [-1, 1, C3, [128, False]], # 20 (P5/32-large) 43 | 44 | [[14, 17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 45 | ] 46 | 47 | -------------------------------------------------------------------------------- /models/yolov5n.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4,5, 8,10, 13,16] # P3/8 9 | - [23,29, 43,55, 73,105] # P4/16 10 | - [146,217, 231,300, 335,433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4 16 | [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8 17 | [-1, 3, ShuffleV2Block, [128, 1]], # 2 18 | [-1, 1, 
ShuffleV2Block, [256, 2]], # 3-P4/16 19 | [-1, 7, ShuffleV2Block, [256, 1]], # 4 20 | [-1, 1, ShuffleV2Block, [512, 2]], # 5-P5/32 21 | [-1, 3, ShuffleV2Block, [512, 1]], # 6 22 | ] 23 | 24 | # YOLOv5 head 25 | head: 26 | [[-1, 1, Conv, [128, 1, 1]], 27 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 28 | [[-1, 4], 1, Concat, [1]], # cat backbone P4 29 | [-1, 1, C3, [128, False]], # 10 30 | 31 | [-1, 1, Conv, [128, 1, 1]], 32 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 33 | [[-1, 2], 1, Concat, [1]], # cat backbone P3 34 | [-1, 1, C3, [128, False]], # 14 (P3/8-small) 35 | 36 | [-1, 1, Conv, [128, 3, 2]], 37 | [[-1, 11], 1, Concat, [1]], # cat head P4 38 | [-1, 1, C3, [128, False]], # 17 (P4/16-medium) 39 | 40 | [-1, 1, Conv, [128, 3, 2]], 41 | [[-1, 7], 1, Concat, [1]], # cat head P5 42 | [-1, 1, C3, [128, False]], # 20 (P5/32-large) 43 | 44 | [[14, 17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 45 | ] 46 | 47 | -------------------------------------------------------------------------------- /models/yolov5n6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [6,7, 9,11, 13,16] # P3/8 9 | - [18,23, 26,33, 37,47] # P4/16 10 | - [54,67, 77,104, 112,154] # P5/32 11 | - [174,238, 258,355, 445,568] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [[-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4 17 | [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8 18 | [-1, 3, ShuffleV2Block, [128, 1]], # 2 19 | [-1, 1, ShuffleV2Block, [256, 2]], # 3-P4/16 20 | [-1, 7, ShuffleV2Block, [256, 1]], # 4 21 | [-1, 1, ShuffleV2Block, [384, 2]], # 5-P5/32 22 | [-1, 3, ShuffleV2Block, [384, 1]], # 6 23 | [-1, 1, ShuffleV2Block, [512, 2]], # 7-P6/64 24 | [-1, 3, ShuffleV2Block, [512, 1]], # 8 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [128, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P5 32 | [-1, 1, C3, [128, False]], # 12 33 | 34 | [-1, 1, Conv, [128, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P4 37 | [-1, 1, C3, [128, False]], # 16 (P4/8-small) 38 | 39 | [-1, 1, Conv, [128, 1, 1]], 40 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 41 | [[-1, 2], 1, Concat, [1]], # cat backbone P3 42 | [-1, 1, C3, [128, False]], # 20 (P3/8-small) 43 | 44 | [-1, 1, Conv, [128, 3, 2]], 45 | [[-1, 17], 1, Concat, [1]], # cat head P4 46 | [-1, 1, C3, [128, False]], # 23 (P4/16-medium) 47 | 48 | [-1, 1, Conv, [128, 3, 2]], 49 | [[-1, 13], 1, Concat, [1]], # cat head P5 50 | [-1, 1, C3, [128, False]], # 26 (P5/32-large) 51 | 52 | [-1, 1, Conv, [128, 3, 2]], 53 | [[-1, 9], 1, Concat, [1]], # cat head P6 54 | [-1, 1, C3, [128, False]], # 29 (P6/64-large) 55 | 56 | [[20, 23, 26, 29], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 57 | ] 58 | 59 | -------------------------------------------------------------------------------- /models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.35 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4,5, 8,10, 13,16] # P3/8 9 | - [23,29, 43,55, 73,105] # P4/16 10 | - [146,217, 231,300, 335,433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | 
# [from, number, module, args] 15 | [[-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2 16 | [-1, 3, C3, [128]], 17 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 18 | [-1, 9, C3, [256]], 19 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16 20 | [-1, 9, C3, [512]], 21 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32 22 | [-1, 1, SPP, [1024, [3,5,7]]], 23 | [-1, 3, C3, [1024, False]], # 8 24 | ] 25 | 26 | # YOLOv5 head 27 | head: 28 | [[-1, 1, Conv, [512, 1, 1]], 29 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 30 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 31 | [-1, 3, C3, [512, False]], # 12 32 | 33 | [-1, 1, Conv, [256, 1, 1]], 34 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 35 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 36 | [-1, 3, C3, [256, False]], # 16 (P3/8-small) 37 | 38 | [-1, 1, Conv, [256, 3, 2]], 39 | [[-1, 13], 1, Concat, [1]], # cat head P4 40 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium) 41 | 42 | [-1, 1, Conv, [512, 3, 2]], 43 | [[-1, 9], 1, Concat, [1]], # cat head P5 44 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large) 45 | 46 | [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 47 | ] 48 | -------------------------------------------------------------------------------- /models/yolov5s6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [6,7, 9,11, 13,16] # P3/8 9 | - [18,23, 26,33, 37,47] # P4/16 10 | - [54,67, 77,104, 112,154] # P5/32 11 | - [174,238, 258,355, 445,568] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [ [ -1, 1, StemBlock, [ 64, 3, 2 ] ], # 0-P1/2 17 | [ -1, 3, C3, [ 128 ] ], 18 | [ -1, 1, Conv, [ 256, 3, 2 ] ], # 2-P3/8 19 | [ -1, 9, C3, [ 256 ] ], 20 | [ -1, 1, Conv, [ 512, 3, 2 ] ], # 4-P4/16 21 | [ -1, 9, C3, [ 512 ] ], 22 | [ -1, 1, Conv, [ 768, 3, 2 ] ], # 6-P5/32 23 | [ -1, 3, C3, [ 768 ] ], 24 | [ -1, 1, Conv, [ 1024, 3, 2 ] ], # 8-P6/64 25 | [ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ], 26 | [ -1, 3, C3, [ 1024, False ] ], # 10 27 | ] 28 | 29 | # YOLOv5 head 30 | head: 31 | [ [ -1, 1, Conv, [ 768, 1, 1 ] ], 32 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 33 | [ [ -1, 7 ], 1, Concat, [ 1 ] ], # cat backbone P5 34 | [ -1, 3, C3, [ 768, False ] ], # 14 35 | 36 | [ -1, 1, Conv, [ 512, 1, 1 ] ], 37 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 38 | [ [ -1, 5 ], 1, Concat, [ 1 ] ], # cat backbone P4 39 | [ -1, 3, C3, [ 512, False ] ], # 18 40 | 41 | [ -1, 1, Conv, [ 256, 1, 1 ] ], 42 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 43 | [ [ -1, 3 ], 1, Concat, [ 1 ] ], # cat backbone P3 44 | [ -1, 3, C3, [ 256, False ] ], # 22 (P3/8-small) 45 | 46 | [ -1, 1, Conv, [ 256, 3, 2 ] ], 47 | [ [ -1, 19 ], 1, Concat, [ 1 ] ], # cat head P4 48 | [ -1, 3, C3, [ 512, False ] ], # 25 (P4/16-medium) 49 | 50 | [ -1, 1, Conv, [ 512, 3, 2 ] ], 51 | [ [ -1, 15 ], 1, Concat, [ 1 ] ], # cat head P5 52 | [ -1, 3, C3, [ 768, False ] ], # 28 (P5/32-large) 53 | 54 | [ -1, 1, Conv, [ 768, 3, 2 ] ], 55 | [ [ -1, 11 ], 1, Concat, [ 1 ] ], # cat head P6 56 | [ -1, 3, C3, [ 1024, False ] ], # 31 (P6/64-xlarge) 57 | 58 | [ [ 22, 25, 28, 31 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6) 59 | ] 60 | 61 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | thop 2 | torch 3 | torchvision 4 | 
torchserve 5 | torch-model-archiver 6 | requests 7 | opencv-python-headless 8 | tqdm 9 | pyyaml 10 | matplotlib 11 | pandas 12 | seaborn 13 | google-api-python-client 14 | grpcio 15 | -------------------------------------------------------------------------------- /scripts/build_model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPT_DIR="$(dirname $(readlink -f ${0}))" 4 | ROOT_DIR="$(readlink -f ${SCRIPT_DIR}/..)" 5 | OUT_DIR="${ROOT_DIR}/weights" 6 | 7 | # DOCKER_BUILDKIT=1 causes a build failure 8 | # because BuildKit does not allow access to the GPU. 9 | # see issue https://github.com/moby/buildkit/issues/1800 10 | # DOCKER_BUILDKIT=1 docker build --target export_model -f docker/Dockerfile -o "${OUT_DIR}" . 11 | # use standard docker build instead 12 | docker build -t yolov5face:model -f ${ROOT_DIR}/docker/Dockerfile_model ${ROOT_DIR} && \ 13 | docker create -it --name yolov5face_model_tmp yolov5face:model bash && \ 14 | docker cp yolov5face_model_tmp:/build/weights/yolov5s-face.torch2trt "${OUT_DIR}" && \ 15 | docker rm yolov5face_model_tmp && \ 16 | docker rmi yolov5face:model 17 | -------------------------------------------------------------------------------- /test_widerface.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import time 4 | from pathlib import Path 5 | 6 | import os 7 | import cv2 8 | import torch 9 | import torch.backends.cudnn as cudnn 10 | from numpy import random 11 | import numpy as np 12 | from models.experimental import attempt_load 13 | from utils.datasets import letterbox 14 | from utils.general import check_img_size, check_requirements, non_max_suppression_face, apply_classifier, \ 15 | scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path 16 | from utils.plots import plot_one_box 17 | from utils.torch_utils import select_device, load_classifier, time_synchronized 18 | from tqdm import tqdm 19 | 20 | def dynamic_resize(shape, stride=64): 21 | max_size = max(shape[0], shape[1]) 22 | if max_size % stride != 0: 23 | max_size = (int(max_size / stride) + 1) * stride 24 | return max_size 25 | 26 | def scale_coords_landmarks(img1_shape, coords, img0_shape, ratio_pad=None): 27 | # Rescale coords (xyxy) from img1_shape to img0_shape 28 | if ratio_pad is None: # calculate from img0_shape 29 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new 30 | pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding 31 | else: 32 | gain = ratio_pad[0][0] 33 | pad = ratio_pad[1] 34 | 35 | coords[:, [0, 2, 4, 6, 8]] -= pad[0] # x padding 36 | coords[:, [1, 3, 5, 7, 9]] -= pad[1] # y padding 37 | coords[:, :10] /= gain 38 | #clip_coords(coords, img0_shape) 39 | coords[:, 0].clamp_(0, img0_shape[1]) # x1 40 | coords[:, 1].clamp_(0, img0_shape[0]) # y1 41 | coords[:, 2].clamp_(0, img0_shape[1]) # x2 42 | coords[:, 3].clamp_(0, img0_shape[0]) # y2 43 | coords[:, 4].clamp_(0, img0_shape[1]) # x3 44 | coords[:, 5].clamp_(0, img0_shape[0]) # y3 45 | coords[:, 6].clamp_(0, img0_shape[1]) # x4 46 | coords[:, 7].clamp_(0, img0_shape[0]) # y4 47 | coords[:, 8].clamp_(0, img0_shape[1]) # x5 48 | coords[:, 9].clamp_(0, img0_shape[0]) # y5 49 | return coords 50 | 51 | def show_results(img, xywh, conf, landmarks, class_num): 52 | h,w,c = img.shape 53 | tl = 1 or round(0.002 * (h + w) / 2) + 1 # line/font thickness 54 | x1 = int(xywh[0] * w - 0.5 * xywh[2] * w) 55
| y1 = int(xywh[1] * h - 0.5 * xywh[3] * h) 56 | x2 = int(xywh[0] * w + 0.5 * xywh[2] * w) 57 | y2 = int(xywh[1] * h + 0.5 * xywh[3] * h) 58 | cv2.rectangle(img, (x1,y1), (x2, y2), (0,255,0), thickness=tl, lineType=cv2.LINE_AA) 59 | 60 | clors = [(255,0,0),(0,255,0),(0,0,255),(255,255,0),(0,255,255)] 61 | 62 | for i in range(5): 63 | point_x = int(landmarks[2 * i] * w) 64 | point_y = int(landmarks[2 * i + 1] * h) 65 | cv2.circle(img, (point_x, point_y), tl+1, clors[i], -1) 66 | 67 | tf = max(tl - 1, 1) # font thickness 68 | label = str(int(class_num)) + ': ' + str(conf)[:5] 69 | cv2.putText(img, label, (x1, y1 - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) 70 | return img 71 | 72 | def detect(model, img0): 73 | stride = int(model.stride.max()) # model stride 74 | imgsz = opt.img_size 75 | if imgsz <= 0: # original size 76 | imgsz = dynamic_resize(img0.shape) 77 | imgsz = check_img_size(imgsz, s=64) # check img_size 78 | img = letterbox(img0, imgsz)[0] 79 | # Convert 80 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 81 | img = np.ascontiguousarray(img) 82 | img = torch.from_numpy(img).to(device) 83 | img = img.float() # uint8 to fp16/32 84 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 85 | if img.ndimension() == 3: 86 | img = img.unsqueeze(0) 87 | 88 | # Inference 89 | pred = model(img, augment=opt.augment)[0] 90 | # Apply NMS 91 | pred = non_max_suppression_face(pred, opt.conf_thres, opt.iou_thres)[0] 92 | gn = torch.tensor(img0.shape)[[1, 0, 1, 0]].to(device) # normalization gain whwh 93 | gn_lks = torch.tensor(img0.shape)[[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]].to(device) # normalization gain landmarks 94 | boxes = [] 95 | h, w, c = img0.shape 96 | if pred is not None: 97 | pred[:, :4] = scale_coords(img.shape[2:], pred[:, :4], img0.shape).round() 98 | pred[:, 5:15] = scale_coords_landmarks(img.shape[2:], pred[:, 5:15], img0.shape).round() 99 | for j in range(pred.size()[0]): 100 | xywh = (xyxy2xywh(pred[j, :4].view(1, 4)) / gn).view(-1) 101 | xywh = xywh.data.cpu().numpy() 102 | conf = pred[j, 4].cpu().numpy() 103 | landmarks = (pred[j, 5:15].view(1, 10) / gn_lks).view(-1).tolist() 104 | class_num = pred[j, 15].cpu().numpy() 105 | x1 = int(xywh[0] * w - 0.5 * xywh[2] * w) 106 | y1 = int(xywh[1] * h - 0.5 * xywh[3] * h) 107 | x2 = int(xywh[0] * w + 0.5 * xywh[2] * w) 108 | y2 = int(xywh[1] * h + 0.5 * xywh[3] * h) 109 | boxes.append([x1, y1, x2-x1, y2-y1, conf]) 110 | return boxes 111 | 112 | 113 | if __name__ == '__main__': 114 | parser = argparse.ArgumentParser() 115 | parser.add_argument('--weights', nargs='+', type=str, default='runs/train/exp5/weights/last.pt', help='model.pt path(s)') 116 | parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') 117 | parser.add_argument('--conf-thres', type=float, default=0.02, help='object confidence threshold') 118 | parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS') 119 | parser.add_argument('--device', default='0', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 120 | parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') 121 | parser.add_argument('--augment', action='store_true', help='augmented inference') 122 | parser.add_argument('--update', action='store_true', help='update all models') 123 | parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3') 124 | parser.add_argument('--project', default='runs/detect', help='save results to project/name') 125 | parser.add_argument('--name', default='exp', help='save results to project/name') 126 | parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') 127 | parser.add_argument('--save_folder', default='./widerface_evaluate/widerface_txt/', type=str, help='Dir to save txt results') 128 | parser.add_argument('--dataset_folder', default='../WiderFace/val/images/', type=str, help='dataset path') 129 | parser.add_argument('--folder_pict', default='/yolov5-face/data/widerface/val/wider_val.txt', type=str, help='folder_pict') 130 | opt = parser.parse_args() 131 | print(opt) 132 | 133 | # changhy : read folder_pict 134 | pict_folder = {} 135 | with open(opt.folder_pict, 'r') as f: 136 | lines = f.readlines() 137 | for line in lines: 138 | line = line.strip().split('/') 139 | pict_folder[line[-1]] = line[-2] 140 | 141 | # Load model 142 | device = select_device(opt.device) 143 | model = attempt_load(opt.weights, map_location=device) # load FP32 model 144 | with torch.no_grad(): 145 | # testing dataset 146 | testset_folder = opt.dataset_folder 147 | 148 | for image_path in tqdm(glob.glob(os.path.join(testset_folder, '*'))): 149 | if image_path.endswith('.txt'): 150 | continue 151 | img0 = cv2.imread(image_path) # BGR 152 | if img0 is None: 153 | print(f'ignore : {image_path}') 154 | continue 155 | boxes = detect(model, img0) 156 | # -------------------------------------------------------------------- 157 | image_name = os.path.basename(image_path) 158 | txt_name = os.path.splitext(image_name)[0] + ".txt" 159 | save_name = os.path.join(opt.save_folder, pict_folder[image_name], txt_name) 160 | dirname = os.path.dirname(save_name) 161 | if not os.path.isdir(dirname): 162 | os.makedirs(dirname) 163 | with open(save_name, "w") as fd: 164 | file_name = os.path.basename(save_name)[:-4] + "\n" 165 | bboxs_num = str(len(boxes)) + "\n" 166 | fd.write(file_name) 167 | fd.write(bboxs_num) 168 | for box in boxes: 169 | fd.write('%d %d %d %d %.03f' % (box[0], box[1], box[2], box[3], box[4] if box[4] <= 1 else 1) + '\n') 170 | print('done.') 171 | -------------------------------------------------------------------------------- /torchserve/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/torchserve/__init__.py -------------------------------------------------------------------------------- /torchserve/api.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("./") 3 | import cv2 4 | import torch 5 | import io 6 | 7 | from torchserve.qpstest import show_results 8 | from torchserve.client import TorchServe_Local_Simulator, TorchServeClientBase 9 | 10 | 11 | if __name__ == "__main__": 12 | file1 = "data/images/zidane.jpg" 13 | if 0: # initialize the local simulator API 14 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 15 | model =
torch.load("weights/yolov5s-face.pt", map_location=device)['model'] 16 | client = TorchServe_Local_Simulator(model, device, fp16=0) 17 | else: # initialize the TorchServe API client 18 | client = TorchServeClientBase(url="http://127.0.0.1:8080/predictions/", deployment_name='fd1', grpcFlag=0) 19 | 20 | # query the server 21 | #xywh_ratio, conf, landmarks_ratio = client.batch_inference([img])[0] 22 | 23 | b_img = open(file1, "rb").read() 24 | result = client.post(b_img) 25 | 26 | # display the results 27 | img = cv2.imread(file1) # read the image 28 | for face in result: 29 | xywh_ratio, conf, landmarks_ratio = face["xywh_ratio"], face["conf"], face["landmarks_ratio"] 30 | show_results(img, xywh_ratio, conf, landmarks_ratio) 31 | cv2.imshow("orgimg", img) 32 | cv2.waitKey() 33 | -------------------------------------------------------------------------------- /torchserve/client.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import requests 3 | import time 4 | import torch 5 | from concurrent.futures import ThreadPoolExecutor 6 | import json 7 | 8 | import grpc 9 | from torchserve.grpc import inference_pb2, inference_pb2_grpc 10 | 11 | 12 | class Client_Base(object): 13 | def batch_inference(self, imgList0): 14 | N = len(imgList0) 15 | t1 = time.time() 16 | with ThreadPoolExecutor(max_workers=100) as executor: 17 | res = list(executor.map(self.post, imgList0)) 18 | t2 = time.time() 19 | serverQPS = N/(t2-t1) 20 | print("batchsize:{}, time:{:.3f}, serverQPS:{}".format(N, t2-t1, serverQPS)) 21 | return res 22 | 23 | class TorchServeClientBase(Client_Base): 24 | """ 25 | TorchServe client interface 26 | """ 27 | def __init__(self, url="http://127.0.0.1:8080/predictions/", deployment_name='fd1', grpcFlag=1): 28 | self.url = url + deployment_name 29 | self.grpcFlag = grpcFlag 30 | self.model_name = deployment_name 31 | self.grpc_url = self.url.split("/")[2].replace("8080", "7070") 32 | 33 | def post(self, b_imgs, user_params=None): 34 | #b_imgs = pickle.dumps(orgimg) 35 | if self.grpcFlag: 36 | channel = grpc.insecure_channel(self.grpc_url) 37 | stub = inference_pb2_grpc.InferenceAPIsServiceStub(channel) 38 | respond = stub.Predictions(inference_pb2.PredictionsRequest(model_name=self.model_name, input={"data":b_imgs})) 39 | result = json.loads(respond.prediction) 40 | else: 41 | respond = requests.put(url=self.url, headers={}, data=b_imgs) 42 | result = json.loads(respond.content) 43 | return result 44 | 45 | 46 | class TorchServe_Local_Simulator(object): 47 | def __init__(self, model, device, fp16=0): 48 | from torchserve.handler import Yolov5FaceHandler 49 | self.handler = Yolov5FaceHandler(fp16) 50 | self.handler.model = model 51 | self.handler.device = device 52 | 53 | 54 | def batch_inference(self, imgList0): 55 | N = len(imgList0) 56 | t1 = time.time() 57 | imgT, padsizeList = self.handler.preprocess([{"data":img1} for img1 in imgList0]) 58 | t2 = time.time() 59 | preds = self.handler.inference(imgT) # neural network forward pass 60 | torch.cuda.synchronize() 61 | t3 = time.time() 62 | preds = self.handler.postprocess(preds, padsizeList) # decoding and NMS 63 | torch.cuda.synchronize() 64 | t4 = time.time() 65 | packList = [json.loads(pred) for pred in preds] 66 | serverQPS = N/(t4-t1) 67 | print("batchsize:{}, serverQPS:{}".format(N, serverQPS)) 68 | print("Gpre:{:.3f}, model:{:.3f}, Gpost:{:.3f}".format(t2-t1,t3-t2,t4-t3)) 69 | return packList -------------------------------------------------------------------------------- /torchserve/grpc/inference_pb2.py:
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 3 | # source: inference.proto 4 | """Generated protocol buffer code.""" 5 | from google.protobuf import descriptor as _descriptor 6 | from google.protobuf import message as _message 7 | from google.protobuf import reflection as _reflection 8 | from google.protobuf import symbol_database as _symbol_database 9 | # @@protoc_insertion_point(imports) 10 | 11 | _sym_db = _symbol_database.Default() 12 | 13 | 14 | from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 15 | 16 | 17 | DESCRIPTOR = _descriptor.FileDescriptor( 18 | name='inference.proto', 19 | package='org.pytorch.serve.grpc.inference', 20 | syntax='proto3', 21 | serialized_options=b'P\001', 22 | create_key=_descriptor._internal_create_key, 23 | serialized_pb=b'\n\x0finference.proto\x12 org.pytorch.serve.grpc.inference\x1a\x1bgoogle/protobuf/empty.proto\"\xbd\x01\n\x12PredictionsRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12N\n\x05input\x18\x03 \x03(\x0b\x32?.org.pytorch.serve.grpc.inference.PredictionsRequest.InputEntry\x1a,\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\"(\n\x12PredictionResponse\x12\x12\n\nprediction\x18\x01 \x01(\x0c\"*\n\x18TorchServeHealthResponse\x12\x0e\n\x06health\x18\x01 \x01(\t2\xf1\x01\n\x14InferenceAPIsService\x12\\\n\x04Ping\x12\x16.google.protobuf.Empty\x1a:.org.pytorch.serve.grpc.inference.TorchServeHealthResponse\"\x00\x12{\n\x0bPredictions\x12\x34.org.pytorch.serve.grpc.inference.PredictionsRequest\x1a\x34.org.pytorch.serve.grpc.inference.PredictionResponse\"\x00\x42\x02P\x01\x62\x06proto3' 24 | , 25 | dependencies=[google_dot_protobuf_dot_empty__pb2.DESCRIPTOR,]) 26 | 27 | 28 | 29 | 30 | _PREDICTIONSREQUEST_INPUTENTRY = _descriptor.Descriptor( 31 | name='InputEntry', 32 | full_name='org.pytorch.serve.grpc.inference.PredictionsRequest.InputEntry', 33 | filename=None, 34 | file=DESCRIPTOR, 35 | containing_type=None, 36 | create_key=_descriptor._internal_create_key, 37 | fields=[ 38 | _descriptor.FieldDescriptor( 39 | name='key', full_name='org.pytorch.serve.grpc.inference.PredictionsRequest.InputEntry.key', index=0, 40 | number=1, type=9, cpp_type=9, label=1, 41 | has_default_value=False, default_value=b"".decode('utf-8'), 42 | message_type=None, enum_type=None, containing_type=None, 43 | is_extension=False, extension_scope=None, 44 | serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), 45 | _descriptor.FieldDescriptor( 46 | name='value', full_name='org.pytorch.serve.grpc.inference.PredictionsRequest.InputEntry.value', index=1, 47 | number=2, type=12, cpp_type=9, label=1, 48 | has_default_value=False, default_value=b"", 49 | message_type=None, enum_type=None, containing_type=None, 50 | is_extension=False, extension_scope=None, 51 | serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), 52 | ], 53 | extensions=[ 54 | ], 55 | nested_types=[], 56 | enum_types=[ 57 | ], 58 | serialized_options=b'8\001', 59 | is_extendable=False, 60 | syntax='proto3', 61 | extension_ranges=[], 62 | oneofs=[ 63 | ], 64 | serialized_start=228, 65 | serialized_end=272, 66 | ) 67 | 68 | _PREDICTIONSREQUEST = _descriptor.Descriptor( 69 | name='PredictionsRequest', 70 | full_name='org.pytorch.serve.grpc.inference.PredictionsRequest', 71 | filename=None, 72 | file=DESCRIPTOR, 73 | 
containing_type=None, 74 | create_key=_descriptor._internal_create_key, 75 | fields=[ 76 | _descriptor.FieldDescriptor( 77 | name='model_name', full_name='org.pytorch.serve.grpc.inference.PredictionsRequest.model_name', index=0, 78 | number=1, type=9, cpp_type=9, label=1, 79 | has_default_value=False, default_value=b"".decode('utf-8'), 80 | message_type=None, enum_type=None, containing_type=None, 81 | is_extension=False, extension_scope=None, 82 | serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), 83 | _descriptor.FieldDescriptor( 84 | name='model_version', full_name='org.pytorch.serve.grpc.inference.PredictionsRequest.model_version', index=1, 85 | number=2, type=9, cpp_type=9, label=1, 86 | has_default_value=False, default_value=b"".decode('utf-8'), 87 | message_type=None, enum_type=None, containing_type=None, 88 | is_extension=False, extension_scope=None, 89 | serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), 90 | _descriptor.FieldDescriptor( 91 | name='input', full_name='org.pytorch.serve.grpc.inference.PredictionsRequest.input', index=2, 92 | number=3, type=11, cpp_type=10, label=3, 93 | has_default_value=False, default_value=[], 94 | message_type=None, enum_type=None, containing_type=None, 95 | is_extension=False, extension_scope=None, 96 | serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), 97 | ], 98 | extensions=[ 99 | ], 100 | nested_types=[_PREDICTIONSREQUEST_INPUTENTRY, ], 101 | enum_types=[ 102 | ], 103 | serialized_options=None, 104 | is_extendable=False, 105 | syntax='proto3', 106 | extension_ranges=[], 107 | oneofs=[ 108 | ], 109 | serialized_start=83, 110 | serialized_end=272, 111 | ) 112 | 113 | 114 | _PREDICTIONRESPONSE = _descriptor.Descriptor( 115 | name='PredictionResponse', 116 | full_name='org.pytorch.serve.grpc.inference.PredictionResponse', 117 | filename=None, 118 | file=DESCRIPTOR, 119 | containing_type=None, 120 | create_key=_descriptor._internal_create_key, 121 | fields=[ 122 | _descriptor.FieldDescriptor( 123 | name='prediction', full_name='org.pytorch.serve.grpc.inference.PredictionResponse.prediction', index=0, 124 | number=1, type=12, cpp_type=9, label=1, 125 | has_default_value=False, default_value=b"", 126 | message_type=None, enum_type=None, containing_type=None, 127 | is_extension=False, extension_scope=None, 128 | serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), 129 | ], 130 | extensions=[ 131 | ], 132 | nested_types=[], 133 | enum_types=[ 134 | ], 135 | serialized_options=None, 136 | is_extendable=False, 137 | syntax='proto3', 138 | extension_ranges=[], 139 | oneofs=[ 140 | ], 141 | serialized_start=274, 142 | serialized_end=314, 143 | ) 144 | 145 | 146 | _TORCHSERVEHEALTHRESPONSE = _descriptor.Descriptor( 147 | name='TorchServeHealthResponse', 148 | full_name='org.pytorch.serve.grpc.inference.TorchServeHealthResponse', 149 | filename=None, 150 | file=DESCRIPTOR, 151 | containing_type=None, 152 | create_key=_descriptor._internal_create_key, 153 | fields=[ 154 | _descriptor.FieldDescriptor( 155 | name='health', full_name='org.pytorch.serve.grpc.inference.TorchServeHealthResponse.health', index=0, 156 | number=1, type=9, cpp_type=9, label=1, 157 | has_default_value=False, default_value=b"".decode('utf-8'), 158 | message_type=None, enum_type=None, containing_type=None, 159 | is_extension=False, extension_scope=None, 160 | serialized_options=None, file=DESCRIPTOR, 
create_key=_descriptor._internal_create_key), 161 | ], 162 | extensions=[ 163 | ], 164 | nested_types=[], 165 | enum_types=[ 166 | ], 167 | serialized_options=None, 168 | is_extendable=False, 169 | syntax='proto3', 170 | extension_ranges=[], 171 | oneofs=[ 172 | ], 173 | serialized_start=316, 174 | serialized_end=358, 175 | ) 176 | 177 | _PREDICTIONSREQUEST_INPUTENTRY.containing_type = _PREDICTIONSREQUEST 178 | _PREDICTIONSREQUEST.fields_by_name['input'].message_type = _PREDICTIONSREQUEST_INPUTENTRY 179 | DESCRIPTOR.message_types_by_name['PredictionsRequest'] = _PREDICTIONSREQUEST 180 | DESCRIPTOR.message_types_by_name['PredictionResponse'] = _PREDICTIONRESPONSE 181 | DESCRIPTOR.message_types_by_name['TorchServeHealthResponse'] = _TORCHSERVEHEALTHRESPONSE 182 | _sym_db.RegisterFileDescriptor(DESCRIPTOR) 183 | 184 | PredictionsRequest = _reflection.GeneratedProtocolMessageType('PredictionsRequest', (_message.Message,), { 185 | 186 | 'InputEntry' : _reflection.GeneratedProtocolMessageType('InputEntry', (_message.Message,), { 187 | 'DESCRIPTOR' : _PREDICTIONSREQUEST_INPUTENTRY, 188 | '__module__' : 'inference_pb2' 189 | # @@protoc_insertion_point(class_scope:org.pytorch.serve.grpc.inference.PredictionsRequest.InputEntry) 190 | }) 191 | , 192 | 'DESCRIPTOR' : _PREDICTIONSREQUEST, 193 | '__module__' : 'inference_pb2' 194 | # @@protoc_insertion_point(class_scope:org.pytorch.serve.grpc.inference.PredictionsRequest) 195 | }) 196 | _sym_db.RegisterMessage(PredictionsRequest) 197 | _sym_db.RegisterMessage(PredictionsRequest.InputEntry) 198 | 199 | PredictionResponse = _reflection.GeneratedProtocolMessageType('PredictionResponse', (_message.Message,), { 200 | 'DESCRIPTOR' : _PREDICTIONRESPONSE, 201 | '__module__' : 'inference_pb2' 202 | # @@protoc_insertion_point(class_scope:org.pytorch.serve.grpc.inference.PredictionResponse) 203 | }) 204 | _sym_db.RegisterMessage(PredictionResponse) 205 | 206 | TorchServeHealthResponse = _reflection.GeneratedProtocolMessageType('TorchServeHealthResponse', (_message.Message,), { 207 | 'DESCRIPTOR' : _TORCHSERVEHEALTHRESPONSE, 208 | '__module__' : 'inference_pb2' 209 | # @@protoc_insertion_point(class_scope:org.pytorch.serve.grpc.inference.TorchServeHealthResponse) 210 | }) 211 | _sym_db.RegisterMessage(TorchServeHealthResponse) 212 | 213 | 214 | DESCRIPTOR._options = None 215 | _PREDICTIONSREQUEST_INPUTENTRY._options = None 216 | 217 | _INFERENCEAPISSERVICE = _descriptor.ServiceDescriptor( 218 | name='InferenceAPIsService', 219 | full_name='org.pytorch.serve.grpc.inference.InferenceAPIsService', 220 | file=DESCRIPTOR, 221 | index=0, 222 | serialized_options=None, 223 | create_key=_descriptor._internal_create_key, 224 | serialized_start=361, 225 | serialized_end=602, 226 | methods=[ 227 | _descriptor.MethodDescriptor( 228 | name='Ping', 229 | full_name='org.pytorch.serve.grpc.inference.InferenceAPIsService.Ping', 230 | index=0, 231 | containing_service=None, 232 | input_type=google_dot_protobuf_dot_empty__pb2._EMPTY, 233 | output_type=_TORCHSERVEHEALTHRESPONSE, 234 | serialized_options=None, 235 | create_key=_descriptor._internal_create_key, 236 | ), 237 | _descriptor.MethodDescriptor( 238 | name='Predictions', 239 | full_name='org.pytorch.serve.grpc.inference.InferenceAPIsService.Predictions', 240 | index=1, 241 | containing_service=None, 242 | input_type=_PREDICTIONSREQUEST, 243 | output_type=_PREDICTIONRESPONSE, 244 | serialized_options=None, 245 | create_key=_descriptor._internal_create_key, 246 | ), 247 | ]) 248 | 
_sym_db.RegisterServiceDescriptor(_INFERENCEAPISSERVICE) 249 | 250 | DESCRIPTOR.services_by_name['InferenceAPIsService'] = _INFERENCEAPISSERVICE 251 | 252 | # @@protoc_insertion_point(module_scope) 253 | -------------------------------------------------------------------------------- /torchserve/grpc/inference_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | import grpc 4 | 5 | from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 6 | from . import inference_pb2 as inference__pb2 7 | 8 | 9 | class InferenceAPIsServiceStub(object): 10 | """Missing associated documentation comment in .proto file.""" 11 | 12 | def __init__(self, channel): 13 | """Constructor. 14 | 15 | Args: 16 | channel: A grpc.Channel. 17 | """ 18 | self.Ping = channel.unary_unary( 19 | '/org.pytorch.serve.grpc.inference.InferenceAPIsService/Ping', 20 | request_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, 21 | response_deserializer=inference__pb2.TorchServeHealthResponse.FromString, 22 | ) 23 | self.Predictions = channel.unary_unary( 24 | '/org.pytorch.serve.grpc.inference.InferenceAPIsService/Predictions', 25 | request_serializer=inference__pb2.PredictionsRequest.SerializeToString, 26 | response_deserializer=inference__pb2.PredictionResponse.FromString, 27 | ) 28 | 29 | 30 | class InferenceAPIsServiceServicer(object): 31 | """Missing associated documentation comment in .proto file.""" 32 | 33 | def Ping(self, request, context): 34 | """Missing associated documentation comment in .proto file.""" 35 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 36 | context.set_details('Method not implemented!') 37 | raise NotImplementedError('Method not implemented!') 38 | 39 | def Predictions(self, request, context): 40 | """Predictions entry point to get inference using default model version. 41 | """ 42 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 43 | context.set_details('Method not implemented!') 44 | raise NotImplementedError('Method not implemented!') 45 | 46 | 47 | def add_InferenceAPIsServiceServicer_to_server(servicer, server): 48 | rpc_method_handlers = { 49 | 'Ping': grpc.unary_unary_rpc_method_handler( 50 | servicer.Ping, 51 | request_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, 52 | response_serializer=inference__pb2.TorchServeHealthResponse.SerializeToString, 53 | ), 54 | 'Predictions': grpc.unary_unary_rpc_method_handler( 55 | servicer.Predictions, 56 | request_deserializer=inference__pb2.PredictionsRequest.FromString, 57 | response_serializer=inference__pb2.PredictionResponse.SerializeToString, 58 | ), 59 | } 60 | generic_handler = grpc.method_handlers_generic_handler( 61 | 'org.pytorch.serve.grpc.inference.InferenceAPIsService', rpc_method_handlers) 62 | server.add_generic_rpc_handlers((generic_handler,)) 63 | 64 | 65 | # This class is part of an EXPERIMENTAL API. 
66 | class InferenceAPIsService(object): 67 | """Missing associated documentation comment in .proto file.""" 68 | 69 | @staticmethod 70 | def Ping(request, 71 | target, 72 | options=(), 73 | channel_credentials=None, 74 | call_credentials=None, 75 | insecure=False, 76 | compression=None, 77 | wait_for_ready=None, 78 | timeout=None, 79 | metadata=None): 80 | return grpc.experimental.unary_unary(request, target, '/org.pytorch.serve.grpc.inference.InferenceAPIsService/Ping', 81 | google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, 82 | inference__pb2.TorchServeHealthResponse.FromString, 83 | options, channel_credentials, 84 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 85 | 86 | @staticmethod 87 | def Predictions(request, 88 | target, 89 | options=(), 90 | channel_credentials=None, 91 | call_credentials=None, 92 | insecure=False, 93 | compression=None, 94 | wait_for_ready=None, 95 | timeout=None, 96 | metadata=None): 97 | return grpc.experimental.unary_unary(request, target, '/org.pytorch.serve.grpc.inference.InferenceAPIsService/Predictions', 98 | inference__pb2.PredictionsRequest.SerializeToString, 99 | inference__pb2.PredictionResponse.FromString, 100 | options, channel_credentials, 101 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 102 | -------------------------------------------------------------------------------- /torchserve/handler.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['OPENBLAS_NUM_THREADS'] = '1' 3 | os.environ["MKL_NUM_THREADS"] = "1" 4 | os.environ["NUMEXPR_NUM_THREADS"] = "1" 5 | os.environ["OMP_NUM_THREADS"] = "1" 6 | 7 | from ts.torch_handler.base_handler import BaseHandler 8 | #from ts.utils.util import load_label_mapping 9 | import time 10 | import json 11 | import torch 12 | import torch.nn 13 | import torch.backends.cudnn as cudnn 14 | cudnn.enabled = True 15 | cudnn.benchmark = True 16 | torch.set_num_threads(1) 17 | 18 | import numpy as np 19 | import torchvision 20 | import cv2 21 | import logging 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | 26 | 27 | 28 | 29 | def xyxy2xywh(x): 30 | # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right 31 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 32 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center 33 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center 34 | y[:, 2] = x[:, 2] - x[:, 0] # width 35 | y[:, 3] = x[:, 3] - x[:, 1] # height 36 | return y 37 | 38 | def preprocess_client(b_img, img_size=320, stride_max=32): 39 | #t1 = time.time() 40 | orgimg = None 41 | try: 42 | if isinstance(b_img, (bytes, bytearray)): 43 | orgimg = cv2.imdecode(np.frombuffer(b_img, np.uint8), cv2.IMREAD_COLOR) 44 | # 40ms on 2000*1000 45 | else: # elif isinstance(np.ndarray) 46 | orgimg = b_img 47 | h0, w0 = orgimg.shape[:2] 48 | except Exception as e: 49 | print(e) 50 | logger.error("failed to load image") 51 | orgimg = np.zeros([img_size, img_size, 3], np.uint8) 52 | h0, w0 = img_size, img_size 53 | assert((img_size%stride_max)==0) 54 | 55 | padh, padw = 0, 0 56 | 57 | #t2 = time.time() 58 | if max(h0, w0)>img_size: 59 | if h0>w0: 60 | s = img_size / h0 61 | h1, w1 = img_size, int(w0*s//2*2) 62 | else: 63 | s = img_size / w0 64 | h1, w1 = int(h0*s//2*2), img_size 65 | img = cv2.resize(orgimg, (w1, h1), interpolation=cv2.INTER_LINEAR) 66 | else: 67 | h1, w1 = h0, w0 68 | img = orgimg.copy() 69 | #t3 = time.time() 70 | padw = (img_size - w1)//2 71 | 
padh = (img_size - h1)//2 72 | pad = np.ones([h1, padw, 3], np.uint8)*128 73 | img = np.hstack([pad, img, pad]) 74 | pad = np.ones([padh, img_size, 3], np.uint8)*128 75 | img = np.vstack([pad, img, pad]) 76 | 77 | assert(img.shape[0]==img_size and img.shape[1]==img_size) 78 | img = img[:, :, ::-1].transpose(2, 0, 1).copy() # BGR to RGB, to 3x416x416 79 | #img = img.transpose(2, 0, 1).copy() # RGB to 3x416x416 80 | padsize = np.array([padw, padh]) 81 | #t4 = time.time() 82 | #print("prec, read:{:.4f}, resize:{:.4f}, pad:{:.4f}".format(t2-t1, t3-t2, t4-t3)) # 83 | return img, padsize 84 | 85 | 86 | def preprocess_server(imgList, device, fp16=0): 87 | if isinstance(imgList[0], np.ndarray): 88 | img = np.stack(imgList, axis=0) 89 | imgT = torch.from_numpy(img).to(device) 90 | if fp16: 91 | imgT = imgT.half() 92 | else: 93 | imgT = imgT.float() 94 | if imgT.ndimension() == 3: 95 | imgT = imgT.unsqueeze(0) 96 | imgT /= 255.0 # 0 - 255 to 0.0 - 1.0 97 | else: 98 | imgT = torch.stack(imgList, dim=0).to(device) 99 | return imgT 100 | 101 | 102 | def postprocess_client(pack, img_size=320): 103 | #t1 = time.time() 104 | dets, padsize = pack 105 | #dets = nms(dets).numpy() 106 | N = len(dets) 107 | if N==0: 108 | return json.dumps([]) 109 | imgsz1 = np.array([img_size, img_size]) - padsize * 2 110 | xyxy = dets[:,:4] - np.tile(padsize, 2)[None] 111 | conf = dets[:,4] 112 | landmarks = dets[:, 5:15] - np.tile(padsize, 5)[None] 113 | 114 | xyxy_ratio = xyxy / np.tile(imgsz1, 2)[None] 115 | landmarks_ratio = (landmarks / np.tile(imgsz1, 5)[None]).round(3) 116 | xywh_ratio = xyxy2xywh(xyxy_ratio).round(3) 117 | result = [{"xywh_ratio": xywh_ratio[i].tolist(), "conf": conf[i].tolist(), "landmarks_ratio":landmarks_ratio[i].tolist()} for i in range(N)] 118 | 119 | #t2 = time.time() 120 | result = json.dumps(result) # slow if many faces 121 | #t3 = time.time() 122 | #print(t2-t1, t3-t2) 123 | return result 124 | 125 | 126 | 127 | def postprocess_server(x): 128 | x = decode(x) 129 | #x = x.cpu() 130 | print(x.shape) 131 | x = non_max_suppression_face(x) 132 | x = [xx.cpu().numpy() for xx in x] 133 | print(x[0].shape) 134 | return x 135 | 136 | 137 | # handler 138 | 139 | def xywh2xyxy(x): 140 | # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 141 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 142 | y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x 143 | y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y 144 | y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x 145 | y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y 146 | return y 147 | 148 | def box_iou(box1, box2): 149 | # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py 150 | """ 151 | Return intersection-over-union (Jaccard index) of boxes. 152 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
153 | Arguments: 154 | box1 (Tensor[N, 4]) 155 | box2 (Tensor[M, 4]) 156 | Returns: 157 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 158 | IoU values for every element in boxes1 and boxes2 159 | """ 160 | 161 | def box_area(box): 162 | # box = 4xn 163 | return (box[2] - box[0]) * (box[3] - box[1]) 164 | 165 | area1 = box_area(box1.T) 166 | area2 = box_area(box2.T) 167 | 168 | # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) 169 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - 170 | torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 171 | # iou = inter / (area1 + area2 - inter) 172 | return inter / (area1[:, None] + area2 - inter) 173 | 174 | def nms(x, conf_thres=0.3, iou_thres=0.5): 175 | x[:,4]*=x[:,15] 176 | i = x[:,4] > conf_thres 177 | x = x[i] # confidence 178 | if x.shape[0]==0: 179 | return x 180 | boxes = xywh2xyxy(x[:, :4]) 181 | scores = x[:,4] 182 | i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS 183 | x = torch.cat((boxes, scores[:,None], x[:, 5:15], torch.ones_like(scores[:,None])), 1) 184 | return x[i] 185 | 186 | def non_max_suppression_face(prediction, conf_thres=0.3, iou_thres=0.5): 187 | """Performs Non-Maximum Suppression (NMS) on inference results 188 | Returns: 189 | detections with shape: nx6 (x1, y1, x2, y2, conf, cls) 190 | """ 191 | #prediction = prediction.cpu() 192 | output = [] 193 | for x in prediction: # image index, image inference 194 | x = nms(x) 195 | output.append(x) 196 | return output 197 | 198 | def make_grid(nx=20, ny=20): 199 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 200 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 201 | 202 | def decode(x, stride=[8,16,32], nc=1, 203 | anchors=([4,5, 8,10, 13,16], [23,29, 43,55, 73,105], [146,217, 231,300, 335,433]) # yolov5s 204 | ): 205 | device = x[0].device 206 | no = nc + 5 + 10 # number of outputs per anchor 207 | nl = 3 208 | grid = [torch.zeros(1)] * nl # init grid 209 | a = torch.tensor(anchors).float().view(nl, -1, 2) 210 | anchors = a.to(device) 211 | anchor_grid = a.clone().view(nl, 1, -1, 1, 1, 2).to(device) # shape(nl,1,na,1,1,2) 212 | z = [] 213 | for i in range(nl): 214 | bs, _, ny, nx, _ = x[i].shape 215 | #if self.grid[i].shape[2:4] != x[i].shape[2:4]: 216 | grid = make_grid(nx, ny).to(device) 217 | 218 | y = torch.full_like(x[i], 0) 219 | class_range = list(range(5)) + list(range(15,15+nc)) 220 | y[..., class_range] = x[i][..., class_range].sigmoid() 221 | y[..., 5:15] = x[i][..., 5:15] 222 | 223 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + grid) * stride[i] # xy 224 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anchor_grid[i] # wh 225 | y[..., 5:15] = y[..., 5:15] * anchor_grid[i].tile(5) + (grid * stride[i]).tile(5) 226 | z.append(y.view(bs, -1, no)) 227 | return torch.cat(z, 1) 228 | 229 | class Yolov5FaceHandler(BaseHandler): 230 | """ 231 | Ref. 
https://github.com/pytorch/serve/blob/master/docs/custom_service.md
232 |     """
233 |     def __init__(self, fp16=1):
234 |         super().__init__()
235 |         self.fp16 = fp16
236 |         import multiprocessing
237 |         import os
238 |         self.pool = multiprocessing.Pool(os.cpu_count())
239 | 
240 |     def initialize(self, context):
241 |         serialized_file = context.manifest["model"]["serializedFile"]
242 |         if serialized_file.split(".")[-1] == "torch2trt":
243 |             self._load_torchscript_model = self._load_torch2trt_model  # overwrite load model function
244 |         super().initialize(context)
245 | 
246 |     def _load_torch2trt_model(self, torch2trt_path):
247 |         logger.info("Loading torch2trt model")
248 |         from torch2trt import TRTModule
249 |         model_trt = TRTModule()
250 |         model_trt.load_state_dict(torch.load(torch2trt_path))
251 |         self.fp16 = 0
252 |         self.pool = None
253 |         return model_trt
254 | 
255 |     def preprocess(self, data):
256 |         """CPU-bound preprocessing that has to run on the GPU server, typically payload decoding and image preprocessing."""
257 |         taskList = [dict1.get("data") or dict1.get("body") for dict1 in data]
258 |         if self.pool is None:
259 |             packList = [preprocess_client(pack) for pack in taskList]
260 |         else:
261 |             packList = self.pool.map(preprocess_client, taskList)
262 |         imageList, padsizeList = zip(*packList)
263 |         #imageList = [pickle.loads(task) for task in taskList]
264 |         data = preprocess_server(imageList, self.device, self.fp16)
265 |         return data, padsizeList
266 | 
267 |     def inference(self, x):
268 |         """GPU inference part; torchserve automatically schedules the batch size to maximize QPS."""
269 |         with torch.no_grad():
270 |             x = self.model(x)
271 |         return x
272 | 
273 |     def postprocess(self, preds, padsizeList):
274 |         """CPU-bound postprocessing that has to run on the GPU server, typically NMS and result packing."""
275 |         t1 = time.time()
276 |         res = postprocess_server(preds)
277 |         torch.cuda.synchronize()
278 |         t2 = time.time()
279 |         taskList = [(dets, padsizeList[i]) for i, dets in enumerate(res)]
280 |         if self.pool is None:
281 |             res = [postprocess_client(task) for task in taskList]
282 |         else:
283 |             res = self.pool.map(postprocess_client, taskList)
284 |         t3 = time.time()
285 |         print("posts:{:.4f}, postc:{:.4f}".format(t2-t1, t3-t2))
286 |         return res
287 | 
288 |     def handle(self, data, context):
289 |         start_time = time.time()
290 | 
291 |         self.context = context
292 |         metrics = self.context.metrics
293 | 
294 |         data_preprocess, padsizeList = self.preprocess(data)
295 | 
296 |         if not self._is_explain():
297 |             output = self.inference(data_preprocess)
298 |             output = self.postprocess(output, padsizeList)
299 |         else:
300 |             output = self.explain_handle(data_preprocess, data)
301 | 
302 |         stop_time = time.time()
303 |         metrics.add_time('HandlerTime', round((stop_time - start_time) * 1000, 2), None, 'ms')
304 |         return output
305 | 
--------------------------------------------------------------------------------
/torchserve/model_repack.py:
--------------------------------------------------------------------------------
 1 | import sys, os
 2 | sys.path.append("./")
 3 | import torch
 4 | import argparse
 5 | from models.experimental import attempt_load
 6 | 
 7 | def pth2pt(model_pth, ptPath=None, image_size=320, fp16=0):
 8 |     """Convert a standard PyTorch model into a TorchScript model."""
 9 |     # ignore TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect.
10 |     # https://stackoverflow.com/questions/66746307/torch-jit-trace-tracerwarning-converting-a-tensor-to-a-python-boolean-might-c
11 |     imgT = (torch.zeros([1,3,image_size,image_size], dtype=torch.float32)).to(device)
12 |     if fp16:
13 |         imgT = imgT.half()
14 |         model_pth.half()
15 |     #print(model_pth.state_dict().keys())
16 |     model = torch.jit.trace(model_pth, imgT)
17 |     model.save(ptPath)
18 | 
19 | def pt2mar(ptPath, modelFile=None, marName="jit_fd1"):
20 |     """Pack all related resources into the .mar archive required by torchserve."""
21 |     marDir = "./torchserve/model_store/"
22 |     if not os.path.isdir(marDir):
23 |         os.mkdir(marDir)
24 |     handlerFile = "./torchserve/handler.py"
25 |     extraFiles = []
26 | 
27 |     tma_path = "torch-model-archiver "
28 |     # make sure torch-model-archiver is installed in the current environment
29 |     command = tma_path \
30 |         + " --model-name " + marName \
31 |         + " --version 1.0 " \
32 |         + " --serialized-file " + ptPath \
33 |         + " --export-path " + marDir \
34 |         + " --handler " + handlerFile
35 |     if len(extraFiles)>0:
36 |         command += " --extra-files " + ",".join(extraFiles)
37 |     if modelFile is not None:
38 |         command += " --model-file " + modelFile  # leading space keeps the flag separated from the previous argument
39 |     command += " -f "
40 |     print("Run command:\n", command)
41 |     os.system(command)
42 |     print("Info:Exported [%s] to [%s]\n"%(marName, marDir))
43 |     #os.remove(ptPath)  # remove the intermediate .pt model file
44 | 
45 | def pth2mar(model_pth, pthPath):
46 |     torch.save(model_pth.state_dict(), pthPath)
47 |     pt2mar(pthPath)
48 | 
49 | def pth2trt(model_pth, torch2trtPath, fp16, image_size=320):
50 |     from torch2trt import torch2trt
51 |     import tensorrt as trt
52 |     print("torch2trt, may take 1 minute...")
53 |     x = torch.ones(1, 3, image_size, image_size).to(device)
54 |     model_pth.float()
55 |     model_trt = torch2trt(model_pth, [x], fp16_mode=fp16,
56 |                           log_level=trt.Logger.INFO,
57 |                           max_workspace_size=(1 << 32),)
58 |     # a module that can later be loaded by torch2trt.TRTModule
59 |     pred = model_trt(x)
60 |     torch.save(model_trt.state_dict(), torch2trtPath)
61 | 
62 | 
63 | if __name__ == "__main__":
64 |     parser = argparse.ArgumentParser()
65 |     parser.add_argument('--fp16', type=int, default=1, help='fp16 inference')
66 |     parser.add_argument('--trt', type=int, default=1, help='pack with torch2trt model, otherwise with torchscript model')
67 |     parser.add_argument('--trt_rebuild', type=int, default=1, help='rebuild torch2trt model')
68 |     args = parser.parse_args()
69 | 
70 |     pthPath0 = "weights/yolov5s-face.pth"
71 |     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
72 |     model_pth = attempt_load(pthPath0, map_location=device, fp16=args.fp16)
73 |     ptPath = "weights/yolov5s-jit.pt"
74 |     if args.trt:  # torch2trt
75 |         torch2trtPath = "./weights/yolov5s-face.torch2trt"
76 |         if args.trt_rebuild:
77 |             pth2trt(model_pth, torch2trtPath, args.fp16)
78 |         pt2mar(torch2trtPath, marName="trt_fd1")
79 |     else:  # jit
80 |         pth2pt(model_pth, ptPath, fp16=args.fp16)
81 |         pt2mar(ptPath, marName="jit_fd1")
82 | 
--------------------------------------------------------------------------------
/torchserve/qpstest.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: UTF-8 -*-
 2 | import sys
 3 | sys.path.append("./")
 4 | import argparse
 5 | import time
 6 | 
 7 | import cv2
 8 | import torch
 9 | import torch.backends.cudnn as cudnn
10 | import numpy as np
11 | import json
12 | 
13 | from models.experimental import attempt_load
14 | from torchserve.client import TorchServe_Local_Simulator, TorchServeClientBase
15 | 
16 | 
17 | def show_results(img, xywh, conf, landmarks):
18 |     h,w,c = img.shape
19 |     tl = 1 or round(0.002 * (h + w) / 2) + 1
# line/font thickness
20 |     x1 = int(xywh[0] * w - 0.5 * xywh[2] * w)
21 |     y1 = int(xywh[1] * h - 0.5 * xywh[3] * h)
22 |     x2 = int(xywh[0] * w + 0.5 * xywh[2] * w)
23 |     y2 = int(xywh[1] * h + 0.5 * xywh[3] * h)
24 |     cv2.rectangle(img, (x1,y1), (x2, y2), (0,255,0), thickness=tl, lineType=cv2.LINE_AA)
25 | 
26 |     clors = [(255,0,0),(0,255,0),(0,0,255),(255,255,0),(0,255,255)]
27 | 
28 |     for i in range(5):
29 |         point_x = int(landmarks[2 * i] * w)
30 |         point_y = int(landmarks[2 * i + 1] * h)
31 |         cv2.circle(img, (point_x, point_y), tl+1, clors[i], -1)
32 | 
33 |     tf = max(tl - 1, 1)  # font thickness
34 |     label = str(conf)[:5]
35 |     cv2.putText(img, label, (x1, y1 - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
36 |     return img
37 | 
38 | if __name__ == '__main__':
39 |     parser = argparse.ArgumentParser()
40 |     parser.add_argument('--weights', nargs='+', type=str, default='weights/yolov5s-face.pt', help='model.pt path(s)')
41 |     parser.add_argument('--image', type=str, default='data/images/head.jpg', help='source')  # file/folder, 0 for webcam
42 |     parser.add_argument('--img-size', type=int, default=320, help='inference size (pixels)')
43 |     parser.add_argument('--fp16', type=int, default=0, help='fp16 inference')
44 |     parser.add_argument('--vis', type=int, default=0, help='visualization')
45 |     parser.add_argument('--mode', type=int, default=1, choices=[1,2,3], help='test mode: 1=local pytorch, 2=local tensorrt, 3=torchserve')
46 | 
47 |     opt = parser.parse_args()
48 |     print(opt)
49 |     img_size = opt.img_size
50 |     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
51 | 
52 |     if opt.mode == 1:
53 |         print("testing pytorch local inference")
54 |         #model = attempt_load(opt.weights, map_location=device, fp16=opt.fp16)
55 |         model = torch.load(opt.weights, map_location=device)['model']
56 |         client = TorchServe_Local_Simulator(model, device, fp16=opt.fp16)
57 |     elif opt.mode == 2:
58 |         print("testing tensorrt local inference")
59 |         opt.weights = opt.weights.split(".")[0] + ".torch2trt"
60 |         from torch2trt import TRTModule
61 |         torch2trt_path = "weights/yolov5s-face.torch2trt"  # path to the torch2trt model
62 |         model = TRTModule()
63 |         model.load_state_dict(torch.load(torch2trt_path))
64 |         client = TorchServe_Local_Simulator(model, device, fp16=0)
65 |     else:  # opt.mode==3
66 |         print("testing torchserve inference")
67 |         client = TorchServeClientBase(url="http://127.0.0.1:8080/predictions/", deployment_name='fd1', grpcFlag=1)
68 | 
69 |     b_img = open(opt.image, "rb").read()
70 |     if 1:
71 |         orgimg = cv2.imdecode(np.frombuffer(b_img, np.uint8), cv2.IMREAD_COLOR)  # BGR
72 |         assert orgimg is not None, 'Image Not Found ' + opt.image
73 |         result = client.batch_inference([b_img])[0]  # omit cold start
74 | 
75 |     if not opt.vis:
76 |         for batchsize in [1,1,1,1,4,16,32,64,128]:
77 |             #b_img = bytes('', encoding = 'utf-8')
78 |             result = client.batch_inference([b_img]*batchsize)[0]
79 |     else:
80 |         for face in result:
81 |             xywh_ratio, conf, landmarks_ratio = face["xywh_ratio"], face["conf"], face["landmarks_ratio"]
82 |             show_results(orgimg, xywh_ratio, conf, landmarks_ratio)
83 |         print('det size:', img_size)
84 |         print('orgimg.shape: ', orgimg.shape)
85 | 
86 |         cv2.imwrite('result.jpg', orgimg)
87 |         cv2.imshow("orgimg", orgimg)
88 |         cv2.waitKey()
89 | 
90 | 
--------------------------------------------------------------------------------
/torchserve/readme_torchserve_depricated.md:
--------------------------------------------------------------------------------
 1 | # Serve Yolov5face Pytorch Model on TorchServe
 2 | Model: yolov5s
 3 | Neural-network inference framework: TorchScript
 4 |
Serving framework: Torchserve
 5 | 
 6 | ## install (ubuntu)
 7 | ```
 8 | pip install -r requirements.txt
 9 | ```
10 | Install the Java 11 dependency: https://www.ubuntu18.com/ubuntu-install-openjdk-11/
11 | On a cloud server, if the CUDA version differs from CUDA 10.2, manually edit the PyTorch version in requirements.txt:
12 | https://pytorch.org/get-started/locally/
13 | 
14 | ### Download model file
15 | Download the 50 MB **yolov5s** file: https://drive.google.com/file/d/1zxaHeLDyID9YU4-hqK7KNepXIwbTkRIO/view?usp=sharing
16 | and unzip it to
17 | ```
18 | weights/yolov5s-face.pt
19 | ```
20 | 
21 | ## Quickstart
22 | ### Start Server and Register Model
23 | 1. Pack the model and python code into the torchserve .mar format
24 | ```
25 | python ./torchserve/model_repack.py
26 | ```
27 | This generates the file "./torchserve/model_store/jit_fd1.mar".
28 | 2. Start the server
29 | ```
30 | torchserve --start --ncs --model-store ./torchserve/model_store/
31 | ```
32 | 3. Register the model on localhost
33 | ```
34 | curl -X POST "127.0.0.1:8081/models?url=jit_fd1.mar&batch_size=32&max_batch_delay=2&initial_workers=2&model_name=fd1"
35 | ```
36 | 
37 | ### API example
38 | `
39 | python torchserve/api.py
40 | `
41 | 
42 | or
43 | `
44 | curl 127.0.0.1:8080/predictions/fd1 -T ./data/images/zidane.jpg
45 | `
46 | ### QPS test on torchserve
47 | ```
48 | python torchserve/qpstest.py --mode 3
49 | ```
50 | QPS test on the local model
51 | ```
52 | python torchserve/qpstest.py --mode 1
53 | ```
54 | ## Other torchserve commands
55 | Unregister the model
56 | ```
57 | curl -X DELETE http://localhost:8081/models/fd1
58 | ```
59 | Check model status
60 | ```
61 | curl 127.0.0.1:8081/models
62 | curl 127.0.0.1:8081/models/fd1
63 | ```
64 | Stop the server
65 | ```
66 | torchserve --stop
67 | ```
68 | ## Log dir
69 | `
70 | ./logs/
71 | `
72 | 
73 | ## Reference
74 | torchserve documentation
75 | https://github.com/pytorch/serve
76 | torchserve + docker
77 | https://github.com/pytorch/serve#quick-start-with-docker
78 | 
--------------------------------------------------------------------------------
/torchserve/resize_client/client.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import requests
 3 | import time
 4 | import torch
 5 | from concurrent.futures import ThreadPoolExecutor
 6 | 
 7 | import grpc
 8 | from torchserve.grpc import inference_pb2, inference_pb2_grpc
 9 | from torchserve.client_utils import preprocess_client, postprocess_client
10 | 
11 | 
12 | class Client_Base(object):
13 |     def batch_inference(self, imgList0):
14 |         N = len(imgList0)
15 |         t1 = time.time()
16 |         with ThreadPoolExecutor(max_workers=100) as executor:
17 |             res = list(executor.map(self.post, imgList0))
18 |         t2 = time.time()
19 |         serverQPS = N/(t2-t1)
20 |         print("batchsize:{}, time:{:.3f}, serverQPS:{}".format(N, t2-t1, serverQPS))
21 |         return res
22 | 
23 | class TorchServeClientBase(Client_Base):
24 |     """
25 |     torchserve client interface
26 |     """
27 |     def __init__(self, img_size, stride_max=32, url="http://127.0.0.1:8080/predictions/", deployment_name='fd1', grpcFlag=1):
28 |         self.url = url + deployment_name
29 |         self.grpcFlag = grpcFlag
30 |         self.model_name = deployment_name
31 |         self.grpc_url = self.url.split("/")[2].replace("8080", "7070")
32 |         self.img_size = img_size
33 |         self.stride_max = stride_max
34 | 
35 |     def post(self, orgimg, user_params=None):
36 |         img, padsize = preprocess_client(orgimg, self.img_size, self.stride_max)
37 |         b_imgs = pickle.dumps(img)
38 |         if self.grpcFlag:
39 |             channel = grpc.insecure_channel(self.grpc_url)
40 |             stub =
inference_pb2_grpc.InferenceAPIsServiceStub(channel) 41 | respond = stub.Predictions(inference_pb2.PredictionsRequest(model_name=self.model_name, input={"data":b_imgs})) 42 | result = pickle.loads(respond.prediction) 43 | else: 44 | respond = requests.put(url=self.url, headers={}, data=b_imgs) 45 | result = pickle.loads(respond.content) 46 | 47 | result = postprocess_client(result, padsize, self.img_size) 48 | return result 49 | 50 | 51 | class TorchServe_Local_Simulator(object): 52 | def __init__(self, model, img_size, device, fp16=0): 53 | from torchserve.handler import Yolov5FaceHandler 54 | self.handler = Yolov5FaceHandler(fp16) 55 | self.handler.model = model 56 | self.handler.device = device 57 | self.stride_max = 32 58 | self.img_size = img_size 59 | 60 | def batch_inference(self, imgList0): 61 | N = len(imgList0) 62 | t1 = time.time() 63 | packs = [preprocess_client(orgimg, self.img_size, self.stride_max) for orgimg in imgList0] # client端多进程,大图resize缩小 64 | #imgList1, imgsz1List, imgsz0List = zip(*packs) 65 | imgList1, padsizeList = zip(*packs) 66 | t2 = time.time() 67 | imgT = self.handler.preprocess([{"data":pickle.dumps(img1)} for img1 in imgList1]) # 图片padding到特定尺寸,batching 68 | t3 = time.time() 69 | preds = self.handler.inference(imgT) # 神经网络 70 | torch.cuda.synchronize() 71 | t4 = time.time() 72 | preds = self.handler.postprocess(preds) # 解码和NMS 73 | torch.cuda.synchronize() 74 | t5 = time.time() 75 | preds = [pickle.loads(pred) for pred in preds] 76 | #packList = [postprocess_client(dets, imgsz0List[i], imgsz1List[i]) for i, dets in enumerate(preds)] # 还原到原图坐标 77 | packList = [postprocess_client(dets, padsizeList[i], self.img_size) for i, dets in enumerate(preds)] # 还原到原图坐标 78 | t6 = time.time() 79 | serverQPS = N/(t5-t2) 80 | print("batchsize:{}, pre:{:.3f}, server:{:.3f}, post:{:.3f}, serverQPS:{}".format(N, t2-t1,t5-t2,t6-t5, serverQPS)) 81 | print("Gpre:{:.3f}, model:{:.3f}, Gpost:{:.3f}".format(t3-t2,t4-t3,t5-t4)) 82 | return packList -------------------------------------------------------------------------------- /torchserve/resize_client/client_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import cv2 4 | 5 | from utils.datasets import letterbox 6 | from utils.general import check_img_size, non_max_suppression_face 7 | 8 | 9 | def xyxy2xywh(x): 10 | # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right 11 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 12 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center 13 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center 14 | y[:, 2] = x[:, 2] - x[:, 0] # width 15 | y[:, 3] = x[:, 3] - x[:, 1] # height 16 | return y 17 | 18 | 19 | def preprocess_client(orgimg, img_size=320, stride_max=32): 20 | assert((img_size%stride_max)==0) 21 | h0, w0 = orgimg.shape[:2] 22 | padh, padw = 0, 0 23 | 24 | if max(h0, w0)>img_size: 25 | if h0>w0: 26 | s = img_size / h0 27 | h1, w1 = img_size, int(w0*s//2*2) 28 | padw = (img_size - w1)//2 29 | else: 30 | s = img_size / w0 31 | h1, w1 = int(h0*s//2*2), img_size 32 | padh = (img_size - h1)//2 33 | img = cv2.resize(orgimg, (w1, h1), interpolation=cv2.INTER_LINEAR) 34 | if h0>w0: 35 | pad = np.ones([h1, padw, 3], np.uint8)*128 36 | img = np.hstack([pad, img, pad]) 37 | else: 38 | pad = np.ones([padh, w1, 3], np.uint8)*128 39 | img = np.vstack([pad, img, pad]) 40 | assert(img.shape[0]==img_size and img.shape[1]==img_size) 41 | img = img[:, :, ::-1].transpose(2, 0, 
1).copy() # BGR to RGB, to 3x416x416 42 | padsize = np.array([padw, padh]) 43 | return img, padsize 44 | 45 | 46 | def postprocess_client(dets, padsize, img_size=320): 47 | if len(dets)==0: 48 | return [],[],[],[] 49 | imgsz1 = np.array([img_size, img_size]) - padsize * 2 50 | xyxy = dets[:,:4] - np.tile(padsize, 2)[None] 51 | conf = dets[:,4] 52 | landmarks = dets[:, 5:15] - np.tile(padsize, 5)[None] 53 | 54 | xyxy_ratio = xyxy / np.tile(imgsz1, 2)[None] 55 | landmarks_ratio = landmarks / np.tile(imgsz1, 5)[None] 56 | xywh_ratio = xyxy2xywh(xyxy_ratio) 57 | return xywh_ratio, conf, landmarks_ratio 58 | 59 | -------------------------------------------------------------------------------- /torchserve/resize_client/client_utils_old.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import cv2 4 | 5 | from utils.datasets import letterbox 6 | from utils.general import check_img_size, non_max_suppression_face 7 | 8 | 9 | def xyxy2xywh(x): 10 | # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right 11 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 12 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center 13 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center 14 | y[:, 2] = x[:, 2] - x[:, 0] # width 15 | y[:, 3] = x[:, 3] - x[:, 1] # height 16 | return y 17 | 18 | def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): 19 | # Rescale coords (xyxy) from img1_shape to img0_shape 20 | if ratio_pad is None: # calculate from img0_shape 21 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new 22 | pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding 23 | else: 24 | gain = ratio_pad[0][0] 25 | pad = ratio_pad[1] 26 | 27 | coords[:, [0, 2]] -= pad[0] # x padding 28 | coords[:, [1, 3]] -= pad[1] # y padding 29 | coords[:, :4] /= gain 30 | #clip_coords(coords, img0_shape) 31 | return coords 32 | 33 | 34 | def scale_coords_landmarks(img1_shape, coords, img0_shape, ratio_pad=None): 35 | # Rescale coords (xyxy) from img1_shape to img0_shape 36 | if ratio_pad is None: # calculate from img0_shape 37 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new 38 | pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding 39 | else: 40 | gain = ratio_pad[0][0] 41 | pad = ratio_pad[1] 42 | 43 | coords[:, [0, 2, 4, 6, 8]] -= pad[0] # x padding 44 | coords[:, [1, 3, 5, 7, 9]] -= pad[1] # y padding 45 | coords[:, :10] /= gain 46 | #clip_coords(coords, img0_shape) 47 | return coords 48 | 49 | 50 | 51 | def preprocess_client(orgimg, img_size, model_stride_max): 52 | imgsz0 = orgimg.shape[:2] # orig hw 53 | h0, w0 = imgsz0 54 | r = img_size / max(h0, w0) # resize image to img_size 55 | if r != 1: # always resize down, only resize up if training with augmentation 56 | interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR 57 | img0 = cv2.resize(orgimg, (int(w0 * r), int(h0 * r)), interpolation=interp) 58 | 59 | img1w = check_img_size(img_size, s=model_stride_max) # check img_size 60 | img = letterbox(img0, new_shape=img1w)[0] 61 | imgsz1 = img.shape[:2] 62 | # Convert 63 | img = img[:, :, ::-1].transpose(2, 0, 1).copy() # BGR to RGB, to 3x416x416 64 | return img, imgsz1, imgsz0 65 | 66 | 67 | def postprocess_client(dets, imgsz0, imgsz1): 68 | if len(dets)==0: 69 | return [],[],[],[] 70 | xyxy = dets[:,:4] 71 | conf = 
dets[:,4] 72 | landmarks = dets[:, 5:15] 73 | xyxy_origin = scale_coords(imgsz1, xyxy, imgsz0).round() # 推算神经网络padding像素,还原到原图坐标,比较复杂 74 | landmarks_origin = scale_coords_landmarks(imgsz1, landmarks, imgsz0).round() 75 | 76 | gn = np.array(imgsz0)[[1, 0, 1, 0]] 77 | gn_lks = np.array(imgsz0)[[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]] 78 | xyxy_ratio = xyxy_origin / gn 79 | landmarks_ratio = landmarks_origin / gn_lks 80 | xywh_ratio = xyxy2xywh(xyxy_ratio) 81 | return xywh_ratio, conf, landmarks_ratio 82 | 83 | -------------------------------------------------------------------------------- /torchserve/resize_client/handler (copy).py: -------------------------------------------------------------------------------- 1 | # 系统import 2 | from ts.torch_handler.base_handler import BaseHandler 3 | import time 4 | import json 5 | import torch 6 | import torch.nn 7 | import torch.backends.cudnn as cudnn 8 | cudnn.enabled = True 9 | cudnn.benchmark = True # distilbert batchsize 完全不变,所以benchmark打开比较快 10 | 11 | import numpy as np 12 | import torchvision 13 | import pickle 14 | 15 | 16 | 17 | # handler 18 | 19 | def xywh2xyxy(x): 20 | # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 21 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 22 | y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x 23 | y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y 24 | y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x 25 | y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y 26 | return y 27 | 28 | def box_iou(box1, box2): 29 | # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py 30 | """ 31 | Return intersection-over-union (Jaccard index) of boxes. 32 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 33 | Arguments: 34 | box1 (Tensor[N, 4]) 35 | box2 (Tensor[M, 4]) 36 | Returns: 37 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 38 | IoU values for every element in boxes1 and boxes2 39 | """ 40 | 41 | def box_area(box): 42 | # box = 4xn 43 | return (box[2] - box[0]) * (box[3] - box[1]) 44 | 45 | area1 = box_area(box1.T) 46 | area2 = box_area(box2.T) 47 | 48 | # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) 49 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - 50 | torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 51 | # iou = inter / (area1 + area2 - inter) 52 | return inter / (area1[:, None] + area2 - inter) 53 | 54 | def non_max_suppression_face(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()): 55 | """Performs Non-Maximum Suppression (NMS) on inference results 56 | Returns: 57 | detections with shape: nx6 (x1, y1, x2, y2, conf, cls) 58 | """ 59 | 60 | nc = prediction.shape[2] - 15 # number of classes 61 | xc = prediction[..., 4] > conf_thres # candidates 62 | 63 | # Settings 64 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height 65 | time_limit = 10.0 # seconds to quit after 66 | redundant = True # require redundant detections 67 | multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img) 68 | merge = False # use merge-NMS 69 | 70 | t = time.time() 71 | output = [torch.zeros((0, 16), device=prediction.device)] * prediction.shape[0] 72 | for xi, x in enumerate(prediction): # image index, image inference 73 | # Apply constraints 74 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 75 | x = x[xc[xi]] # confidence 76 | 77 | # Cat apriori labels if autolabelling 78 | if labels and len(labels[xi]): 79 | l = labels[xi] 
80 | v = torch.zeros((len(l), nc + 15), device=x.device) 81 | v[:, :4] = l[:, 1:5] # box 82 | v[:, 4] = 1.0 # conf 83 | v[range(len(l)), l[:, 0].long() + 15] = 1.0 # cls 84 | x = torch.cat((x, v), 0) 85 | 86 | # If none remain process next image 87 | if not x.shape[0]: 88 | continue 89 | 90 | # Compute conf 91 | x[:, 15:] *= x[:, 4:5] # conf = obj_conf * cls_conf 92 | 93 | # Box (center x, center y, width, height) to (x1, y1, x2, y2) 94 | box = xywh2xyxy(x[:, :4]) 95 | 96 | # Detections matrix nx6 (xyxy, conf, landmarks, cls) 97 | if multi_label: 98 | i, j = (x[:, 15:] > conf_thres).nonzero(as_tuple=False).T 99 | x = torch.cat((box[i], x[i, j + 15, None], x[:, 5:15] ,j[:, None].float()), 1) 100 | else: # best class only 101 | conf, j = x[:, 15:].max(1, keepdim=True) 102 | x = torch.cat((box, conf, x[:, 5:15], j.float()), 1)[conf.view(-1) > conf_thres] 103 | 104 | # Filter by class 105 | if classes is not None: 106 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] 107 | 108 | # If none remain process next image 109 | n = x.shape[0] # number of boxes 110 | if not n: 111 | continue 112 | 113 | # Batched NMS 114 | c = x[:, 15:16] * (0 if agnostic else max_wh) # classes 115 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores 116 | i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS 117 | #if i.shape[0] > max_det: # limit detections 118 | # i = i[:max_det] 119 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 120 | # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 121 | iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix 122 | weights = iou * scores[None] # box weights 123 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes 124 | if redundant: 125 | i = i[iou.sum(1) > 1] # require redundancy 126 | 127 | output[xi] = x[i] 128 | if (time.time() - t) > time_limit: 129 | break # time limit exceeded 130 | 131 | return output 132 | 133 | def make_grid(nx=20, ny=20): 134 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 135 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 136 | 137 | def decode(x, stride=[8,16,32], nc=1, 138 | anchors=([4,5, 8,10, 13,16], [23,29, 43,55, 73,105], [146,217, 231,300, 335,433]) # yolov5s 139 | ): 140 | device = x[0].device 141 | no = nc + 5 + 10 # number of outputs per anchor 142 | nl = 3 143 | grid = [torch.zeros(1)] * nl # init grid 144 | a = torch.tensor(anchors).float().view(nl, -1, 2) 145 | anchors = a.to(device) 146 | anchor_grid = a.clone().view(nl, 1, -1, 1, 1, 2).to(device) # shape(nl,1,na,1,1,2) 147 | z = [] 148 | for i in range(nl): 149 | bs, _, ny, nx, _ = x[i].shape 150 | #if self.grid[i].shape[2:4] != x[i].shape[2:4]: 151 | grid = make_grid(nx, ny).to(device) 152 | 153 | y = torch.full_like(x[i], 0) 154 | class_range = list(range(5)) + list(range(15,15+nc)) 155 | y[..., class_range] = x[i][..., class_range].sigmoid() 156 | y[..., 5:15] = x[i][..., 5:15] 157 | 158 | y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + grid.to(x[i].device)) * stride[i] # xy 159 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anchor_grid[i] # wh 160 | 161 | y[..., 5:7] = y[..., 5:7] * anchor_grid[i] + grid * stride[i] # landmark x1 y1 162 | y[..., 7:9] = y[..., 7:9] * anchor_grid[i] + grid * stride[i]# landmark x2 y2 163 | y[..., 9:11] = y[..., 9:11] * anchor_grid[i] + grid * stride[i]# landmark x3 y3 164 | y[..., 11:13] = y[..., 11:13] * anchor_grid[i] + grid * stride[i]# landmark x4 y4 165 | y[..., 13:15] = y[..., 13:15] * anchor_grid[i] + grid * stride[i]# landmark x5 y5 166 | 167 | z.append(y.view(bs, -1, no)) 168 | return torch.cat(z, 1) 169 | 170 | def preprocess_server(imgList, device, fp16=0): 171 | img = np.stack(imgList, axis=0) 172 | img = torch.from_numpy(img).to(device) 173 | if img.ndimension() == 3: 174 | img = img.unsqueeze(0) 175 | if fp16: 176 | img = img.half() 177 | else: 178 | img = img.float() 179 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 180 | return img 181 | 182 | def postprocess_server(x): 183 | x = decode(x) 184 | conf_thres = 0.3 185 | iou_thres = 0.5 186 | preds = non_max_suppression_face(x, conf_thres, iou_thres) 187 | preds = [x.cpu().numpy() for x in preds] 188 | return preds 189 | 190 | class Yolov5FaceHandler(BaseHandler): 191 | """ 192 | This handler takes a list of raw text 193 | and returns the tags of each text. 194 | Ref. https://github.com/pytorch/serve/blob/master/docs/custom_service.md 195 | """ 196 | def __init__(self, fp16=0): 197 | super().__init__() 198 | self.fp16=fp16 199 | 200 | def preprocess(self, data): 201 | """不得不在GPU服务器执行的依赖CPU的前处理,一般包括数据解压和图片预处理,这部分torchserve会自动CPU多进程""" 202 | imageList = [] 203 | for dict1 in data: 204 | img1p = dict1.get("data") or dict1.get("body") 205 | img1 = pickle.loads(img1p) 206 | imageList.append(img1) 207 | data = preprocess_server(imageList, self.device, self.fp16) 208 | return data 209 | 210 | def inference(self, x): 211 | """gpu inference part, 这部分torchserve会自动调度batch size使qps最大化""" 212 | with torch.no_grad(): 213 | x = self.model(x) 214 | return x 215 | 216 | def postprocess(self, preds): 217 | """不得不在GPU服务器执行的依赖CPU的后处理,一般包括nms和数据压缩,这部分torchserve会自动CPU多进程""" 218 | res = postprocess_server(preds) 219 | torch.cuda.synchronize() 220 | res = [pickle.dumps(x) for x in res] 221 | return res -------------------------------------------------------------------------------- /torchserve/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # python3 ./torchserve/model_repack.py --trt 1 # 这一步通过nvidia-container-runtime 放在docker build中 4 | SCRIPT_DIR="$(dirname $(readlink -f ${0}))" 5 | ROOT_DIR="$(readlink -f ${SCRIPT_DIR}/..)" 6 | 7 | 8 | torchserve --start --ncs --ts-config ${ROOT_DIR}/config/config.properties --model-store ${ROOT_DIR}/torchserve/model_store/ # start torchserve, log_dir=./logs 9 | 10 | echo "********************************************************" 11 | echo "Waiting for the torchserve to start on port 8080 and 7070" 12 | echo "********************************************************" 13 | while ! 
`nc -vz localhost 7070`; do sleep 3; done 14 | echo "******* torchserve has started" 15 | 16 | curl -X POST "127.0.0.1:8081/models?url=trt_fd1.mar&batch_size=1&max_batch_delay=2&initial_workers=4&model_name=fd1" # register model 17 | 18 | tail -f /dev/null # 阻止自动退出 19 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/utils/__init__.py -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1606.08415.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # Auto-anchor utils 2 | 3 | import numpy as np 4 | import torch 5 | import yaml 6 | from scipy.cluster.vq import kmeans 7 | from tqdm import tqdm 8 | 9 | from utils.general import colorstr 10 | 11 | 12 | def check_anchor_order(m): 13 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary 14 
| a = m.anchor_grid.prod(-1).view(-1) # anchor area 15 | da = a[-1] - a[0] # delta a 16 | ds = m.stride[-1] - m.stride[0] # delta s 17 | if da.sign() != ds.sign(): # same order 18 | print('Reversing anchor order') 19 | m.anchors[:] = m.anchors.flip(0) 20 | m.anchor_grid[:] = m.anchor_grid.flip(0) 21 | 22 | 23 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 24 | # Check anchor fit to data, recompute if necessary 25 | prefix = colorstr('autoanchor: ') 26 | print(f'\n{prefix}Analyzing anchors... ', end='') 27 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 28 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 29 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 30 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 31 | 32 | def metric(k): # compute metric 33 | r = wh[:, None] / k[None] 34 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 35 | best = x.max(1)[0] # best_x 36 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 37 | bpr = (best > 1. / thr).float().mean() # best possible recall 38 | return bpr, aat 39 | 40 | bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2)) 41 | print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') 42 | if bpr < 0.98: # threshold to recompute 43 | print('. Attempting to improve anchors, please wait...') 44 | na = m.anchor_grid.numel() // 2 # number of anchors 45 | new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 46 | new_bpr = metric(new_anchors.reshape(-1, 2))[0] 47 | if new_bpr > bpr: # replace anchors 48 | new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors) 49 | m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference 50 | m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 51 | check_anchor_order(m) 52 | print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.') 53 | else: 54 | print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.') 55 | print('') # newline 56 | 57 | 58 | def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 59 | """ Creates kmeans-evolved anchors from training dataset 60 | 61 | Arguments: 62 | path: path to dataset *.yaml, or a loaded dataset 63 | n: number of anchors 64 | img_size: image size used for training 65 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 66 | gen: generations to evolve anchors using genetic algorithm 67 | verbose: print all results 68 | 69 | Return: 70 | k: kmeans evolved anchors 71 | 72 | Usage: 73 | from utils.autoanchor import *; _ = kmean_anchors() 74 | """ 75 | thr = 1. / thr 76 | prefix = colorstr('autoanchor: ') 77 | 78 | def metric(k, wh): # compute metrics 79 | r = wh[:, None] / k[None] 80 | x = torch.min(r, 1. 
/ r).min(2)[0] # ratio metric 81 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 82 | return x, x.max(1)[0] # x, best_x 83 | 84 | def anchor_fitness(k): # mutation fitness 85 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 86 | return (best * (best > thr).float()).mean() # fitness 87 | 88 | def print_results(k): 89 | k = k[np.argsort(k.prod(1))] # sort small to large 90 | x, best = metric(k, wh0) 91 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 92 | print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr') 93 | print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' 94 | f'past_thr={x[x > thr].mean():.3f}-mean: ', end='') 95 | for i, x in enumerate(k): 96 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 97 | return k 98 | 99 | if isinstance(path, str): # *.yaml file 100 | with open(path) as f: 101 | data_dict = yaml.load(f, Loader=yaml.SafeLoader) # model dict 102 | from utils.datasets import LoadImagesAndLabels 103 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 104 | else: 105 | dataset = path # dataset 106 | 107 | # Get label wh 108 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 109 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 110 | 111 | # Filter 112 | i = (wh0 < 3.0).any(1).sum() 113 | if i: 114 | print(f'{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.') 115 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 116 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 117 | 118 | # Kmeans calculation 119 | print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') 120 | s = wh.std(0) # sigmas for whitening 121 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 122 | k *= s 123 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 124 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 125 | k = print_results(k) 126 | 127 | # Plot 128 | # k, d = [None] * 20, [None] * 20 129 | # for i in tqdm(range(1, 21)): 130 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 131 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 132 | # ax = ax.ravel() 133 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 134 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 135 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 136 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 137 | # fig.savefig('wh.png', dpi=200) 138 | 139 | # Evolve 140 | npr = np.random 141 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 142 | pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar 143 | for _ in pbar: 144 | v = np.ones(sh) 145 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 146 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 147 | kg = (k.copy() * v).clip(min=2.0) 148 | fg = anchor_fitness(kg) 149 | if fg > f: 150 | f, k = fg, kg.copy() 151 | pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 152 | if verbose: 153 | print_results(k) 154 | 155 | return print_results(k) 156 | -------------------------------------------------------------------------------- /utils/aws/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/utils/aws/__init__.py -------------------------------------------------------------------------------- /utils/aws/mime.sh: -------------------------------------------------------------------------------- 1 | # AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/ 2 | # This script will run on every instance restart, not only on first start 3 | # --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA --- 4 | 5 | Content-Type: multipart/mixed; boundary="//" 6 | MIME-Version: 1.0 7 | 8 | --// 9 | Content-Type: text/cloud-config; charset="us-ascii" 10 | MIME-Version: 1.0 11 | Content-Transfer-Encoding: 7bit 12 | Content-Disposition: attachment; filename="cloud-config.txt" 13 | 14 | #cloud-config 15 | cloud_final_modules: 16 | - [scripts-user, always] 17 | 18 | --// 19 | Content-Type: text/x-shellscript; charset="us-ascii" 20 | MIME-Version: 1.0 21 | Content-Transfer-Encoding: 7bit 22 | Content-Disposition: attachment; filename="userdata.txt" 23 | 24 | #!/bin/bash 25 | # --- paste contents of userdata.sh here --- 26 | --// 27 | -------------------------------------------------------------------------------- /utils/aws/resume.py: -------------------------------------------------------------------------------- 1 | # Resume all interrupted trainings in yolov5/ dir including DDP trainings 2 | # Usage: $ python utils/aws/resume.py 3 | 4 | import os 5 | import sys 6 | from pathlib import Path 7 | 8 | import torch 9 | import yaml 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | port = 0 # --master_port 14 | path = Path('').resolve() 15 | for last in path.rglob('*/**/last.pt'): 16 | ckpt = torch.load(last) 17 | if ckpt['optimizer'] is None: 18 | continue 19 | 20 | # Load opt.yaml 21 | with open(last.parent.parent / 'opt.yaml') as f: 22 | opt = yaml.load(f, Loader=yaml.SafeLoader) 23 | 24 | # Get device count 25 | d = opt['device'].split(',') # devices 26 | nd = len(d) # number of devices 27 | ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1) # distributed data parallel 28 | 29 | if ddp: # multi-GPU 30 | port += 1 31 | cmd = f'python -m torch.distributed.launch --nproc_per_node {nd} --master_port {port} train.py --resume {last}' 32 | else: # single-GPU 33 | cmd = f'python train.py --resume {last}' 34 | 35 | cmd += ' > /dev/null 2>&1 &' # redirect output to dev/null and run in daemon thread 36 | print(cmd) 37 | os.system(cmd) 38 | -------------------------------------------------------------------------------- /utils/aws/userdata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html 3 | # This script will run only once on first instance start (for a re-start script see mime.sh) 4 | # /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir 5 | # Use >300 GB SSD 6 | 7 | cd home/ubuntu 8 | if [ ! -d yolov5 ]; then 9 | echo "Running first-time script." # install dependencies, download COCO, pull Docker 10 | git clone https://github.com/ultralytics/yolov5 && sudo chmod -R 777 yolov5 11 | cd yolov5 12 | bash data/scripts/get_coco.sh && echo "Data done." & 13 | sudo docker pull ultralytics/yolov5:latest && echo "Docker done." 
& 14 | python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." & 15 | wait && echo "All tasks done." # finish background tasks 16 | else 17 | echo "Running re-start script." # resume interrupted runs 18 | i=0 19 | list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour' 20 | while IFS= read -r id; do 21 | ((i++)) 22 | echo "restarting container $i: $id" 23 | sudo docker start $id 24 | # sudo docker exec -it $id python train.py --resume # single-GPU 25 | sudo docker exec -d $id python utils/aws/resume.py # multi-scenario 26 | done <<<"$list" 27 | fi 28 | -------------------------------------------------------------------------------- /utils/google_app_engine/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/google-appengine/python 2 | 3 | # Create a virtualenv for dependencies. This isolates these packages from 4 | # system-level packages. 5 | # Use -p python3 or -p python3.7 to select python version. Default is version 2. 6 | RUN virtualenv /env -p python3 7 | 8 | # Setting these environment variables are the same as running 9 | # source /env/bin/activate. 10 | ENV VIRTUAL_ENV /env 11 | ENV PATH /env/bin:$PATH 12 | 13 | RUN apt-get update && apt-get install -y python-opencv 14 | 15 | # Copy the application's requirements.txt and run pip to install all 16 | # dependencies into the virtualenv. 17 | ADD requirements.txt /app/requirements.txt 18 | RUN pip install -r /app/requirements.txt 19 | 20 | # Add the application source code. 21 | ADD . /app 22 | 23 | # Run a WSGI server to serve the application. gunicorn must be declared as 24 | # a dependency in requirements.txt. 25 | CMD gunicorn -b :$PORT main:app 26 | -------------------------------------------------------------------------------- /utils/google_app_engine/additional_requirements.txt: -------------------------------------------------------------------------------- 1 | # add these requirements in your app on top of the existing ones 2 | pip==18.1 3 | Flask==1.0.2 4 | gunicorn==19.9.0 5 | -------------------------------------------------------------------------------- /utils/google_app_engine/app.yaml: -------------------------------------------------------------------------------- 1 | runtime: custom 2 | env: flex 3 | 4 | service: yolov5app 5 | 6 | liveness_check: 7 | initial_delay_sec: 600 8 | 9 | manual_scaling: 10 | instances: 1 11 | resources: 12 | cpu: 1 13 | memory_gb: 4 14 | disk_size_gb: 20 -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | import time 7 | from pathlib import Path 8 | 9 | import requests 10 | import torch 11 | 12 | 13 | def gsutil_getsize(url=''): 14 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 15 | s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') 16 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 17 | 18 | 19 | def attempt_download(file, repo='ultralytics/yolov5'): 20 | # Attempt file download if does not exist 21 | file = Path(str(file).strip().replace("'", '').lower()) 22 | 23 | if not file.exists(): 24 | try: 25 | response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api 26 | assets = 
[x['name'] for x in response['assets']] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...] 27 | tag = response['tag_name'] # i.e. 'v1.0' 28 | except: # fallback plan 29 | assets = ['yolov5.pt', 'yolov5.pt', 'yolov5l.pt', 'yolov5x.pt'] 30 | tag = subprocess.check_output('git tag', shell=True).decode('utf-8').split('\n')[-2] 31 | 32 | name = file.name 33 | if name in assets: 34 | msg = f'{file} missing, try downloading from https://github.com/{repo}/releases/' 35 | redundant = False # second download option 36 | try: # GitHub 37 | url = f'https://github.com/{repo}/releases/download/{tag}/{name}' 38 | print(f'Downloading {url} to {file}...') 39 | torch.hub.download_url_to_file(url, file) 40 | assert file.exists() and file.stat().st_size > 1E6 # check 41 | except Exception as e: # GCP 42 | print(f'Download error: {e}') 43 | assert redundant, 'No secondary mirror' 44 | url = f'https://storage.googleapis.com/{repo}/ckpt/{name}' 45 | print(f'Downloading {url} to {file}...') 46 | os.system(f'curl -L {url} -o {file}') # torch.hub.download_url_to_file(url, weights) 47 | finally: 48 | if not file.exists() or file.stat().st_size < 1E6: # check 49 | file.unlink(missing_ok=True) # remove partial downloads 50 | print(f'ERROR: Download failure: {msg}') 51 | print('') 52 | return 53 | 54 | 55 | def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'): 56 | # Downloads a file from Google Drive. from yolov5.utils.google_utils import *; gdrive_download() 57 | t = time.time() 58 | file = Path(file) 59 | cookie = Path('cookie') # gdrive cookie 60 | print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='') 61 | file.unlink(missing_ok=True) # remove existing file 62 | cookie.unlink(missing_ok=True) # remove existing cookie 63 | 64 | # Attempt file download 65 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 66 | os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}') 67 | if os.path.exists('cookie'): # large file 68 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' 69 | else: # small file 70 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' 71 | r = os.system(s) # execute, capture return 72 | cookie.unlink(missing_ok=True) # remove existing cookie 73 | 74 | # Error check 75 | if r != 0: 76 | file.unlink(missing_ok=True) # remove partial 77 | print('Download error ') # raise Exception('Download error') 78 | return r 79 | 80 | # Unzip if archive 81 | if file.suffix == '.zip': 82 | print('unzipping... 
', end='') 83 | os.system(f'unzip -q {file}') # unzip 84 | file.unlink() # remove zip to free space 85 | 86 | print(f'Done ({time.time() - t:.1f}s)') 87 | return r 88 | 89 | 90 | def get_token(cookie="./cookie"): 91 | with open(cookie) as f: 92 | for line in f: 93 | if "download" in line: 94 | return line.split()[-1] 95 | return "" 96 | 97 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 98 | # # Uploads a file to a bucket 99 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 100 | # 101 | # storage_client = storage.Client() 102 | # bucket = storage_client.get_bucket(bucket_name) 103 | # blob = bucket.blob(destination_blob_name) 104 | # 105 | # blob.upload_from_filename(source_file_name) 106 | # 107 | # print('File {} uploaded to {}.'.format( 108 | # source_file_name, 109 | # destination_blob_name)) 110 | # 111 | # 112 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 113 | # # Uploads a blob from a bucket 114 | # storage_client = storage.Client() 115 | # bucket = storage_client.get_bucket(bucket_name) 116 | # blob = bucket.blob(source_blob_name) 117 | # 118 | # blob.download_to_filename(destination_file_name) 119 | # 120 | # print('Blob {} downloaded to {}.'.format( 121 | # source_blob_name, 122 | # destination_file_name)) 123 | -------------------------------------------------------------------------------- /utils/infer_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | 5 | def decode_infer(output, stride): 6 | # logging.info(torch.tensor(output.shape[0])) 7 | # logging.info(output.shape) 8 | # # bz is batch-size 9 | # bz = tuple(torch.tensor(output.shape[0])) 10 | # gridsize = tuple(torch.tensor(output.shape[-1])) 11 | # logging.info(gridsize) 12 | sh = torch.tensor(output.shape) 13 | bz = sh[0] 14 | gridsize = sh[-1] 15 | 16 | output = output.permute(0, 2, 3, 1) 17 | output = output.view(bz, gridsize, gridsize, self.gt_per_grid, 5+self.numclass) 18 | x1y1, x2y2, conf, prob = torch.split( 19 | output, [2, 2, 1, self.numclass], dim=4) 20 | 21 | shiftx = torch.arange(0, gridsize, dtype=torch.float32) 22 | shifty = torch.arange(0, gridsize, dtype=torch.float32) 23 | shifty, shiftx = torch.meshgrid([shiftx, shifty]) 24 | shiftx = shiftx.unsqueeze(-1).repeat(bz, 1, 1, self.gt_per_grid) 25 | shifty = shifty.unsqueeze(-1).repeat(bz, 1, 1, self.gt_per_grid) 26 | 27 | xy_grid = torch.stack([shiftx, shifty], dim=4).cuda() 28 | x1y1 = (xy_grid+0.5-torch.exp(x1y1))*stride 29 | x2y2 = (xy_grid+0.5+torch.exp(x2y2))*stride 30 | 31 | xyxy = torch.cat((x1y1, x2y2), dim=4) 32 | conf = torch.sigmoid(conf) 33 | prob = torch.sigmoid(prob) 34 | output = torch.cat((xyxy, conf, prob), 4) 35 | output = output.view(bz, -1, 5+self.numclass) 36 | return output -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | # Model validation metrics 2 | 3 | from pathlib import Path 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import torch 8 | 9 | from . 
import general 10 | 11 | 12 | def fitness(x): 13 | # Model fitness as a weighted combination of metrics 14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] 15 | return (x[:, :4] * w).sum(1) 16 | 17 | 18 | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]): 19 | """ Compute the average precision, given the recall and precision curves. 20 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 21 | # Arguments 22 | tp: True positives (nparray, nx1 or nx10). 23 | conf: Objectness value from 0-1 (nparray). 24 | pred_cls: Predicted object classes (nparray). 25 | target_cls: True object classes (nparray). 26 | plot: Plot precision-recall curve at mAP@0.5 27 | save_dir: Plot save directory 28 | # Returns 29 | The average precision as computed in py-faster-rcnn. 30 | """ 31 | 32 | # Sort by objectness 33 | i = np.argsort(-conf) 34 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 35 | 36 | # Find unique classes 37 | unique_classes = np.unique(target_cls) 38 | 39 | # Create Precision-Recall curve and compute AP for each class 40 | px, py = np.linspace(0, 1, 1000), [] # for plotting 41 | pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898 42 | s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95) 43 | ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s) 44 | for ci, c in enumerate(unique_classes): 45 | i = pred_cls == c 46 | n_l = (target_cls == c).sum() # number of labels 47 | n_p = i.sum() # number of predictions 48 | 49 | if n_p == 0 or n_l == 0: 50 | continue 51 | else: 52 | # Accumulate FPs and TPs 53 | fpc = (1 - tp[i]).cumsum(0) 54 | tpc = tp[i].cumsum(0) 55 | 56 | # Recall 57 | recall = tpc / (n_l + 1e-16) # recall curve 58 | r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) # r at pr_score, negative x, xp because xp decreases 59 | 60 | # Precision 61 | precision = tpc / (tpc + fpc) # precision curve 62 | p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score 63 | 64 | # AP from recall-precision curve 65 | for j in range(tp.shape[1]): 66 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) 67 | if plot and (j == 0): 68 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 69 | 70 | # Compute F1 score (harmonic mean of precision and recall) 71 | f1 = 2 * p * r / (p + r + 1e-16) 72 | 73 | if plot: 74 | plot_pr_curve(px, py, ap, save_dir, names) 75 | 76 | return p, r, ap, f1, unique_classes.astype('int32') 77 | 78 | 79 | def compute_ap(recall, precision): 80 | """ Compute the average precision, given the recall and precision curves 81 | # Arguments 82 | recall: The recall curve (list) 83 | precision: The precision curve (list) 84 | # Returns 85 | Average precision, precision curve, recall curve 86 | """ 87 | 88 | # Append sentinel values to beginning and end 89 | mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01])) 90 | mpre = np.concatenate(([1.], precision, [0.])) 91 | 92 | # Compute the precision envelope 93 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) 94 | 95 | # Integrate area under curve 96 | method = 'interp' # methods: 'continuous', 'interp' 97 | if method == 'interp': 98 | x = np.linspace(0, 1, 101) # 101-point interp (COCO) 99 | ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate 100 | else: # 'continuous' 101 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes 102 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 
77 | 
78 | 
79 | def compute_ap(recall, precision):
80 |     """ Compute the average precision, given the recall and precision curves
81 |     # Arguments
82 |         recall: The recall curve (list)
83 |         precision: The precision curve (list)
84 |     # Returns
85 |         Average precision, precision curve, recall curve
86 |     """
87 | 
88 |     # Append sentinel values to beginning and end
89 |     mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01]))
90 |     mpre = np.concatenate(([1.], precision, [0.]))
91 | 
92 |     # Compute the precision envelope
93 |     mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
94 | 
95 |     # Integrate area under curve
96 |     method = 'interp'  # methods: 'continuous', 'interp'
97 |     if method == 'interp':
98 |         x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
99 |         ap = np.trapz(np.interp(x, mrec, mpre), x)  # integrate
100 |     else:  # 'continuous'
101 |         i = np.where(mrec[1:] != mrec[:-1])[0]  # points where x axis (recall) changes
102 |         ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve
103 | 
104 |     return ap, mpre, mrec
105 | 
106 | 
107 | class ConfusionMatrix:
108 |     # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
109 |     def __init__(self, nc, conf=0.25, iou_thres=0.45):
110 |         self.matrix = np.zeros((nc + 1, nc + 1))
111 |         self.nc = nc  # number of classes
112 |         self.conf = conf
113 |         self.iou_thres = iou_thres
114 | 
115 |     def process_batch(self, detections, labels):
116 |         """
117 |         Return intersection-over-union (Jaccard index) of boxes.
118 |         Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
119 |         Arguments:
120 |             detections (Array[N, 6]), x1, y1, x2, y2, conf, class
121 |             labels (Array[M, 5]), class, x1, y1, x2, y2
122 |         Returns:
123 |             None, updates confusion matrix accordingly
124 |         """
125 |         detections = detections[detections[:, 4] > self.conf]
126 |         gt_classes = labels[:, 0].int()
127 |         detection_classes = detections[:, 5].int()
128 |         iou = general.box_iou(labels[:, 1:], detections[:, :4])
129 | 
130 |         x = torch.where(iou > self.iou_thres)
131 |         if x[0].shape[0]:
132 |             matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
133 |             if x[0].shape[0] > 1:
134 |                 matches = matches[matches[:, 2].argsort()[::-1]]
135 |                 matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
136 |                 matches = matches[matches[:, 2].argsort()[::-1]]
137 |                 matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
138 |         else:
139 |             matches = np.zeros((0, 3))
140 | 
141 |         n = matches.shape[0] > 0
142 |         m0, m1, _ = matches.transpose().astype(np.int16)
143 |         for i, gc in enumerate(gt_classes):
144 |             j = m0 == i
145 |             if n and sum(j) == 1:
146 |                 self.matrix[gc, detection_classes[m1[j]]] += 1  # correct
147 |             else:
148 |                 self.matrix[gc, self.nc] += 1  # background FP
149 | 
150 |         if n:
151 |             for i, dc in enumerate(detection_classes):
152 |                 if not any(m1 == i):
153 |                     self.matrix[self.nc, dc] += 1  # background FN
154 | 
155 |     def matrix(self):
156 |         return self.matrix
157 | 
158 |     def plot(self, save_dir='', names=()):
159 |         try:
160 |             import seaborn as sn
161 | 
162 |             array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6)  # normalize
163 |             array[array < 0.005] = np.nan  # don't annotate (would appear as 0.00)
164 | 
165 |             fig = plt.figure(figsize=(12, 9), tight_layout=True)
166 |             sn.set(font_scale=1.0 if self.nc < 50 else 0.8)  # for label size
167 |             labels = (0 < len(names) < 99) and len(names) == self.nc  # apply names to ticklabels
168 |             sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True,
169 |                        xticklabels=names + ['background FN'] if labels else "auto",
170 |                        yticklabels=names + ['background FP'] if labels else "auto").set_facecolor((1, 1, 1))
171 |             fig.axes[0].set_xlabel('True')
172 |             fig.axes[0].set_ylabel('Predicted')
173 |             fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
174 |         except Exception as e:
175 |             pass
176 | 
177 |     def print(self):
178 |         for i in range(self.nc + 1):
179 |             print(' '.join(map(str, self.matrix[i])))
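
A hedged sketch of how `ConfusionMatrix` is typically driven once per image during evaluation (not part of the file). The boxes and the 'face' class name below are made up; `process_batch` expects torch tensors with detections as [x1, y1, x2, y2, conf, cls] and labels as [cls, x1, y1, x2, y2], and `plot` needs seaborn (it silently skips on failure).

```
import torch
from utils.metrics import ConfusionMatrix  # assumption: repo root / utils on PYTHONPATH

cm = ConfusionMatrix(nc=1)  # single 'face' class

detections = torch.tensor([[10., 10., 50., 50., 0.9, 0.]])  # one confident prediction
labels = torch.tensor([[0., 12., 12., 48., 48.]])           # one ground-truth box

cm.process_batch(detections, labels)   # call once per image / batch
cm.print()                             # raw counts, rows = true class (+ background)
cm.plot(save_dir='.', names=['face'])  # writes confusion_matrix.png if seaborn is available
```
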
180 | 
181 | 
182 | # Plots ----------------------------------------------------------------------------------------------------------------
183 | 
184 | def plot_pr_curve(px, py, ap, save_dir='.', names=()):
185 |     fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
186 |     py = np.stack(py, axis=1)
187 | 
188 |     if 0 < len(names) < 21:  # show per-class mAP in legend if < 21 classes
189 |         for i, y in enumerate(py.T):
190 |             ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0])  # plot(recall, precision)
191 |     else:
192 |         ax.plot(px, py, linewidth=1, color='grey')  # plot(recall, precision)
193 | 
194 |     ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
195 |     ax.set_xlabel('Recall')
196 |     ax.set_ylabel('Precision')
197 |     ax.set_xlim(0, 1)
198 |     ax.set_ylim(0, 1)
199 |     plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
200 |     fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250)
201 | 
--------------------------------------------------------------------------------
/utils/torch_utils.py:
--------------------------------------------------------------------------------
1 | # PyTorch utils
2 | 
3 | import logging
4 | import math
5 | import os
6 | import subprocess
7 | import time
8 | from contextlib import contextmanager
9 | from copy import deepcopy
10 | from pathlib import Path
11 | 
12 | import torch
13 | import torch.backends.cudnn as cudnn
14 | import torch.nn as nn
15 | import torch.nn.functional as F
16 | import torchvision
17 | 
18 | try:
19 |     import thop  # for FLOPS computation
20 | except ImportError:
21 |     thop = None
22 | logger = logging.getLogger(__name__)
23 | 
24 | 
25 | @contextmanager
26 | def torch_distributed_zero_first(local_rank: int):
27 |     """
28 |     Decorator to make all processes in distributed training wait for each local_master to do something.
29 |     """
30 |     if local_rank not in [-1, 0]:
31 |         torch.distributed.barrier()
32 |     yield
33 |     if local_rank == 0:
34 |         torch.distributed.barrier()
35 | 
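
The usual DDP pattern for `torch_distributed_zero_first`, shown as an illustrative sketch only: rank 0 runs the body first (e.g. downloads and caches labels) while the other ranks block on the barrier and then reuse the prepared cache. `prepare_dataset` is a hypothetical stand-in, not a function from this repo.

```
from utils.torch_utils import torch_distributed_zero_first  # assumption: repo root on PYTHONPATH


def prepare_dataset():   # hypothetical stand-in for the real dataset / cache builder
    return list(range(10))


local_rank = -1          # would come from --local_rank / RANK in a real DDP launch

with torch_distributed_zero_first(local_rank):
    dataset = prepare_dataset()  # rank 0 does the work; other ranks wait, then reuse it
```
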
36 | 
37 | def init_torch_seeds(seed=0):
38 |     # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html
39 |     torch.manual_seed(seed)
40 |     if seed == 0:  # slower, more reproducible
41 |         cudnn.benchmark, cudnn.deterministic = False, True
42 |     else:  # faster, less reproducible
43 |         cudnn.benchmark, cudnn.deterministic = True, False
44 | 
45 | 
46 | def git_describe():
47 |     # return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe
48 |     if Path('.git').exists():
49 |         return subprocess.check_output('git describe --tags --long --always', shell=True).decode('utf-8')[:-1]
50 |     else:
51 |         return ''
52 | 
53 | 
54 | def select_device(device='', batch_size=None):
55 |     # device = 'cpu' or '0' or '0,1,2,3'
56 |     s = f'YOLOv5 {git_describe()} torch {torch.__version__} '  # string
57 |     cpu = device.lower() == 'cpu'
58 |     if cpu:
59 |         os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
60 |     elif device:  # non-cpu device requested
61 |         os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
62 |         assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested'  # check availability
63 | 
64 |     cuda = not cpu and torch.cuda.is_available()
65 |     if cuda:
66 |         n = torch.cuda.device_count()
67 |         if n > 1 and batch_size:  # check that batch_size is compatible with device_count
68 |             assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}'
69 |         space = ' ' * len(s)
70 |         for i, d in enumerate(device.split(',') if device else range(n)):
71 |             p = torch.cuda.get_device_properties(i)
72 |             s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n"  # bytes to MB
73 |     else:
74 |         s += 'CPU\n'
75 | 
76 |     logger.info(s)  # skip a line
77 |     return torch.device('cuda:0' if cuda else 'cpu')
78 | 
79 | 
80 | def time_synchronized():
81 |     # pytorch-accurate time
82 |     if torch.cuda.is_available():
83 |         torch.cuda.synchronize()
84 |     return time.time()
85 | 
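
An illustrative way to combine `select_device` and `time_synchronized` to time a forward pass (the pooling op below is just a stand-in for a real model; device string and shapes are arbitrary):

```
import torch
from utils.torch_utils import select_device, time_synchronized  # assumption: repo root on PYTHONPATH

device = select_device('')              # '' -> first GPU if available, else CPU; '0,1' selects GPUs
x = torch.zeros(1, 3, 640, 640).to(device)

t0 = time_synchronized()                # cuda.synchronize() before timestamping on GPU
y = torch.nn.functional.avg_pool2d(x, 2)  # stand-in for a model forward pass
t1 = time_synchronized()
print(f'forward: {(t1 - t0) * 1000:.2f} ms on {device}')
```
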
86 | 
87 | def profile(x, ops, n=100, device=None):
88 |     # profile a pytorch module or list of modules. Example usage:
89 |     #     x = torch.randn(16, 3, 640, 640)  # input
90 |     #     m1 = lambda x: x * torch.sigmoid(x)
91 |     #     m2 = nn.SiLU()
92 |     #     profile(x, [m1, m2], n=100)  # profile speed over 100 iterations
93 | 
94 |     device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
95 |     x = x.to(device)
96 |     x.requires_grad = True
97 |     print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '')
98 |     print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}")
99 |     for m in ops if isinstance(ops, list) else [ops]:
100 |         m = m.to(device) if hasattr(m, 'to') else m  # device
101 |         m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m  # type
102 |         dtf, dtb, t = 0., 0., [0., 0., 0.]  # dt forward, backward
103 |         try:
104 |             flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2  # GFLOPS
105 |         except:
106 |             flops = 0
107 | 
108 |         for _ in range(n):
109 |             t[0] = time_synchronized()
110 |             y = m(x)
111 |             t[1] = time_synchronized()
112 |             try:
113 |                 _ = y.sum().backward()
114 |                 t[2] = time_synchronized()
115 |             except:  # no backward method
116 |                 t[2] = float('nan')
117 |             dtf += (t[1] - t[0]) * 1000 / n  # ms per op forward
118 |             dtb += (t[2] - t[1]) * 1000 / n  # ms per op backward
119 | 
120 |         s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list'
121 |         s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list'
122 |         p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0  # parameters
123 |         print(f'{p:12.4g}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}')
124 | 
125 | 
126 | def is_parallel(model):
127 |     return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
128 | 
129 | 
130 | def intersect_dicts(da, db, exclude=()):
131 |     # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
132 |     return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}
133 | 
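
A sketch of the usual partial-checkpoint loading pattern built on `intersect_dicts` above; the toy model and checkpoint keys are made up for illustration, not copied from this repo's train.py.

```
import torch
import torch.nn as nn
from utils.torch_utils import intersect_dicts  # assumption: repo root on PYTHONPATH

# toy "model" and "checkpoint" whose shapes only partially agree
model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.Conv2d(8, 16, 3))
ckpt_state = {'0.weight': torch.zeros(8, 3, 3, 3),  # matches -> kept
              '0.bias': torch.zeros(99),            # wrong shape -> dropped
              'anchors': torch.zeros(3, 2)}         # excluded by name -> dropped

csd = intersect_dicts(ckpt_state, model.state_dict(), exclude=('anchor',))
model.load_state_dict(csd, strict=False)            # load only the compatible keys
print(f'transferred {len(csd)}/{len(model.state_dict())} items')
```
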
134 | 
135 | def initialize_weights(model):
136 |     for m in model.modules():
137 |         t = type(m)
138 |         if t is nn.Conv2d:
139 |             pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
140 |         elif t is nn.BatchNorm2d:
141 |             m.eps = 1e-3
142 |             m.momentum = 0.03
143 |         elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
144 |             m.inplace = True
145 | 
146 | 
147 | def find_modules(model, mclass=nn.Conv2d):
148 |     # Finds layer indices matching module class 'mclass'
149 |     return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
150 | 
151 | 
152 | def sparsity(model):
153 |     # Return global model sparsity
154 |     a, b = 0., 0.
155 |     for p in model.parameters():
156 |         a += p.numel()
157 |         b += (p == 0).sum()
158 |     return b / a
159 | 
160 | 
161 | def prune(model, amount=0.3):
162 |     # Prune model to requested global sparsity
163 |     import torch.nn.utils.prune as prune
164 |     print('Pruning model... ', end='')
165 |     for name, m in model.named_modules():
166 |         if isinstance(m, nn.Conv2d):
167 |             prune.l1_unstructured(m, name='weight', amount=amount)  # prune
168 |             prune.remove(m, 'weight')  # make permanent
169 |     print(' %.3g global sparsity' % sparsity(model))
170 | 
171 | 
172 | def fuse_conv_and_bn(conv, bn):
173 |     # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
174 |     fusedconv = nn.Conv2d(conv.in_channels,
175 |                           conv.out_channels,
176 |                           kernel_size=conv.kernel_size,
177 |                           stride=conv.stride,
178 |                           padding=conv.padding,
179 |                           groups=conv.groups,
180 |                           bias=True).requires_grad_(False).to(conv.weight.device)
181 | 
182 |     # prepare filters
183 |     w_conv = conv.weight.clone().view(conv.out_channels, -1)
184 |     w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
185 |     fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
186 | 
187 |     # prepare spatial bias
188 |     b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
189 |     b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
190 |     fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
191 | 
192 |     return fusedconv
193 | 
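
A quick, illustrative numerical sanity check that `fuse_conv_and_bn` reproduces conv followed by BatchNorm in eval mode; the layer sizes and fake statistics below are arbitrary. Fusion assumes fixed (inference-time) BN statistics, so both modules are put in eval mode first.

```
import torch
import torch.nn as nn
from utils.torch_utils import fuse_conv_and_bn  # assumption: repo root on PYTHONPATH

conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False).eval()
bn = nn.BatchNorm2d(8).eval()
bn.running_mean.uniform_(-1, 1)    # fake "trained" statistics so the check is non-trivial
bn.running_var.uniform_(0.5, 1.5)
bn.weight.data.uniform_(0.5, 1.5)
bn.bias.data.uniform_(-0.5, 0.5)

x = torch.randn(1, 3, 32, 32)
fused = fuse_conv_and_bn(conv, bn)
with torch.no_grad():
    err = (fused(x) - bn(conv(x))).abs().max().item()
print(f'max abs fusion error: {err:.2e}')  # expect ~1e-6 (float32 round-off)
```
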
194 | 
195 | def model_info(model, verbose=False, img_size=640):
196 |     # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]
197 |     n_p = sum(x.numel() for x in model.parameters())  # number parameters
198 |     n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
199 |     if verbose:
200 |         print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
201 |         for i, (name, p) in enumerate(model.named_parameters()):
202 |             name = name.replace('module_list.', '')
203 |             print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
204 |                   (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
205 | 
206 |     try:  # FLOPS
207 |         from thop import profile
208 |         stride = int(model.stride.max()) if hasattr(model, 'stride') else 32
209 |         img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device)  # input
210 |         flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2  # stride GFLOPS
211 |         img_size = img_size if isinstance(img_size, list) else [img_size, img_size]  # expand if int/float
212 |         fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride)  # 640x640 GFLOPS
213 |     except (ImportError, Exception):
214 |         fs = ''
215 | 
216 |     logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
217 | 
218 | 
219 | def load_classifier(name='resnet101', n=2):
220 |     # Loads a pretrained model reshaped to n-class output
221 |     model = torchvision.models.__dict__[name](pretrained=True)
222 | 
223 |     # ResNet model properties
224 |     # input_size = [3, 224, 224]
225 |     # input_space = 'RGB'
226 |     # input_range = [0, 1]
227 |     # mean = [0.485, 0.456, 0.406]
228 |     # std = [0.229, 0.224, 0.225]
229 | 
230 |     # Reshape output to n classes
231 |     filters = model.fc.weight.shape[1]
232 |     model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
233 |     model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
234 |     model.fc.out_features = n
235 |     return model
236 | 
237 | 
238 | def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
239 |     # scales img(bs,3,y,x) by ratio constrained to gs-multiple
240 |     if ratio == 1.0:
241 |         return img
242 |     else:
243 |         h, w = img.shape[2:]
244 |         s = (int(h * ratio), int(w * ratio))  # new size
245 |         img = F.interpolate(img, size=s, mode='bilinear', align_corners=False)  # resize
246 |         if not same_shape:  # pad/crop img
247 |             h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
248 |         return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean
249 | 
250 | 
251 | def copy_attr(a, b, include=(), exclude=()):
252 |     # Copy attributes from b to a, options to only include [...] and to exclude [...]
253 |     for k, v in b.__dict__.items():
254 |         if (len(include) and k not in include) or k.startswith('_') or k in exclude:
255 |             continue
256 |         else:
257 |             setattr(a, k, v)
258 | 
259 | 
260 | class ModelEMA:
261 |     """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
262 |     Keep a moving average of everything in the model state_dict (parameters and buffers).
263 |     This is intended to allow functionality like
264 |     https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
265 |     A smoothed version of the weights is necessary for some training schemes to perform well.
266 |     This class is sensitive to where it is initialized in the sequence of model init,
267 |     GPU assignment and distributed training wrappers.
268 |     """
269 | 
270 |     def __init__(self, model, decay=0.9999, updates=0):
271 |         # Create EMA
272 |         self.ema = deepcopy(model.module if is_parallel(model) else model).eval()  # FP32 EMA
273 |         # if next(model.parameters()).device.type != 'cpu':
274 |         #     self.ema.half()  # FP16 EMA
275 |         self.updates = updates  # number of EMA updates
276 |         self.decay = lambda x: decay * (1 - math.exp(-x / 2000))  # decay exponential ramp (to help early epochs)
277 |         for p in self.ema.parameters():
278 |             p.requires_grad_(False)
279 | 
280 |     def update(self, model):
281 |         # Update EMA parameters
282 |         with torch.no_grad():
283 |             self.updates += 1
284 |             d = self.decay(self.updates)
285 | 
286 |             msd = model.module.state_dict() if is_parallel(model) else model.state_dict()  # model state_dict
287 |             for k, v in self.ema.state_dict().items():
288 |                 if v.dtype.is_floating_point:
289 |                     v *= d
290 |                     v += (1. - d) * msd[k].detach()
291 | 
292 |     def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
293 |         # Update EMA attributes
294 |         copy_attr(self.ema, model, include, exclude)
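
A hedged sketch of the usual `ModelEMA` training-loop usage: create the EMA copy after the model is built and placed on its device, call `update` after each optimizer step, and evaluate or export `ema.ema` rather than the raw model. The linear model, optimizer and 'ema.pt' filename below are toy stand-ins, not this repo's train.py.

```
import torch
import torch.nn as nn
from utils.torch_utils import ModelEMA  # assumption: repo root on PYTHONPATH

model = nn.Linear(10, 1)                 # stand-in for the detector
opt = torch.optim.SGD(model.parameters(), lr=0.01)
ema = ModelEMA(model)                    # EMA copy, created after model/device setup

for step in range(100):                  # toy training loop
    x, y = torch.randn(8, 10), torch.randn(8, 1)
    loss = nn.functional.mse_loss(model(x), y)
    opt.zero_grad()
    loss.backward()
    opt.step()
    ema.update(model)                    # blend current weights into the EMA copy

ema.update_attr(model)                   # copy non-tensor attributes onto the EMA model
torch.save({'model': ema.ema.state_dict()}, 'ema.pt')  # export the smoothed weights
```
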
295 | 
--------------------------------------------------------------------------------
/utils/wandb_logging/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pallashadow/yolov5face_torchserve_tensorrt/41ee13d40c8b54317b4f7649d8f3eabb1f560633/utils/wandb_logging/__init__.py
--------------------------------------------------------------------------------
/utils/wandb_logging/log_dataset.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | 
3 | import yaml
4 | 
5 | from wandb_utils import WandbLogger
6 | 
7 | WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
8 | 
9 | 
10 | def create_dataset_artifact(opt):
11 |     with open(opt.data) as f:
12 |         data = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
13 |     logger = WandbLogger(opt, '', None, data, job_type='Dataset Creation')
14 | 
15 | 
16 | if __name__ == '__main__':
17 |     parser = argparse.ArgumentParser()
18 |     parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
19 |     parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
20 |     parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project')
21 |     opt = parser.parse_args()
22 |     opt.resume = False  # Explicitly disallow resume check for dataset upload job
23 | 
24 |     create_dataset_artifact(opt)
25 | 
--------------------------------------------------------------------------------
/weights/download_weights.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Download latest models from https://github.com/ultralytics/yolov5/releases
3 | # Usage:
4 | # $ bash weights/download_weights.sh
5 | 
6 | python3 - <