├── .gitignore ├── LICENSE ├── README.md ├── deepinversion_yolo.py ├── docker_environment ├── Dockerfile └── lpr_env.yml ├── images ├── coreidea.png ├── resnet50.png └── yolov3.jpg ├── knowledge_distillation └── yolov3-master │ ├── Dockerfile │ ├── LICENSE │ ├── README.md │ ├── cfg │ ├── cd53s-yolov3.cfg │ ├── cd53s.cfg │ ├── csresnext50-panet-spp.cfg │ ├── yolov3-1cls.cfg │ ├── yolov3-asff.cfg │ ├── yolov3-spp-1cls.cfg │ ├── yolov3-spp-3cls.cfg │ ├── yolov3-spp-matrix.cfg │ ├── yolov3-spp-pan-scale.cfg │ ├── yolov3-spp.cfg │ ├── yolov3-spp3.cfg │ ├── yolov3-tiny-1cls.cfg │ ├── yolov3-tiny-3cls.cfg │ ├── yolov3-tiny.cfg │ ├── yolov3-tiny3-1cls.cfg │ ├── yolov3-tiny3.cfg │ ├── yolov3.cfg │ ├── yolov4-relu.cfg │ ├── yolov4-tiny.cfg │ └── yolov4.cfg │ ├── data │ ├── NGC_bdd100k.data │ ├── NGC_coco2014.data │ ├── NGC_fakecoco.data │ ├── NGC_gta5.data │ ├── NGC_hallucinate.data │ ├── NGC_imagenet.data │ ├── NGC_onebox.data │ ├── NGC_realsynth.data │ ├── NGC_tiles.data │ ├── NGC_voc.data │ ├── coco.names │ ├── coco1.data │ ├── coco1.txt │ ├── coco16.data │ ├── coco16.txt │ ├── coco1cls.data │ ├── coco1cls.txt │ ├── coco2014.data │ ├── coco2017.data │ ├── coco64.data │ ├── coco64.txt │ ├── coco_paper.names │ ├── get_coco2014.sh │ ├── get_coco2017.sh │ └── samples │ │ ├── bus.jpg │ │ └── zidane.jpg │ ├── detect.py │ ├── distill.py │ ├── models.py │ ├── requirements.txt │ ├── run.sh │ ├── test.py │ ├── train.py │ ├── tutorial.ipynb │ ├── utils │ ├── __init__.py │ ├── adabound.py │ ├── cleanup.sh │ ├── datasets.py │ ├── distill_utils.py │ ├── evolve.sh │ ├── gcp.sh │ ├── google_utils.py │ ├── layers.py │ ├── parse_config.py │ ├── torch_utils.py │ └── utils.py │ └── weights │ └── download_yolov3_weights.sh ├── main_yolo.py ├── models └── yolo │ ├── 5k.txt │ ├── 5k_fullpath.txt │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── cfg │ ├── csresnext50-panet-spp.cfg │ ├── yolov3-1cls.cfg │ ├── yolov3-spp-1cls.cfg │ ├── yolov3-spp-3cls.cfg │ ├── yolov3-spp-matrix.cfg │ ├── yolov3-spp-pan-scale.cfg │ ├── yolov3-spp.cfg │ ├── yolov3-spp3.cfg │ ├── yolov3-tiny-1cls.cfg │ ├── yolov3-tiny-3cls.cfg │ ├── yolov3-tiny.cfg │ ├── yolov3-tiny3-1cls.cfg │ ├── yolov3-tiny3.cfg │ ├── yolov3.cfg │ ├── yolov3s.cfg │ ├── yolov4-tiny-1cls.cfg │ └── yolov4-tiny.cfg │ ├── datasets.py │ ├── models.py │ ├── parse_config.py │ ├── torch_utils.py │ ├── utils.py │ └── yolostuff.py ├── scripts ├── LINE_looped_runner_yolo.sh ├── auto_gpu.sh ├── draw_all_boxes_dataset.py ├── runner_yolo_multiscale.sh └── singlebox_dataset.py └── utils_di.py /.gitignore: -------------------------------------------------------------------------------- 1 | generation/ 2 | temp/ 3 | __pycache__ 4 | .idea/ 5 | *.tar.gz 6 | *.zip 7 | *.pkl 8 | *.pyc 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | NVIDIA Source Code License for DIODE 2 | 3 | 1. Definitions 4 | 5 | “Licensor” means any person or entity that distributes its Work. 6 | 7 | “Software” means the original work of authorship made available under this License. 8 | 9 | “Work” means the Software and any additions to or derivative works of the Software that 10 | are made available under this License. 11 | 12 | The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the 13 | meaning as provided under U.S. 
copyright law; provided, however, that for the purposes 14 | of this License, derivative works shall not include works that remain separable from, 15 | or merely link (or bind by name) to the interfaces of, the Work. 16 | 17 | Works, including the Software, are “made available” under this License by including in 18 | or with the Work either (a) a copyright notice referencing the applicability of this 19 | License to the Work, or (b) a copy of this License. 20 | 21 | 2. License Grant 22 | 23 | 2.1 Copyright Grant. Subject to the terms and conditions of this License, each Licensor 24 | grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to 25 | reproduce, prepare derivative works of, publicly display, publicly perform, sublicense 26 | and distribute its Work and any resulting derivative works in any form. 27 | 28 | 3. Limitations 29 | 30 | 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under 31 | this License, (b) you include a complete copy of this License with your distribution, 32 | and (c) you retain without modification any copyright, patent, trademark, or attribution 33 | notices that are present in the Work. 34 | 35 | 3.2 Derivative Works. You may specify that additional or different terms apply to the 36 | use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) 37 | only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your 38 | derivative works, and (b) you identify the specific derivative works that are subject 39 | to Your Terms. Notwithstanding Your Terms, this License (including the redistribution 40 | requirements in Section 3.1) will continue to apply to the Work itself. 41 | 42 | 3.3 Use Limitation. The Work and any derivative works thereof only may be used or 43 | intended for use non-commercially. Notwithstanding the foregoing, NVIDIA and its 44 | affiliates may use the Work and any derivative works commercially. As used herein, 45 | “non-commercially” means for research or evaluation purposes only. 46 | 47 | 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor 48 | (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents 49 | that you allege are infringed by any Work, then your rights under this License from such 50 | Licensor (including the grant in Section 2.1) will terminate immediately. 51 | 52 | 3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its 53 | affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices 54 | described in this License. 55 | 56 | 3.6 Termination. If you violate any term of this License, then your rights under this 57 | License (including the grant in Section 2.1) will terminate immediately. 58 | 59 | 4. Disclaimer of Warranty. 60 | 61 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 62 | EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR 63 | A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY 64 | ACTIVITIES UNDER THIS LICENSE. 65 | 66 | 5. Limitation of Liability. 
67 | 68 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER 69 | IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO 70 | YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL 71 | DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK 72 | (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR 73 | DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN 74 | IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | 3 | This repository is the official PyTorch implementation of Data-free Knowledge Distillation for Object Detection, WACV 2021. 4 | 5 | **Data-free Knowledge Distillation for Object Detection**
6 | Akshay Chawla, Hongxu Yin, Pavlo Molchanov and Jose Alvarez
7 | NVIDIA 8 | 9 | **Abstract:** We present DeepInversion for Object Detection (DIODE) to enable data-free knowledge distillation for neural networks trained on the object detection task. From a data-free perspective, DIODE synthesizes images given only an off-the-shelf pre-trained detection network and without any prior domain knowledge, generator network, or pre-computed activations. DIODE relies on two key components—first, an extensive set of differentiable augmentations to improve image fidelity and distillation effectiveness. Second, a novel automated bounding box and category sampling scheme for image synthesis enabling generating a large number of images with a diverse set of spatial and category objects. The resulting images enable data-free knowledge distillation from a teacher to a student detector, initialized from scratch. In an extensive set of experiments, we demonstrate that DIODE’s ability to match the original training distribution consistently enables more effective knowledge distillation than out-of-distribution proxy datasets, which unavoidably occur in a data-free setup given the absence of the original domain knowledge. 10 | 11 | 12 | [[PDF - OpenAccess CVF](https://openaccess.thecvf.com/content/WACV2021/papers/Chawla_Data-Free_Knowledge_Distillation_for_Object_Detection_WACV_2021_paper.pdf)] 13 | 14 | ![Core idea](images/coreidea.png "Core idea graphic") 15 | 16 | ## LICENSE 17 | 18 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 19 | 20 | This work is made available under the Nvidia Source Code License (1-Way Commercial). 21 | To view a copy of this license, visit https://github.com/NVlabs/DIODE/blob/master/LICENSE 22 | 23 | ## Setup environment 24 | 25 | Install the conda python package manager [[link](https://docs.conda.io/en/latest/)], then create the `lpr` environment and install the remaining packages as follows: 26 | ``` 27 | $ conda env create -f ./docker_environment/lpr_env.yml 28 | $ conda activate lpr 29 | $ conda install -y -c conda-forge opencv 30 | $ conda install -y tqdm 31 | $ git clone https://github.com/NVIDIA/apex 32 | $ cd apex 33 | $ pip install -v --no-cache-dir ./ 34 | ``` 35 | 36 | Note: You may also build a docker image from the provided Dockerfile `docker_environment/Dockerfile`. 37 | 38 | ## How to run? 39 | 40 | This repository generates location- and category-conditioned images from an off-the-shelf Yolo-V3 object detection model. 41 | 42 | 1. Download the directory *DIODE_data* from Google Cloud Storage: [gcs-link](https://console.cloud.google.com/storage/browser/diode-yolo-wacv) (234 GB) 43 | 2. Copy the pre-trained yolo-v3 checkpoint and pickle files as follows: 44 | ``` 45 | $ cp /path/to/DIODE_data/pretrained/names.pkl /path/to/lpr_deep_inversion/models/yolo/ 46 | $ cp /path/to/DIODE_data/pretrained/colors.pkl /path/to/lpr_deep_inversion/models/yolo/ 47 | $ cp /path/to/DIODE_data/pretrained/yolov3-tiny.pt /path/to/lpr_deep_inversion/models/yolo/ 48 | $ cp /path/to/DIODE_data/pretrained/yolov3-spp-ultralytics.pt /path/to/lpr_deep_inversion/models/yolo/ 49 | ``` 50 | 3. Extract the one-box dataset (single object per image) as follows: 51 | ``` 52 | $ cd /path/to/DIODE_data 53 | $ tar xzf onebox/onebox.tgz -C /tmp 54 | ``` 55 | 4. Confirm that the folder `/tmp/onebox` containing the onebox dataset is present and has the following directories and the text file `manifest.txt`: 56 | ``` 57 | $ cd /tmp/onebox 58 | $ ls 59 | images labels manifest.txt 60 | ``` 61 | 5. Generate images from yolo-v3: 62 | ``` 63 | $ cd /path/to/lpr_deep_inversion 64 | $ chmod +x scripts/runner_yolo_multiscale.sh 65 | $ scripts/runner_yolo_multiscale.sh 66 | ``` 67 | 68 | 69 | ![Images](images/yolov3.jpg "DIODE on Yolo-V3") 70 |
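For orientation, the sketch below illustrates the general DeepInversion-style recipe that the generation step follows: sample random box/category targets, then optimize the pixels of a batch of images so that the frozen teacher detects those targets while intermediate feature statistics stay close to the teacher's stored batch-norm running statistics. This is a simplified illustration only, not the code in `deepinversion_yolo.py` / `main_yolo.py`; the differentiable augmentations and multi-scale schedule are omitted, and `compute_detection_loss`, the target layout, and all hyperparameters are placeholders.

```
# Simplified DeepInversion-style image synthesis sketch (illustration only).
# Assumes a frozen detector `teacher` and a user-supplied `compute_detection_loss`
# that is differentiable w.r.t. the input images; both are placeholders here.
import torch
import torch.nn as nn


class BNStatHook:
    """Penalize divergence of batch statistics from a BatchNorm layer's running stats."""
    def __init__(self, module):
        self.loss = torch.zeros(())
        self.hook = module.register_forward_hook(self._hook)

    def _hook(self, module, inputs, output):
        x = inputs[0]
        mean = x.mean(dim=(0, 2, 3))
        var = x.var(dim=(0, 2, 3), unbiased=False)
        self.loss = (mean - module.running_mean).pow(2).mean() + \
                    (var - module.running_var).pow(2).mean()


def sample_targets(batch_size, num_classes=80, boxes_per_image=1):
    """Sample random (image_idx, class, cx, cy, w, h) rows, normalized to [0, 1]."""
    rows = []
    for b in range(batch_size):
        for _ in range(boxes_per_image):
            cls = torch.randint(num_classes, (1,)).float()
            wh = torch.rand(2) * 0.5 + 0.1              # box spans 10%-60% of the image
            cxcy = torch.rand(2) * (1.0 - wh) + wh / 2  # keep the box fully inside
            rows.append(torch.cat([torch.tensor([float(b)]), cls, cxcy, wh]))
    return torch.stack(rows)


def generate(teacher, compute_detection_loss, batch_size=8, size=320, steps=2000, bn_scale=0.01):
    teacher.eval()
    for p in teacher.parameters():
        p.requires_grad_(False)
    hooks = [BNStatHook(m) for m in teacher.modules() if isinstance(m, nn.BatchNorm2d)]

    inputs = torch.randn(batch_size, 3, size, size, requires_grad=True)
    targets = sample_targets(batch_size)
    opt = torch.optim.Adam([inputs], lr=0.1)

    for _ in range(steps):
        opt.zero_grad()
        preds = teacher(inputs)                         # only the input pixels are trained
        loss = compute_detection_loss(preds, targets) + \
               bn_scale * sum(h.loss for h in hooks)
        loss.backward()
        opt.step()

    for h in hooks:
        h.hook.remove()
    return inputs.detach()
```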
71 | ## Notes: 72 | 73 | 1. For NGC, use the provided bash script `scripts/diode_ngc_interactivejob.sh` to start an interactive NGC job with the environment, code, and data set up. 74 | 2. To generate a large dataset, use the bash script `scripts/LINE_looped_runner_yolo.sh`. 75 | 3. See the `knowledge_distillation` subfolder for knowledge distillation code that uses the generated datasets. 76 | 77 | ## Citation 78 | 79 | ``` 80 | @inproceedings{chawla2021diode, 81 | title = {Data-free Knowledge Distillation for Object Detection}, 82 | author = {Chawla, Akshay and Yin, Hongxu and Molchanov, Pavlo and Alvarez, Jose M.}, 83 | booktitle = {The IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)}, 84 | month = {January}, 85 | year = {2021} 86 | } 87 | ``` 88 | 89 | -------------------------------------------------------------------------------- /docker_environment/Dockerfile: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | # 9 | # Official PyTorch implementation of WACV2021 paper: 10 | # Data-Free Knowledge Distillation for Object Detection 11 | # A Chawla, H Yin, P Molchanov, J Alvarez 12 | # -------------------------------------------------------- 13 | 14 | 15 | FROM nvidia/cuda:10.1-base-ubuntu16.04 16 | 17 | # Install some basic utilities 18 | RUN apt-get update && apt-get install -y \ 19 | curl \ 20 | ca-certificates \ 21 | sudo \ 22 | git \ 23 | bzip2 \ 24 | libx11-6 25 | 26 | RUN sudo apt update 27 | 28 | # Install more basic utilities 29 | RUN sudo apt install -y tmux vim 30 | 31 | # Create a working directory 32 | RUN mkdir /app 33 | WORKDIR /app 34 | 35 | # Create a non-root user and switch to it 36 | RUN adduser --disabled-password --gecos '' --shell /bin/bash user \ 37 | && chown -R user:user /app 38 | RUN echo "user ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-user 39 | USER user 40 | 41 | # All users can use /home/user as their home directory 42 | ENV HOME=/home/user 43 | RUN chmod 777 /home/user 44 | 45 | # Install Miniconda 46 | RUN curl -so ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh \ 47 | && chmod +x ~/miniconda.sh \ 48 | && ~/miniconda.sh -b -p ~/miniconda \ 49 | && rm ~/miniconda.sh 50 | ENV PATH=/home/user/miniconda/bin:$PATH 51 | ENV CONDA_AUTO_UPDATE_CONDA=false 52 | 53 | # Create a Python 3.6 environment 54 | # RUN /home/user/miniconda/bin/conda create -y --name py36 python=3.6.9 \ 55 | # && /home/user/miniconda/bin/conda clean -ya 56 | # ENV CONDA_DEFAULT_ENV=py36 57 | # ENV CONDA_PREFIX=/home/user/miniconda/envs/$CONDA_DEFAULT_ENV 58 | # ENV PATH=$CONDA_PREFIX/bin:$PATH 59 | # RUN /home/user/miniconda/bin/conda install conda-build=3.18.9=py36_3 \ 60 | # && /home/user/miniconda/bin/conda clean -ya 61 | 62 | # Install packages for lpr environment 63 |
COPY lpr_env.yml /home/user/ 64 | RUN /home/user/miniconda/bin/conda env create -f /home/user/lpr_env.yml 65 | ENV CONDA_DEFAULT_ENV=lpr 66 | ENV CONDA_PREFIX=/home/user/miniconda/envs/$CONDA_DEFAULT_ENV 67 | ENV PATH=$CONDA_PREFIX/bin:$PATH 68 | 69 | # RUN sudo rm -rf /var/lib/apt/lists/* 70 | RUN conda install -y -c conda-forge opencv 71 | RUN conda clean -ya 72 | 73 | # Install HDF5 Python bindings 74 | RUN conda install -y h5py \ 75 | && conda clean -ya 76 | # Install Requests, a Python library for making HTTP requests 77 | RUN conda install -y requests \ 78 | && conda clean -ya 79 | 80 | # Install Graphviz 81 | RUN conda install -y graphviz \ 82 | && conda clean -ya 83 | 84 | # Install tqdm 85 | RUN conda install -y tqdm && conda clean -ya 86 | 87 | # Install apex 88 | WORKDIR /home/user 89 | RUN git clone https://github.com/NVIDIA/apex 90 | WORKDIR /home/user/apex 91 | RUN pip install -v --no-cache-dir ./ 92 | WORKDIR /akshayws 93 | -------------------------------------------------------------------------------- /docker_environment/lpr_env.yml: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | # 9 | # Official PyTorch implementation of WACV2021 paper: 10 | # Data-Free Knowledge Distillation for Object Detection 11 | # A Chawla, H Yin, P Molchanov, J Alvarez 12 | # -------------------------------------------------------- 13 | 14 | 15 | name: lpr 16 | channels: 17 | - pytorch 18 | - conda-forge 19 | - defaults 20 | dependencies: 21 | - _libgcc_mutex=0.1=main 22 | - _tflow_select=2.3.0=mkl 23 | - absl-py=0.9.0=py37_0 24 | - asn1crypto=1.3.0=py37_0 25 | - astor=0.8.0=py37_0 26 | - autopep8=1.4.4=py_0 27 | - blas=1.0=mkl 28 | - blinker=1.4=py37_0 29 | - bzip2=1.0.8=h7b6447c_0 30 | - c-ares=1.15.0=h7b6447c_1001 31 | - ca-certificates=2019.11.28=hecc5488_0 32 | - cachetools=3.1.1=py_0 33 | - cairo=1.14.12=h8948797_3 34 | - certifi=2019.11.28=py37_0 35 | - cffi=1.14.0=py37h2e261b9_0 36 | - chardet=3.0.4=py37_1003 37 | - click=7.0=py37_0 38 | - cryptography=2.8=py37h1ba5d50_0 39 | - cudatoolkit=10.1.243=h6bb024c_0 40 | - cycler=0.10.0=py37_0 41 | - cython=0.29.15=py37he1b5a44_0 42 | - dbus=1.13.12=h746ee38_0 43 | - entrypoints=0.3=py37_0 44 | - expat=2.2.6=he6710b0_0 45 | - ffmpeg=4.0=hcdf2ecd_0 46 | - flake8=3.7.9=py37_0 47 | - fontconfig=2.13.0=h9420a91_0 48 | - freeglut=3.0.0=hf484d3e_5 49 | - freetype=2.9.1=h8a8886c_1 50 | - gast=0.2.2=py37_0 51 | - glib=2.63.1=h5a9c865_0 52 | - google-auth=1.11.2=py_0 53 | - google-auth-oauthlib=0.4.1=py_2 54 | - google-pasta=0.1.8=py_0 55 | - graphite2=1.3.13=h23475e2_0 56 | - grpcio=1.27.2=py37hf8bcb03_0 57 | - gst-plugins-base=1.14.0=hbbd80ab_1 58 | - gstreamer=1.14.0=hb453b48_1 59 | - h5py=2.8.0=py37h989c5e5_3 60 | - harfbuzz=1.8.8=hffaf4a1_0 61 | - hdf5=1.10.2=hba1933b_1 62 | - icu=58.2=h9c2bf20_1 63 | - idna=2.8=py37_0 64 | - intel-openmp=2020.0=166 65 | - jasper=2.0.14=h07fcdf6_1 66 | - jpeg=9b=h024ee3a_2 67 | - keras=2.3.1=0 68 | - keras-applications=1.0.8=py_0 69 | - keras-base=2.3.1=py37_0 70 | - 
keras-preprocessing=1.1.0=py_1 71 | - kiwisolver=1.1.0=py37he6710b0_0 72 | - ld_impl_linux-64=2.33.1=h53a641e_7 73 | - libedit=3.1.20181209=hc058e9b_0 74 | - libffi=3.2.1=hd88cf55_4 75 | - libgcc-ng=9.1.0=hdf63c60_0 76 | - libgfortran-ng=7.3.0=hdf63c60_0 77 | - libglu=9.0.0=hf484d3e_1 78 | - libopencv=3.4.2=hb342d67_1 79 | - libopus=1.3=h7b6447c_0 80 | - libpng=1.6.37=hbc83047_0 81 | - libprotobuf=3.11.4=hd408876_0 82 | - libstdcxx-ng=9.1.0=hdf63c60_0 83 | - libtiff=4.1.0=h2733197_0 84 | - libuuid=1.0.3=h1bed415_2 85 | - libvpx=1.7.0=h439df22_0 86 | - libxcb=1.13=h1bed415_1 87 | - libxml2=2.9.9=hea5a465_1 88 | - markdown=3.1.1=py37_0 89 | - matplotlib=3.1.3=py37_0 90 | - matplotlib-base=3.1.3=py37hef1b27d_0 91 | - mccabe=0.6.1=py37_1 92 | - mkl=2020.0=166 93 | - mkl-service=2.3.0=py37he904b0f_0 94 | - mkl_fft=1.0.15=py37ha843d7b_0 95 | - mkl_random=1.1.0=py37hd6b4f25_0 96 | - ncurses=6.1=he6710b0_1 97 | - ninja=1.9.0=py37hfd86e86_0 98 | - numpy=1.18.1=py37h4f9e942_0 99 | - numpy-base=1.18.1=py37hde5b4d6_1 100 | - oauthlib=3.1.0=py_0 101 | - olefile=0.46=py37_0 102 | - opencv=3.4.2=py37h6fd60c2_1 103 | - openssl=1.1.1d=h516909a_0 104 | - opt_einsum=3.1.0=py_0 105 | - pcre=8.43=he6710b0_0 106 | - pillow=7.0.0=py37hb39fc2d_0 107 | - pip=20.0.2=py37_1 108 | - pixman=0.38.0=h7b6447c_0 109 | - protobuf=3.11.4=py37he6710b0_0 110 | - py-opencv=3.4.2=py37hb342d67_1 111 | - pyasn1=0.4.8=py_0 112 | - pyasn1-modules=0.2.7=py_0 113 | - pycocotools=2.0.0=py37h516909a_1001 114 | - pycodestyle=2.5.0=py37_0 115 | - pycparser=2.19=py37_0 116 | - pyflakes=2.1.1=py37_0 117 | - pyjwt=1.7.1=py37_0 118 | - pyopenssl=19.1.0=py37_0 119 | - pyparsing=2.4.6=py_0 120 | - pyqt=5.9.2=py37h05f1152_2 121 | - pysocks=1.7.1=py37_0 122 | - python=3.7.6=h0371630_2 123 | - python-dateutil=2.8.1=py_0 124 | - pytorch=1.4.0=py3.7_cuda10.1.243_cudnn7.6.3_0 125 | - pyyaml=5.3=py37h7b6447c_0 126 | - qt=5.9.7=h5867ecd_1 127 | - readline=7.0=h7b6447c_5 128 | - requests=2.22.0=py37_1 129 | - requests-oauthlib=1.3.0=py_0 130 | - rsa=4.0=py_0 131 | - scipy=1.4.1=py37h0b6359f_0 132 | - setuptools=45.2.0=py37_0 133 | - sip=4.19.8=py37hf484d3e_0 134 | - six=1.14.0=py37_0 135 | - sqlite=3.31.1=h7b6447c_0 136 | - tensorboard=2.1.0=py3_0 137 | - tensorflow=2.1.0=mkl_py37h80a91df_0 138 | - tensorflow-base=2.1.0=mkl_py37h6d63fb7_0 139 | - tensorflow-estimator=2.1.0=pyhd54b08b_0 140 | - termcolor=1.1.0=py37_1 141 | - tk=8.6.8=hbc83047_0 142 | - torchvision=0.5.0=py37_cu101 143 | - tornado=6.0.3=py37h7b6447c_3 144 | - urllib3=1.25.8=py37_0 145 | - werkzeug=1.0.0=py_0 146 | - wheel=0.34.2=py37_0 147 | - wrapt=1.11.2=py37h7b6447c_0 148 | - xz=5.2.4=h14c3975_4 149 | - yaml=0.1.7=had09818_2 150 | - zlib=1.2.11=h7b6447c_3 151 | - zstd=1.3.7=h0b5b093_0 152 | - pip: 153 | - tensorboardx==2.0 154 | prefix: /home/achawla/anaconda3/envs/lpr 155 | 156 | -------------------------------------------------------------------------------- /images/coreidea.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DIODE/80a396d5772528d4c393a301b0a1390eb7e7e039/images/coreidea.png -------------------------------------------------------------------------------- /images/resnet50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DIODE/80a396d5772528d4c393a301b0a1390eb7e7e039/images/resnet50.png -------------------------------------------------------------------------------- /images/yolov3.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DIODE/80a396d5772528d4c393a301b0a1390eb7e7e039/images/yolov3.jpg -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/Dockerfile: -------------------------------------------------------------------------------- 1 | # Start FROM Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch 2 | FROM nvcr.io/nvidia/pytorch:20.03-py3 3 | 4 | # Install dependencies (pip or conda) 5 | RUN pip install -U gsutil 6 | # RUN pip install -U -r requirements.txt 7 | # RUN conda update -n base -c defaults conda 8 | # RUN conda install -y -c anaconda future numpy opencv matplotlib tqdm pillow 9 | # RUN conda install -y -c conda-forge scikit-image tensorboard pycocotools 10 | 11 | ## Install OpenCV with Gstreamer support 12 | #WORKDIR /usr/src 13 | #RUN pip uninstall -y opencv-python 14 | #RUN apt-get update 15 | #RUN apt-get install -y gstreamer1.0-tools gstreamer1.0-python3-dbg-plugin-loader libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev 16 | #RUN git clone https://github.com/opencv/opencv.git && cd opencv && git checkout 4.1.1 && mkdir build 17 | #RUN git clone https://github.com/opencv/opencv_contrib.git && cd opencv_contrib && git checkout 4.1.1 18 | #RUN cd opencv/build && cmake ../ \ 19 | # -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \ 20 | # -D BUILD_OPENCV_PYTHON3=ON \ 21 | # -D PYTHON3_EXECUTABLE=/opt/conda/bin/python \ 22 | # -D PYTHON3_INCLUDE_PATH=/opt/conda/include/python3.6m \ 23 | # -D PYTHON3_LIBRARIES=/opt/conda/lib/python3.6/site-packages \ 24 | # -D WITH_GSTREAMER=ON \ 25 | # -D WITH_FFMPEG=OFF \ 26 | # && make && make install && ldconfig 27 | #RUN cd /usr/local/lib/python3.6/site-packages/cv2/python-3.6/ && mv cv2.cpython-36m-x86_64-linux-gnu.so cv2.so 28 | #RUN cd /opt/conda/lib/python3.6/site-packages/ && ln -s /usr/local/lib/python3.6/site-packages/cv2/python-3.6/cv2.so cv2.so 29 | #RUN python3 -c "import cv2; print(cv2.getBuildInformation())" 30 | 31 | # Create working directory 32 | RUN mkdir -p /usr/src/app 33 | WORKDIR /usr/src/app 34 | 35 | # Copy contents 36 | COPY . /usr/src/app 37 | 38 | # Copy weights 39 | #RUN python3 -c "from models import *; \ 40 | #attempt_download('weights/yolov3.pt'); \ 41 | #attempt_download('weights/yolov3-spp.pt')" 42 | 43 | 44 | # --------------------------------------------------- Extras Below --------------------------------------------------- 45 | 46 | # Build and Push 47 | # t=ultralytics/yolov3:v0 && sudo docker build -t $t . 
&& sudo docker push $t 48 | 49 | # Run 50 | # t=ultralytics/yolov3:v0 && sudo docker pull $t && sudo docker run -it --gpus all --ipc=host $t bash 51 | 52 | # Pull and Run with local directory access 53 | # t=ultralytics/yolov3:v0 && sudo docker pull $t && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t bash 54 | 55 | # Kill all 56 | # sudo docker kill "$(sudo docker ps -q)" 57 | 58 | # Kill all image-based 59 | # sudo docker kill $(sudo docker ps -a -q --filter ancestor=ultralytics/yolov3:v0) 60 | 61 | # Run bash for loop 62 | # sudo docker run --gpus all --ipc=host ultralytics/yolov3:v0 while true; do python3 train.py --evolve; done 63 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/README.md: -------------------------------------------------------------------------------- 1 | ## Yolo-V3 Knowledge Distillation 2 | 3 | Original code from: https://github.com/ultralytics/yolov3 4 | 5 | 6 | This repository performs knowledge distillation between two yolo-v3 models: a pre-trained teacher and a student initialized from scratch, using proxy datasets. 7 | 8 | ### Environment 9 | 10 | Install a python 3.8 environment with the following packages: 11 | 12 | ``` 13 | $ pip install -r requirements.txt 14 | ``` 15 | 16 | or use the provided Dockerfile to create an image. 17 | 18 | 19 | ### How to run? 20 | 21 | 1. Get access to the `DIODE_data` directory as in the top-level repository. 22 | 2. Extract a proxy dataset from the `DIODE_data` directory to `/tmp` as follows: 23 | ``` 24 | $ tar xzf /path/to/DIODE_data/hallucinate/hallucinate_320_normed.tgz -C /tmp 25 | ``` 26 | 3. Extract the coco dataset from the `DIODE_data` directory to `/tmp` (for evaluation during training): 27 | ``` 28 | $ tar xzf /path/to/DIODE_data/coco/coco.tgz -C /tmp 29 | ``` 30 | 4. Copy the yolo-v3 teacher weights file from `DIODE_data` to the `weights` directory: 31 | ``` 32 | cp /path/to/DIODE_data/pretrained/yolov3-spp-ultralytics.pt /path/to/lpr_deep_inversion/yolov3/weights/ 33 | ``` 34 | 5. Perform knowledge distillation on the proxy dataset as follows (a simplified sketch of how the loss terms combine follows this list): 35 | ``` 36 | python distill.py --data NGC_hallucinate.data --weights '' --batch-size 64 --cfg yolov3-spp.cfg --device='0,1,2,3' --nw=20 --cfg-teacher yolov3-spp.cfg --weights-teacher './weights/yolov3-spp-ultralytics.pt' --alpha-yolo=0.0 --alpha-distill=1.0 --distill-method='mse' 37 | ``` 38 | 6. Evaluate: 39 | ``` 40 | python test.py --cfg yolov3-spp.cfg --weights='weights/best.pt' --img 640 --data='data/NGC_coco2014.data' --device='0' 41 | ``` 42 |
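The `--alpha-yolo`, `--alpha-distill`, and `--distill-method` flags above control how the student's objective is assembled: a standard YOLO loss against the proxy labels, weighted by `--alpha-yolo`, plus a teacher-matching term, weighted by `--alpha-distill`, with `--distill-method` selecting how teacher and student outputs are compared (`mse` in the example). The sketch below shows one simple way such a training step can be written; it is an illustration under those assumptions, not the actual logic of `distill.py`, and `student`, `teacher`, `yolo_loss`, and the per-scale output format are placeholders.

```
# Hypothetical distillation step mixing a supervised YOLO loss with an MSE
# teacher-matching loss; `student` and `teacher` are assumed to return a list of
# per-scale prediction tensors with matching shapes (placeholders, not distill.py).
import torch
import torch.nn.functional as F


def distill_step(student, teacher, yolo_loss, optimizer, imgs, labels,
                 alpha_yolo=0.0, alpha_distill=1.0, distill_method="mse"):
    teacher.eval()
    with torch.no_grad():                       # the teacher only provides targets
        t_preds = teacher(imgs)
    s_preds = student(imgs)

    loss = torch.zeros((), device=imgs.device)
    if alpha_yolo > 0:                          # supervised term on the proxy labels
        loss = loss + alpha_yolo * yolo_loss(s_preds, labels)
    if alpha_distill > 0:                       # teacher-matching term
        if distill_method == "mse":
            distill = sum(F.mse_loss(s, t) for s, t in zip(s_preds, t_preds))
        else:
            raise NotImplementedError(distill_method)
        loss = loss + alpha_distill * distill

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return float(loss)
```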
43 | Distillation and training logs are available at `DIODE_data/logs/yolov3_spp/`, e.g. for the onebox dataset distillation: 44 | ``` 45 | $ ls -1 /path/to/DIODE_data/logs/yolov3_spp/distill.onebox 46 | 47 | best.pt (best checkpoint) 48 | bestresults (evaluation results from best checkpoint) 49 | info.txt (distillation command, evaluation command, time taken, etc.) 50 | last.pt (last checkpoint) 51 | lastresults (evaluation results from last checkpoint) 52 | results.txt (eval results of every epoch) 53 | runs (tensorboard logs) 54 | test_batch0_gt.jpg 55 | test_batch0_pred.jpg 56 | train_batch0.jpg 57 | 58 | ``` 59 | 60 | Knowledge distillation can be performed with different proxy datasets. The available proxy datasets, their locations, and the corresponding `--data` flags for `distill.py` are: 61 | 62 | ``` 63 | # Real/Rendered proxy datasets 64 | coco /path/to/DIODE_data/coco/coco.tgz --data NGC_coco2014.data 65 | GTA5 /path/to/DIODE_data/gta5/gta5.tgz --data NGC_gta5.data 66 | bdd100k /path/to/DIODE_data/bdd100k/bdd100k.tar.gz --data NGC_bdd100k.data 67 | voc /path/to/DIODE_data/voc/voc.tgz --data NGC_voc.data 68 | imagenet /path/to/DIODE_data/imagenet/imagenet.tgz --data NGC_imagenet.data 69 | 70 | # DIODE generated proxy datasets 71 | diode-coco /path/to/DIODE_data/fakecoco/fakecocov3.tgz --data NGC_fakecoco.data 72 | diode-onebox /path/to/DIODE_data/onebox/onebox.tgz --data NGC_onebox.data 73 | diode-onebox w/ fp sampling /path/to/DIODE_data/hallucinate/hallucinate_320_normed.tgz --data NGC_hallucinate.data 74 | diode-onebox w/ tiles /path/to/DIODE_data/onebox_tiles_coco/tiles.tgz --data NGC_tiles.data 75 | ``` 76 | 77 | ### LICENSE 78 | This code is built on the original Yolo-V3 code by https://github.com/ultralytics/yolov3, and the following files are covered under its original license https://github.com/NVlabs/DIODE/blob/master/knowledge_distillation/yolov3-master/LICENSE 79 | ``` 80 | yolov3-master/cfg/* 81 | yolov3-master/data/samples/* 82 | yolov3-master/data/coco* 83 | yolov3-master/data/get_coco* 84 | yolov3-master/utils/* 85 | yolov3-master/weights/* 86 | yolov3-master/detect.py 87 | yolov3-master/Dockerfile 88 | yolov3-master/models.py 89 | yolov3-master/requirements.txt 90 | yolov3-master/test.py 91 | yolov3-master/train.py 92 | yolov3-master/tutorial.ipynb 93 | ``` 94 | 95 | The following files have been added by this repository and are made available under the Nvidia Source Code License (1-Way Commercial). To view a copy of this license, visit https://github.com/NVlabs/DIODE/blob/master/LICENSE 96 | ``` 97 | yolov3-master/data/NGC* 98 | yolov3-master/distill.py 99 | yolov3-master/run.sh 100 | ``` 101 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/cfg/yolov3-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 |
89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 
| activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | 
[convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=18 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=1 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=18 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=1 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | 
pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=18 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=1 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/cfg/yolov3-tiny-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=18 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=1 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 
167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=18 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=1 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/cfg/yolov3-tiny-3cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=24 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=3 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=24 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=3 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/cfg/yolov3-tiny.cfg: 
-------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 1,2,3 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/cfg/yolov3-tiny3-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 200000 21 | policy=steps 22 | steps=180000,190000 23 | 
scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [convolutional] 39 | batch_normalize=1 40 | filters=32 41 | size=3 42 | stride=1 43 | pad=1 44 | activation=leaky 45 | 46 | [maxpool] 47 | size=2 48 | stride=2 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [maxpool] 59 | size=2 60 | stride=2 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=128 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [maxpool] 71 | size=2 72 | stride=2 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=256 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [maxpool] 95 | size=2 96 | stride=1 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=1024 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | ########### 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | size=1 126 | stride=1 127 | pad=1 128 | filters=18 129 | activation=linear 130 | 131 | 132 | 133 | [yolo] 134 | mask = 6,7,8 135 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 136 | classes=1 137 | num=9 138 | jitter=.3 139 | ignore_thresh = .7 140 | truth_thresh = 1 141 | random=1 142 | 143 | [route] 144 | layers = -4 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=128 149 | size=1 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [upsample] 155 | stride=2 156 | 157 | [route] 158 | layers = -1, 8 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=256 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | size=1 170 | stride=1 171 | pad=1 172 | filters=18 173 | activation=linear 174 | 175 | [yolo] 176 | mask = 3,4,5 177 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 178 | classes=1 179 | num=9 180 | jitter=.3 181 | ignore_thresh = .7 182 | truth_thresh = 1 183 | random=1 184 | 185 | 186 | 187 | [route] 188 | layers = -3 189 | 190 | [convolutional] 191 | batch_normalize=1 192 | filters=128 193 | size=1 194 | stride=1 195 | pad=1 196 | activation=leaky 197 | 198 | [upsample] 199 | stride=2 200 | 201 | [route] 202 | layers = -1, 6 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=3 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=18 217 | activation=linear 218 | 219 | [yolo] 220 | mask = 0,1,2 221 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 222 | classes=1 223 | num=9 224 | jitter=.3 225 | ignore_thresh = .7 226 | truth_thresh = 1 227 | random=1 228 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/cfg/yolov3-tiny3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 
| # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 200000 21 | policy=steps 22 | steps=180000,190000 23 | scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [convolutional] 39 | batch_normalize=1 40 | filters=32 41 | size=3 42 | stride=1 43 | pad=1 44 | activation=leaky 45 | 46 | [maxpool] 47 | size=2 48 | stride=2 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [maxpool] 59 | size=2 60 | stride=2 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=128 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [maxpool] 71 | size=2 72 | stride=2 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=256 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [maxpool] 95 | size=2 96 | stride=1 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=1024 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | ########### 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | size=1 126 | stride=1 127 | pad=1 128 | filters=255 129 | activation=linear 130 | 131 | 132 | 133 | [yolo] 134 | mask = 6,7,8 135 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 136 | classes=80 137 | num=9 138 | jitter=.3 139 | ignore_thresh = .7 140 | truth_thresh = 1 141 | random=1 142 | 143 | [route] 144 | layers = -4 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=128 149 | size=1 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [upsample] 155 | stride=2 156 | 157 | [route] 158 | layers = -1, 8 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=256 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | size=1 170 | stride=1 171 | pad=1 172 | filters=255 173 | activation=linear 174 | 175 | [yolo] 176 | mask = 3,4,5 177 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 178 | classes=80 179 | num=9 180 | jitter=.3 181 | ignore_thresh = .7 182 | truth_thresh = 1 183 | random=1 184 | 185 | 186 | 187 | [route] 188 | layers = -3 189 | 190 | [convolutional] 191 | batch_normalize=1 192 | filters=128 193 | size=1 194 | stride=1 195 | pad=1 196 | activation=leaky 197 | 198 | [upsample] 199 | stride=2 200 | 201 | [route] 202 | layers = -1, 6 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=3 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=255 217 | activation=linear 218 | 219 | [yolo] 220 | mask = 0,1,2 221 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 222 | classes=80 223 | num=9 224 | jitter=.3 225 | ignore_thresh = .7 226 
| truth_thresh = 1 227 | random=1 228 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/cfg/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | 
stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 
429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 
646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/cfg/yolov4-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=2 30 | pad=1 31 | activation=leaky 32 | 33 | [convolutional] 34 | batch_normalize=1 35 | filters=64 36 | size=3 37 | stride=2 38 | pad=1 39 | activation=leaky 40 | 41 | [convolutional] 42 | batch_normalize=1 43 | filters=64 44 | size=3 45 | stride=1 46 | pad=1 47 | activation=leaky 48 | 49 | [route] 50 | layers=-1 51 | groups=2 52 | group_id=1 53 | 54 | [convolutional] 55 | batch_normalize=1 56 | filters=32 57 | size=3 58 | stride=1 59 | pad=1 
60 | activation=leaky 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=32 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [route] 71 | layers = -1,-2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [route] 82 | layers = -6,-1 83 | 84 | [maxpool] 85 | size=2 86 | stride=2 87 | 88 | [convolutional] 89 | batch_normalize=1 90 | filters=128 91 | size=3 92 | stride=1 93 | pad=1 94 | activation=leaky 95 | 96 | [route] 97 | layers=-1 98 | groups=2 99 | group_id=1 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=64 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [convolutional] 110 | batch_normalize=1 111 | filters=64 112 | size=3 113 | stride=1 114 | pad=1 115 | activation=leaky 116 | 117 | [route] 118 | layers = -1,-2 119 | 120 | [convolutional] 121 | batch_normalize=1 122 | filters=128 123 | size=1 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [route] 129 | layers = -6,-1 130 | 131 | [maxpool] 132 | size=2 133 | stride=2 134 | 135 | [convolutional] 136 | batch_normalize=1 137 | filters=256 138 | size=3 139 | stride=1 140 | pad=1 141 | activation=leaky 142 | 143 | [route] 144 | layers=-1 145 | groups=2 146 | group_id=1 147 | 148 | [convolutional] 149 | batch_normalize=1 150 | filters=128 151 | size=3 152 | stride=1 153 | pad=1 154 | activation=leaky 155 | 156 | [convolutional] 157 | batch_normalize=1 158 | filters=128 159 | size=3 160 | stride=1 161 | pad=1 162 | activation=leaky 163 | 164 | [route] 165 | layers = -1,-2 166 | 167 | [convolutional] 168 | batch_normalize=1 169 | filters=256 170 | size=1 171 | stride=1 172 | pad=1 173 | activation=leaky 174 | 175 | [route] 176 | layers = -6,-1 177 | 178 | [maxpool] 179 | size=2 180 | stride=2 181 | 182 | [convolutional] 183 | batch_normalize=1 184 | filters=512 185 | size=3 186 | stride=1 187 | pad=1 188 | activation=leaky 189 | 190 | ################################## 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | filters=256 195 | size=1 196 | stride=1 197 | pad=1 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | filters=512 203 | size=3 204 | stride=1 205 | pad=1 206 | activation=leaky 207 | 208 | [convolutional] 209 | size=1 210 | stride=1 211 | pad=1 212 | filters=255 213 | activation=linear 214 | 215 | 216 | 217 | [yolo] 218 | mask = 3,4,5 219 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 220 | classes=80 221 | num=6 222 | jitter=.3 223 | scale_x_y = 1.05 224 | cls_normalizer=1.0 225 | iou_normalizer=0.07 226 | iou_loss=ciou 227 | ignore_thresh = .7 228 | truth_thresh = 1 229 | random=0 230 | resize=1.5 231 | nms_kind=greedynms 232 | beta_nms=0.6 233 | 234 | [route] 235 | layers = -4 236 | 237 | [convolutional] 238 | batch_normalize=1 239 | filters=128 240 | size=1 241 | stride=1 242 | pad=1 243 | activation=leaky 244 | 245 | [upsample] 246 | stride=2 247 | 248 | [route] 249 | layers = -1, 23 250 | 251 | [convolutional] 252 | batch_normalize=1 253 | filters=256 254 | size=3 255 | stride=1 256 | pad=1 257 | activation=leaky 258 | 259 | [convolutional] 260 | size=1 261 | stride=1 262 | pad=1 263 | filters=255 264 | activation=linear 265 | 266 | [yolo] 267 | mask = 1,2,3 268 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 269 | classes=80 270 | num=6 271 | jitter=.3 272 | scale_x_y = 1.05 273 | cls_normalizer=1.0 274 | iou_normalizer=0.07 275 | iou_loss=ciou 276 | ignore_thresh = .7 277 | truth_thresh = 1 278 
| random=0 279 | resize=1.5 280 | nms_kind=greedynms 281 | beta_nms=0.6 282 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/NGC_bdd100k.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=/tmp/bdd100k/manifest.txt 3 | valid=/tmp/coco/5k.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/NGC_coco2014.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=/tmp/coco/trainvalno5k.txt 3 | valid=/tmp/coco/5k.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/NGC_fakecoco.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=/tmp/fakecoco/manifest.txt 3 | valid=/tmp/coco/5k.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/NGC_gta5.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=/tmp/gta5/manifest.txt 3 | valid=/tmp/coco/5k.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/NGC_hallucinate.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=/tmp/hallucinate/manifest.txt 3 | valid=/tmp/coco/5k.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/NGC_imagenet.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=/tmp/imagenet/manifest.txt 3 | valid=/tmp/coco/5k.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/NGC_onebox.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=/tmp/onebox/manifest.txt 3 | valid=/tmp/coco/5k.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/NGC_realsynth.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=/tmp/realsynth/manifest.txt 3 | valid=/tmp/coco/5k.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/NGC_tiles.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=/tmp/tiles/manifest.txt 3 | valid=/tmp/coco/5k.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/NGC_voc.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=/tmp/voc/manifest.txt 3 | valid=/tmp/coco/5k.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/coco.names: 
-------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | couch 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tv 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/coco1.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=data/coco1.txt 3 | valid=data/coco1.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/coco1.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000109622.jpg 2 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/coco16.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=data/coco16.txt 3 | valid=data/coco16.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/coco16.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000109622.jpg 2 | ../coco/images/train2017/000000160694.jpg 3 | ../coco/images/train2017/000000308590.jpg 4 | ../coco/images/train2017/000000327573.jpg 5 | ../coco/images/train2017/000000062929.jpg 6 | ../coco/images/train2017/000000512793.jpg 7 | ../coco/images/train2017/000000371735.jpg 8 | ../coco/images/train2017/000000148118.jpg 9 | ../coco/images/train2017/000000309856.jpg 10 | ../coco/images/train2017/000000141882.jpg 11 | ../coco/images/train2017/000000318783.jpg 12 | ../coco/images/train2017/000000337760.jpg 13 | ../coco/images/train2017/000000298197.jpg 14 | ../coco/images/train2017/000000042421.jpg 15 | ../coco/images/train2017/000000328898.jpg 16 | ../coco/images/train2017/000000458856.jpg 17 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/coco1cls.data: -------------------------------------------------------------------------------- 1 | classes=1 2 | train=data/coco1cls.txt 3 | valid=data/coco1cls.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/coco1cls.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000000901.jpg 2 | 
../coco/images/train2017/000000001464.jpg 3 | ../coco/images/train2017/000000003220.jpg 4 | ../coco/images/train2017/000000003365.jpg 5 | ../coco/images/train2017/000000004772.jpg 6 | ../coco/images/train2017/000000009987.jpg 7 | ../coco/images/train2017/000000010498.jpg 8 | ../coco/images/train2017/000000012455.jpg 9 | ../coco/images/train2017/000000013992.jpg 10 | ../coco/images/train2017/000000014125.jpg 11 | ../coco/images/train2017/000000016314.jpg 12 | ../coco/images/train2017/000000016670.jpg 13 | ../coco/images/train2017/000000018412.jpg 14 | ../coco/images/train2017/000000021212.jpg 15 | ../coco/images/train2017/000000021826.jpg 16 | ../coco/images/train2017/000000030566.jpg 17 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/coco2014.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=../coco/trainvalno5k.txt 3 | valid=../coco/5k.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/coco2017.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=../coco/train2017.txt 3 | valid=../coco/val2017.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/coco64.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=data/coco64.txt 3 | valid=data/coco64.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/coco64.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000109622.jpg 2 | ../coco/images/train2017/000000160694.jpg 3 | ../coco/images/train2017/000000308590.jpg 4 | ../coco/images/train2017/000000327573.jpg 5 | ../coco/images/train2017/000000062929.jpg 6 | ../coco/images/train2017/000000512793.jpg 7 | ../coco/images/train2017/000000371735.jpg 8 | ../coco/images/train2017/000000148118.jpg 9 | ../coco/images/train2017/000000309856.jpg 10 | ../coco/images/train2017/000000141882.jpg 11 | ../coco/images/train2017/000000318783.jpg 12 | ../coco/images/train2017/000000337760.jpg 13 | ../coco/images/train2017/000000298197.jpg 14 | ../coco/images/train2017/000000042421.jpg 15 | ../coco/images/train2017/000000328898.jpg 16 | ../coco/images/train2017/000000458856.jpg 17 | ../coco/images/train2017/000000073824.jpg 18 | ../coco/images/train2017/000000252846.jpg 19 | ../coco/images/train2017/000000459590.jpg 20 | ../coco/images/train2017/000000273650.jpg 21 | ../coco/images/train2017/000000331311.jpg 22 | ../coco/images/train2017/000000156326.jpg 23 | ../coco/images/train2017/000000262985.jpg 24 | ../coco/images/train2017/000000253580.jpg 25 | ../coco/images/train2017/000000447976.jpg 26 | ../coco/images/train2017/000000378077.jpg 27 | ../coco/images/train2017/000000259913.jpg 28 | ../coco/images/train2017/000000424553.jpg 29 | ../coco/images/train2017/000000000612.jpg 30 | ../coco/images/train2017/000000267625.jpg 31 | ../coco/images/train2017/000000566012.jpg 32 | ../coco/images/train2017/000000196664.jpg 33 | ../coco/images/train2017/000000363331.jpg 34 | ../coco/images/train2017/000000057992.jpg 35 | ../coco/images/train2017/000000520047.jpg 36 | 
../coco/images/train2017/000000453903.jpg 37 | ../coco/images/train2017/000000162083.jpg 38 | ../coco/images/train2017/000000268516.jpg 39 | ../coco/images/train2017/000000277436.jpg 40 | ../coco/images/train2017/000000189744.jpg 41 | ../coco/images/train2017/000000041128.jpg 42 | ../coco/images/train2017/000000527728.jpg 43 | ../coco/images/train2017/000000465269.jpg 44 | ../coco/images/train2017/000000246833.jpg 45 | ../coco/images/train2017/000000076784.jpg 46 | ../coco/images/train2017/000000323715.jpg 47 | ../coco/images/train2017/000000560463.jpg 48 | ../coco/images/train2017/000000006263.jpg 49 | ../coco/images/train2017/000000094701.jpg 50 | ../coco/images/train2017/000000521359.jpg 51 | ../coco/images/train2017/000000302903.jpg 52 | ../coco/images/train2017/000000047559.jpg 53 | ../coco/images/train2017/000000480583.jpg 54 | ../coco/images/train2017/000000050025.jpg 55 | ../coco/images/train2017/000000084512.jpg 56 | ../coco/images/train2017/000000508913.jpg 57 | ../coco/images/train2017/000000093708.jpg 58 | ../coco/images/train2017/000000070493.jpg 59 | ../coco/images/train2017/000000539270.jpg 60 | ../coco/images/train2017/000000474402.jpg 61 | ../coco/images/train2017/000000209842.jpg 62 | ../coco/images/train2017/000000028820.jpg 63 | ../coco/images/train2017/000000154257.jpg 64 | ../coco/images/train2017/000000342499.jpg 65 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/coco_paper.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | street sign 13 | stop sign 14 | parking meter 15 | bench 16 | bird 17 | cat 18 | dog 19 | horse 20 | sheep 21 | cow 22 | elephant 23 | bear 24 | zebra 25 | giraffe 26 | hat 27 | backpack 28 | umbrella 29 | shoe 30 | eye glasses 31 | handbag 32 | tie 33 | suitcase 34 | frisbee 35 | skis 36 | snowboard 37 | sports ball 38 | kite 39 | baseball bat 40 | baseball glove 41 | skateboard 42 | surfboard 43 | tennis racket 44 | bottle 45 | plate 46 | wine glass 47 | cup 48 | fork 49 | knife 50 | spoon 51 | bowl 52 | banana 53 | apple 54 | sandwich 55 | orange 56 | broccoli 57 | carrot 58 | hot dog 59 | pizza 60 | donut 61 | cake 62 | chair 63 | couch 64 | potted plant 65 | bed 66 | mirror 67 | dining table 68 | window 69 | desk 70 | toilet 71 | door 72 | tv 73 | laptop 74 | mouse 75 | remote 76 | keyboard 77 | cell phone 78 | microwave 79 | oven 80 | toaster 81 | sink 82 | refrigerator 83 | blender 84 | book 85 | clock 86 | vase 87 | scissors 88 | teddy bear 89 | hair drier 90 | toothbrush 91 | hair brush -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/get_coco2014.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Zip coco folder 3 | # zip -r coco.zip coco 4 | # tar -czvf coco.tar.gz coco 5 | 6 | # Download labels from Google Drive, accepting presented query 7 | filename="coco2014labels.zip" 8 | fileid="1s6-CmF5_SElM28r52P1OUrCcuXZN-SFo" 9 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 10 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 11 | rm ./cookie 12 | 13 | # Unzip labels 14 | unzip -q ${filename} # for coco.zip 15 | # tar -xzf 
${filename} # for coco.tar.gz 16 | rm ${filename} 17 | 18 | # Download and unzip images 19 | cd coco/images 20 | f="train2014.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 21 | f="val2014.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 22 | 23 | # cd out 24 | cd ../.. 25 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/get_coco2017.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Zip coco folder 3 | # zip -r coco.zip coco 4 | # tar -czvf coco.tar.gz coco 5 | 6 | # Download labels from Google Drive, accepting presented query 7 | filename="coco2017labels.zip" 8 | fileid="1cXZR_ckHki6nddOmcysCuuJFM--T-Q6L" 9 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 10 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 11 | rm ./cookie 12 | 13 | # Unzip labels 14 | unzip -q ${filename} # for coco.zip 15 | # tar -xzf ${filename} # for coco.tar.gz 16 | rm ${filename} 17 | 18 | # Download and unzip images 19 | cd coco/images 20 | f="train2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 21 | f="val2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 22 | 23 | # cd out 24 | cd ../.. 25 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/samples/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DIODE/80a396d5772528d4c393a301b0a1390eb7e7e039/knowledge_distillation/yolov3-master/data/samples/bus.jpg -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/data/samples/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DIODE/80a396d5772528d4c393a301b0a1390eb7e7e039/knowledge_distillation/yolov3-master/data/samples/zidane.jpg -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from models import * # set ONNX_EXPORT in models.py 4 | from utils.datasets import * 5 | from utils.utils import * 6 | 7 | 8 | def detect(save_img=False): 9 | imgsz = (320, 192) if ONNX_EXPORT else opt.img_size # (320, 192) or (416, 256) or (608, 352) for (height, width) 10 | out, source, weights, half, view_img, save_txt = opt.output, opt.source, opt.weights, opt.half, opt.view_img, opt.save_txt 11 | webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') 12 | 13 | # Initialize 14 | device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device) 15 | if os.path.exists(out): 16 | shutil.rmtree(out) # delete output folder 17 | os.makedirs(out) # make new output folder 18 | 19 | # Initialize model 20 | model = Darknet(opt.cfg, imgsz) 21 | 22 | # Load weights 23 | attempt_download(weights) 24 | if weights.endswith('.pt'): # pytorch format 25 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 26 | else: # darknet format 27 | load_darknet_weights(model, weights) 28 | 29 | # 
Second-stage classifier 30 | classify = False 31 | if classify: 32 | modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize 33 | modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights 34 | modelc.to(device).eval() 35 | 36 | # Eval mode 37 | model.to(device).eval() 38 | 39 | # Fuse Conv2d + BatchNorm2d layers 40 | # model.fuse() 41 | 42 | # Export mode 43 | if ONNX_EXPORT: 44 | model.fuse() 45 | img = torch.zeros((1, 3) + imgsz) # (1, 3, 320, 192) 46 | f = opt.weights.replace(opt.weights.split('.')[-1], 'onnx') # *.onnx filename 47 | torch.onnx.export(model, img, f, verbose=False, opset_version=11, 48 | input_names=['images'], output_names=['classes', 'boxes']) 49 | 50 | # Validate exported model 51 | import onnx 52 | model = onnx.load(f) # Load the ONNX model 53 | onnx.checker.check_model(model) # Check that the IR is well formed 54 | print(onnx.helper.printable_graph(model.graph)) # Print a human readable representation of the graph 55 | return 56 | 57 | # Half precision 58 | half = half and device.type != 'cpu' # half precision only supported on CUDA 59 | if half: 60 | model.half() 61 | 62 | # Set Dataloader 63 | vid_path, vid_writer = None, None 64 | if webcam: 65 | view_img = True 66 | torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference 67 | dataset = LoadStreams(source, img_size=imgsz) 68 | else: 69 | save_img = True 70 | dataset = LoadImages(source, img_size=imgsz) 71 | 72 | # Get names and colors 73 | names = load_classes(opt.names) 74 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] 75 | 76 | # Run inference 77 | t0 = time.time() 78 | img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img 79 | _ = model(img.half() if half else img.float()) if device.type != 'cpu' else None # run once 80 | for path, img, im0s, vid_cap in dataset: 81 | img = torch.from_numpy(img).to(device) 82 | img = img.half() if half else img.float() # uint8 to fp16/32 83 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 84 | if img.ndimension() == 3: 85 | img = img.unsqueeze(0) 86 | 87 | # Inference 88 | t1 = torch_utils.time_synchronized() 89 | pred = model(img, augment=opt.augment)[0] 90 | t2 = torch_utils.time_synchronized() 91 | 92 | # to float 93 | if half: 94 | pred = pred.float() 95 | 96 | # Apply NMS 97 | pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, 98 | multi_label=False, classes=opt.classes, agnostic=opt.agnostic_nms) 99 | 100 | # Apply Classifier 101 | if classify: 102 | pred = apply_classifier(pred, modelc, img, im0s) 103 | 104 | # Process detections 105 | for i, det in enumerate(pred): # detections for image i 106 | if webcam: # batch_size >= 1 107 | p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() 108 | else: 109 | p, s, im0 = path, '', im0s 110 | 111 | save_path = str(Path(out) / Path(p).name) 112 | s += '%gx%g ' % img.shape[2:] # print string 113 | gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] #  normalization gain whwh 114 | if det is not None and len(det): 115 | # Rescale boxes from imgsz to im0 size 116 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 117 | 118 | # Print results 119 | for c in det[:, -1].unique(): 120 | n = (det[:, -1] == c).sum() # detections per class 121 | s += '%g %ss, ' % (n, names[int(c)]) # add to string 122 | 123 | # Write results 124 | for *xyxy, conf, cls in reversed(det): 125 | if save_txt: # Write to file 126 | xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # 
normalized xywh 127 | with open(save_path[:save_path.rfind('.')] + '.txt', 'a') as file: 128 | file.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format 129 | 130 | if save_img or view_img: # Add bbox to image 131 | label = '%s %.2f' % (names[int(cls)], conf) 132 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)]) 133 | 134 | # Print time (inference + NMS) 135 | print('%sDone. (%.3fs)' % (s, t2 - t1)) 136 | 137 | # Stream results 138 | if view_img: 139 | cv2.imshow(p, im0) 140 | if cv2.waitKey(1) == ord('q'): # q to quit 141 | raise StopIteration 142 | 143 | # Save results (image with detections) 144 | if save_img: 145 | if dataset.mode == 'images': 146 | cv2.imwrite(save_path, im0) 147 | else: 148 | if vid_path != save_path: # new video 149 | vid_path = save_path 150 | if isinstance(vid_writer, cv2.VideoWriter): 151 | vid_writer.release() # release previous video writer 152 | 153 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 154 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 155 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 156 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) 157 | vid_writer.write(im0) 158 | 159 | if save_txt or save_img: 160 | print('Results saved to %s' % os.getcwd() + os.sep + out) 161 | if platform == 'darwin': # MacOS 162 | os.system('open ' + save_path) 163 | 164 | print('Done. (%.3fs)' % (time.time() - t0)) 165 | 166 | 167 | if __name__ == '__main__': 168 | parser = argparse.ArgumentParser() 169 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='*.cfg path') 170 | parser.add_argument('--names', type=str, default='data/coco.names', help='*.names path') 171 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp-ultralytics.pt', help='weights path') 172 | parser.add_argument('--source', type=str, default='data/samples', help='source') # input file/folder, 0 for webcam 173 | parser.add_argument('--output', type=str, default='output', help='output folder') # output folder 174 | parser.add_argument('--img-size', type=int, default=512, help='inference size (pixels)') 175 | parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold') 176 | parser.add_argument('--iou-thres', type=float, default=0.6, help='IOU threshold for NMS') 177 | parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)') 178 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference') 179 | parser.add_argument('--device', default='', help='device id (i.e. 
0 or 0,1) or cpu') 180 | parser.add_argument('--view-img', action='store_true', help='display results') 181 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') 182 | parser.add_argument('--classes', nargs='+', type=int, help='filter by class') 183 | parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') 184 | parser.add_argument('--augment', action='store_true', help='augmented inference') 185 | opt = parser.parse_args() 186 | opt.cfg = check_file(opt.cfg) # check file 187 | opt.names = check_file(opt.names) # check file 188 | print(opt) 189 | 190 | with torch.no_grad(): 191 | detect() 192 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/requirements.txt: -------------------------------------------------------------------------------- 1 | # pip install -r requirements.txt 2 | Cython 3 | matplotlib>=3.2.2 4 | numpy>=1.18.5 5 | opencv-python>=4.1.2 6 | pillow 7 | # pycocotools>=2.0 8 | PyYAML>=5.3 9 | scipy>=1.4.1 10 | tensorboard>=2.2 11 | torch>=1.6.0 12 | torchvision>=0.7.0 13 | tqdm>=4.41.0 14 | 15 | # Conda commands (in place of pip) --------------------------------------------- 16 | # conda update -yn base -c defaults conda 17 | # conda install -yc anaconda numpy opencv matplotlib tqdm pillow ipython 18 | # conda install -yc conda-forge scikit-image pycocotools tensorboard 19 | # conda install -yc spyder-ide spyder-line-profiler 20 | # conda install -yc pytorch pytorch torchvision 21 | # conda install -yc conda-forge protobuf numpy && pip install onnx==1.6.0 # https://github.com/onnx/onnx#linux-and-macos 22 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/run.sh: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | # 9 | # Official PyTorch implementation of WACV2021 paper: 10 | # Data-Free Knowledge Distillation for Object Detection 11 | # A Chawla, H Yin, P Molchanov, J Alvarez 12 | # -------------------------------------------------------- 13 | 14 | 15 | # Train 16 | # python train.py --weights '' --cfg yolov3-spp.cfg --data 'data/NGC_coco2014.data' --batch-size 64 --device='0,1,2,3' --nw=28 17 | 18 | # Distill 19 | # python distill_baseline.py --data NGC_coco2014.data --weights './weights/temp/last.pt' --batch-size 64 --cfg yolov3-tiny.cfg --device='0' --nw=8 \ 20 | # --cfg-teacher yolov3-tiny.cfg --weights-teacher './weights/yolov3-tiny.pt' \ 21 | # --alpha-yolo=0.0 --alpha-distill=1.0 --distill-method='osd' --epochs=100 --device='0' --adam-lr=0.01 22 | 23 | # python distill.py --data NGC_coco2014.data --weights '' --batch-size 64 --cfg yolov3-tiny.cfg --device='0' --nw=8 \ 24 | # --cfg-teacher yolov3-tiny.cfg --weights-teacher './weights/yolov3-tiny.pt' \ 25 | # --alpha-yolo=0.0 --alpha-distill=1.0 --distill-method='mse' --epochs=100 --device='0' --adam-lr=0.001 26 | 27 | # Training command for master branch 28 | # python distill.py --data NGC_hallucinate.data --weights '' --batch-size 64 --cfg yolov3-spp.cfg --device='0,1,2,3' --nw=20 \ 29 | # --cfg-teacher yolov3-spp.cfg --weights-teacher './weights/yolov3-spp.pt' --alpha-yolo=0.0 --alpha-distill=1.0 --distill-method='mse' 30 | 31 | # Training command for debugging branch 32 | # python -u distill.py --data NGC_tiles.data --weights '' --batch-size 64 --cfg yolov3-spp.cfg --device='0,1,2,3' --nw=20 \ 33 | # --cfg-teacher yolov3-spp.cfg --weights-teacher './weights/yolov3-spp.pt' \ 34 | # --alpha-yolo=0.0 --alpha-distill=1.0 --distill-method='mse' --epochs=300 --adam-lr=0.001 | tee rawlogs.txt 35 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DIODE/80a396d5772528d4c393a301b0a1390eb7e7e039/knowledge_distillation/yolov3-master/utils/__init__.py -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/utils/adabound.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.optim.optimizer import Optimizer 5 | 6 | 7 | class AdaBound(Optimizer): 8 | """Implements AdaBound algorithm. 9 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 10 | Arguments: 11 | params (iterable): iterable of parameters to optimize or dicts defining 12 | parameter groups 13 | lr (float, optional): Adam learning rate (default: 1e-3) 14 | betas (Tuple[float, float], optional): coefficients used for computing 15 | running averages of gradient and its square (default: (0.9, 0.999)) 16 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 17 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 18 | eps (float, optional): term added to the denominator to improve 19 | numerical stability (default: 1e-8) 20 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 21 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 22 | .. 
Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 23 | https://openreview.net/forum?id=Bkg3g2R9FX 24 | """ 25 | 26 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 27 | eps=1e-8, weight_decay=0, amsbound=False): 28 | if not 0.0 <= lr: 29 | raise ValueError("Invalid learning rate: {}".format(lr)) 30 | if not 0.0 <= eps: 31 | raise ValueError("Invalid epsilon value: {}".format(eps)) 32 | if not 0.0 <= betas[0] < 1.0: 33 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 34 | if not 0.0 <= betas[1] < 1.0: 35 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 36 | if not 0.0 <= final_lr: 37 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 38 | if not 0.0 <= gamma < 1.0: 39 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 40 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 41 | weight_decay=weight_decay, amsbound=amsbound) 42 | super(AdaBound, self).__init__(params, defaults) 43 | 44 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 45 | 46 | def __setstate__(self, state): 47 | super(AdaBound, self).__setstate__(state) 48 | for group in self.param_groups: 49 | group.setdefault('amsbound', False) 50 | 51 | def step(self, closure=None): 52 | """Performs a single optimization step. 53 | Arguments: 54 | closure (callable, optional): A closure that reevaluates the model 55 | and returns the loss. 56 | """ 57 | loss = None 58 | if closure is not None: 59 | loss = closure() 60 | 61 | for group, base_lr in zip(self.param_groups, self.base_lrs): 62 | for p in group['params']: 63 | if p.grad is None: 64 | continue 65 | grad = p.grad.data 66 | if grad.is_sparse: 67 | raise RuntimeError( 68 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 69 | amsbound = group['amsbound'] 70 | 71 | state = self.state[p] 72 | 73 | # State initialization 74 | if len(state) == 0: 75 | state['step'] = 0 76 | # Exponential moving average of gradient values 77 | state['exp_avg'] = torch.zeros_like(p.data) 78 | # Exponential moving average of squared gradient values 79 | state['exp_avg_sq'] = torch.zeros_like(p.data) 80 | if amsbound: 81 | # Maintains max of all exp. moving avg. of sq. grad. values 82 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 83 | 84 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 85 | if amsbound: 86 | max_exp_avg_sq = state['max_exp_avg_sq'] 87 | beta1, beta2 = group['betas'] 88 | 89 | state['step'] += 1 90 | 91 | if group['weight_decay'] != 0: 92 | grad = grad.add(group['weight_decay'], p.data) 93 | 94 | # Decay the first and second moment running average coefficient 95 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 96 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 97 | if amsbound: 98 | # Maintains the maximum of all 2nd moment running avg. till now 99 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 100 | # Use the max. for normalizing running avg. 
of gradient 101 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 102 | else: 103 | denom = exp_avg_sq.sqrt().add_(group['eps']) 104 | 105 | bias_correction1 = 1 - beta1 ** state['step'] 106 | bias_correction2 = 1 - beta2 ** state['step'] 107 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 108 | 109 | # Applies bounds on actual learning rate 110 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 111 | final_lr = group['final_lr'] * group['lr'] / base_lr 112 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 113 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 114 | step_size = torch.full_like(denom, step_size) 115 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 116 | 117 | p.data.add_(-step_size) 118 | 119 | return loss 120 | 121 | 122 | class AdaBoundW(Optimizer): 123 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101) 124 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 125 | Arguments: 126 | params (iterable): iterable of parameters to optimize or dicts defining 127 | parameter groups 128 | lr (float, optional): Adam learning rate (default: 1e-3) 129 | betas (Tuple[float, float], optional): coefficients used for computing 130 | running averages of gradient and its square (default: (0.9, 0.999)) 131 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 132 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 133 | eps (float, optional): term added to the denominator to improve 134 | numerical stability (default: 1e-8) 135 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 136 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 137 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 138 | https://openreview.net/forum?id=Bkg3g2R9FX 139 | """ 140 | 141 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 142 | eps=1e-8, weight_decay=0, amsbound=False): 143 | if not 0.0 <= lr: 144 | raise ValueError("Invalid learning rate: {}".format(lr)) 145 | if not 0.0 <= eps: 146 | raise ValueError("Invalid epsilon value: {}".format(eps)) 147 | if not 0.0 <= betas[0] < 1.0: 148 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 149 | if not 0.0 <= betas[1] < 1.0: 150 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 151 | if not 0.0 <= final_lr: 152 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 153 | if not 0.0 <= gamma < 1.0: 154 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 155 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 156 | weight_decay=weight_decay, amsbound=amsbound) 157 | super(AdaBoundW, self).__init__(params, defaults) 158 | 159 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 160 | 161 | def __setstate__(self, state): 162 | super(AdaBoundW, self).__setstate__(state) 163 | for group in self.param_groups: 164 | group.setdefault('amsbound', False) 165 | 166 | def step(self, closure=None): 167 | """Performs a single optimization step. 168 | Arguments: 169 | closure (callable, optional): A closure that reevaluates the model 170 | and returns the loss. 
171 | """ 172 | loss = None 173 | if closure is not None: 174 | loss = closure() 175 | 176 | for group, base_lr in zip(self.param_groups, self.base_lrs): 177 | for p in group['params']: 178 | if p.grad is None: 179 | continue 180 | grad = p.grad.data 181 | if grad.is_sparse: 182 | raise RuntimeError( 183 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 184 | amsbound = group['amsbound'] 185 | 186 | state = self.state[p] 187 | 188 | # State initialization 189 | if len(state) == 0: 190 | state['step'] = 0 191 | # Exponential moving average of gradient values 192 | state['exp_avg'] = torch.zeros_like(p.data) 193 | # Exponential moving average of squared gradient values 194 | state['exp_avg_sq'] = torch.zeros_like(p.data) 195 | if amsbound: 196 | # Maintains max of all exp. moving avg. of sq. grad. values 197 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 198 | 199 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 200 | if amsbound: 201 | max_exp_avg_sq = state['max_exp_avg_sq'] 202 | beta1, beta2 = group['betas'] 203 | 204 | state['step'] += 1 205 | 206 | # Decay the first and second moment running average coefficient 207 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 208 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 209 | if amsbound: 210 | # Maintains the maximum of all 2nd moment running avg. till now 211 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 212 | # Use the max. for normalizing running avg. of gradient 213 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 214 | else: 215 | denom = exp_avg_sq.sqrt().add_(group['eps']) 216 | 217 | bias_correction1 = 1 - beta1 ** state['step'] 218 | bias_correction2 = 1 - beta2 ** state['step'] 219 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 220 | 221 | # Applies bounds on actual learning rate 222 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 223 | final_lr = group['final_lr'] * group['lr'] / base_lr 224 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 225 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 226 | step_size = torch.full_like(denom, step_size) 227 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 228 | 229 | if group['weight_decay'] != 0: 230 | decayed_weights = torch.mul(p.data, group['weight_decay']) 231 | p.data.add_(-step_size) 232 | p.data.sub_(decayed_weights) 233 | else: 234 | p.data.add_(-step_size) 235 | 236 | return loss 237 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/utils/cleanup.sh: -------------------------------------------------------------------------------- 1 | rm -r ./runs 2 | rm -r __pycache__ 3 | rm results.txt 4 | rm results.png 5 | rm results.json 6 | rm test_batch0_pred.jpg test_batch0_gt.jpg train_batch0.jpg 7 | rm weights/last.pt weights/best.pt 8 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/utils/distill_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. 
Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | # 9 | # Official PyTorch implementation of WACV2021 paper: 10 | # Data-Free Knowledge Distillation for Object Detection 11 | # A Chawla, H Yin, P Molchanov, J Alvarez 12 | # -------------------------------------------------------- 13 | 14 | 15 | import torch 16 | from torch import nn 17 | import torch.nn.functional as nnfunc 18 | 19 | class Distillation(object): 20 | 21 | """ 22 | Distillation loss class 23 | supports :param: method--> 24 | 1. mse : match yolo layer outputs 25 | """ 26 | 27 | def __init__(self,method="mse"): 28 | if method=="mse": 29 | self.loss_fn = self.mse 30 | # elif method=="cfmse": 31 | # self.loss_fn = self.cfmse 32 | # elif method=="cfmse2": 33 | # self.loss_fn = self.cfmse2 34 | else: 35 | raise NotImplementedError 36 | 37 | def mse(self, predS, predT): 38 | """ 39 | mse between predT & predS 40 | only works when Stu & Tea are same architecture 41 | """ 42 | assert len(predT) == len(predS) 43 | dLoss = [] 44 | for branchS, branchT in zip(predS, predT): 45 | dLoss.append(torch.mean((branchS - branchT)**2)) 46 | dLoss = sum(dLoss) 47 | dLoss_items = torch.tensor((0.0, 0.0, 0.0, dLoss.item())).to(dLoss.device) 48 | return dLoss, dLoss_items.detach() 49 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/utils/evolve.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #for i in 0 1 2 3 3 | #do 4 | # t=ultralytics/yolov3:v139 && sudo docker pull $t && sudo nvidia-docker run -d --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t utils/evolve.sh $i 5 | # sleep 30 6 | #done 7 | 8 | while true; do 9 | # python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 100 --batch 64 --accum 1 --weights yolov3-tiny.conv.15 --multi --bucket ult/wer --evolve --cache --device $1 --cfg yolov3-tiny3-1cls.cfg --single --adam 10 | # python3 train.py --data ../out/data.data --img-size 608 --epochs 10 --batch 8 --accum 8 --weights ultralytics68.pt --multi --bucket ult/athena --evolve --device $1 --cfg yolov3-spp-1cls.cfg 11 | 12 | python3 train.py --data coco2014.data --img-size 512 608 --epochs 27 --batch 8 --accum 8 --evolve --weights '' --bucket ult/coco/sppa_512 --device $1 --cfg yolov3-sppa.cfg --multi 13 | done 14 | 15 | 16 | # coco epoch times --img-size 416 608 --epochs 27 --batch 16 --accum 4 17 | # 36:34 2080ti 18 | # 21:58 V100 19 | # 63:00 T4 -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/utils/gcp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # New VM 4 | rm -rf sample_data yolov3 5 | git clone https://github.com/ultralytics/yolov3 6 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test # branch 7 | # sudo apt-get install zip 8 | #git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. 
&& rm -rf apex 9 | sudo conda install -yc conda-forge scikit-image pycocotools 10 | # python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('193Zp_ye-3qXMonR1nZj3YyxMtQkMy50k','coco2014.zip')" 11 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1WQT6SOktSe8Uw6r10-2JhbEhMY5DJaph','coco2017.zip')" 12 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1C3HewOG9akA3y456SZLBJZfNDPkBwAto','knife.zip')" 13 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('13g3LqdpkNE8sPosVJT6KFXlfoMypzRP4','sm4.zip')" 14 | sudo shutdown 15 | 16 | # Mount local SSD 17 | lsblk 18 | sudo mkfs.ext4 -F /dev/nvme0n1 19 | sudo mkdir -p /mnt/disks/nvme0n1 20 | sudo mount /dev/nvme0n1 /mnt/disks/nvme0n1 21 | sudo chmod a+w /mnt/disks/nvme0n1 22 | cp -r coco /mnt/disks/nvme0n1 23 | 24 | # Kill All 25 | t=ultralytics/yolov3:v1 26 | docker kill $(docker ps -a -q --filter ancestor=$t) 27 | 28 | # Evolve coco 29 | sudo -s 30 | t=ultralytics/yolov3:evolve 31 | # docker kill $(docker ps -a -q --filter ancestor=$t) 32 | for i in 0 1 6 7 33 | do 34 | docker pull $t && docker run --gpus all -d --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t bash utils/evolve.sh $i 35 | sleep 30 36 | done 37 | 38 | #COCO training 39 | n=131 && t=ultralytics/coco:v131 && sudo docker pull $t && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t python3 train.py --data coco2014.data --img-size 320 640 --epochs 300 --batch 16 --weights '' --device 0 --cfg yolov3-spp.cfg --bucket ult/coco --name $n && sudo shutdown 40 | n=132 && t=ultralytics/coco:v131 && sudo docker pull $t && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t python3 train.py --data coco2014.data --img-size 320 640 --epochs 300 --batch 64 --weights '' --device 0 --cfg yolov3-tiny.cfg --bucket ult/coco --name $n && sudo shutdown 41 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | 4 | import os 5 | import time 6 | 7 | 8 | # from google.cloud import storage 9 | 10 | 11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): 12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f 13 | # Downloads a file from Google Drive, accepting presented query 14 | # from utils.google_utils import *; gdrive_download() 15 | t = time.time() 16 | 17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... 
' % (id, name), end='') 18 | os.remove(name) if os.path.exists(name) else None # remove existing 19 | os.remove('cookie') if os.path.exists('cookie') else None 20 | 21 | # Attempt file download 22 | os.system("curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id) 23 | if os.path.exists('cookie'): # large file 24 | s = "curl -Lb ./cookie \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % ( 25 | id, name) 26 | else: # small file 27 | s = "curl -s -L -o %s 'https://drive.google.com/uc?export=download&id=%s'" % (name, id) 28 | r = os.system(s) # execute, capture return values 29 | os.remove('cookie') if os.path.exists('cookie') else None 30 | 31 | # Error check 32 | if r != 0: 33 | os.remove(name) if os.path.exists(name) else None # remove partial 34 | print('Download error ') # raise Exception('Download error') 35 | return r 36 | 37 | # Unzip if archive 38 | if name.endswith('.zip'): 39 | print('unzipping... ', end='') 40 | os.system('unzip -q %s' % name) # unzip 41 | os.remove(name) # remove zip to free space 42 | 43 | print('Done (%.1fs)' % (time.time() - t)) 44 | return r 45 | 46 | 47 | def upload_blob(bucket_name, source_file_name, destination_blob_name): 48 | # Uploads a file to a bucket 49 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 50 | 51 | storage_client = storage.Client() 52 | bucket = storage_client.get_bucket(bucket_name) 53 | blob = bucket.blob(destination_blob_name) 54 | 55 | blob.upload_from_filename(source_file_name) 56 | 57 | print('File {} uploaded to {}.'.format( 58 | source_file_name, 59 | destination_blob_name)) 60 | 61 | 62 | def download_blob(bucket_name, source_blob_name, destination_file_name): 63 | # Uploads a blob from a bucket 64 | storage_client = storage.Client() 65 | bucket = storage_client.get_bucket(bucket_name) 66 | blob = bucket.blob(source_blob_name) 67 | 68 | blob.download_to_filename(destination_file_name) 69 | 70 | print('Blob {} downloaded to {}.'.format( 71 | source_blob_name, 72 | destination_file_name)) 73 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/utils/layers.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | 3 | from utils.utils import * 4 | 5 | 6 | def make_divisible(v, divisor): 7 | # Function ensures all layers have a channel number that is divisible by 8 8 | # https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 9 | return math.ceil(v / divisor) * divisor 10 | 11 | 12 | class Flatten(nn.Module): 13 | # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions 14 | def forward(self, x): 15 | return x.view(x.size(0), -1) 16 | 17 | 18 | class Concat(nn.Module): 19 | # Concatenate a list of tensors along dimension 20 | def __init__(self, dimension=1): 21 | super(Concat, self).__init__() 22 | self.d = dimension 23 | 24 | def forward(self, x): 25 | return torch.cat(x, self.d) 26 | 27 | 28 | class FeatureConcat(nn.Module): 29 | def __init__(self, layers): 30 | super(FeatureConcat, self).__init__() 31 | self.layers = layers # layer indices 32 | self.multiple = len(layers) > 1 # multiple layers flag 33 | 34 | def forward(self, x, outputs): 35 | return torch.cat([outputs[i] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]] 36 | 37 | 38 | class WeightedFeatureFusion(nn.Module): # weighted sum of 2 
or more layers https://arxiv.org/abs/1911.09070 39 | def __init__(self, layers, weight=False): 40 | super(WeightedFeatureFusion, self).__init__() 41 | self.layers = layers # layer indices 42 | self.weight = weight # apply weights boolean 43 | self.n = len(layers) + 1 # number of layers 44 | if weight: 45 | self.w = nn.Parameter(torch.zeros(self.n), requires_grad=True) # layer weights 46 | 47 | def forward(self, x, outputs): 48 | # Weights 49 | if self.weight: 50 | w = torch.sigmoid(self.w) * (2 / self.n) # sigmoid weights (0-1) 51 | x = x * w[0] 52 | 53 | # Fusion 54 | nx = x.shape[1] # input channels 55 | for i in range(self.n - 1): 56 | a = outputs[self.layers[i]] * w[i + 1] if self.weight else outputs[self.layers[i]] # feature to add 57 | na = a.shape[1] # feature channels 58 | 59 | # Adjust channels 60 | if nx == na: # same shape 61 | x = x + a 62 | elif nx > na: # slice input 63 | x[:, :na] = x[:, :na] + a # or a = nn.ZeroPad2d((0, 0, 0, 0, 0, dc))(a); x = x + a 64 | else: # slice feature 65 | x = x + a[:, :nx] 66 | 67 | return x 68 | 69 | 70 | class MixConv2d(nn.Module): # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595 71 | def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, method='equal_params'): 72 | super(MixConv2d, self).__init__() 73 | 74 | groups = len(k) 75 | if method == 'equal_ch': # equal channels per group 76 | i = torch.linspace(0, groups - 1E-6, out_ch).floor() # out_ch indices 77 | ch = [(i == g).sum() for g in range(groups)] 78 | else: # 'equal_params': equal parameter count per group 79 | b = [out_ch] + [0] * groups 80 | a = np.eye(groups + 1, groups, k=-1) 81 | a -= np.roll(a, 1, axis=1) 82 | a *= np.array(k) ** 2 83 | a[0] = 1 84 | ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int) # solve for equal weight indices, ax = b 85 | 86 | self.m = nn.ModuleList([nn.Conv2d(in_channels=in_ch, 87 | out_channels=ch[g], 88 | kernel_size=k[g], 89 | stride=stride, 90 | padding=k[g] // 2, # 'same' pad 91 | dilation=dilation, 92 | bias=bias) for g in range(groups)]) 93 | 94 | def forward(self, x): 95 | return torch.cat([m(x) for m in self.m], 1) 96 | 97 | 98 | # Activation functions below ------------------------------------------------------------------------------------------- 99 | class SwishImplementation(torch.autograd.Function): 100 | @staticmethod 101 | def forward(ctx, x): 102 | ctx.save_for_backward(x) 103 | return x * torch.sigmoid(x) 104 | 105 | @staticmethod 106 | def backward(ctx, grad_output): 107 | x = ctx.saved_tensors[0] 108 | sx = torch.sigmoid(x) # sigmoid(ctx) 109 | return grad_output * (sx * (1 + x * (1 - sx))) 110 | 111 | 112 | class MishImplementation(torch.autograd.Function): 113 | @staticmethod 114 | def forward(ctx, x): 115 | ctx.save_for_backward(x) 116 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 117 | 118 | @staticmethod 119 | def backward(ctx, grad_output): 120 | x = ctx.saved_tensors[0] 121 | sx = torch.sigmoid(x) 122 | fx = F.softplus(x).tanh() 123 | return grad_output * (fx + x * sx * (1 - fx * fx)) 124 | 125 | 126 | class MemoryEfficientSwish(nn.Module): 127 | def forward(self, x): 128 | return SwishImplementation.apply(x) 129 | 130 | 131 | class MemoryEfficientMish(nn.Module): 132 | def forward(self, x): 133 | return MishImplementation.apply(x) 134 | 135 | 136 | class Swish(nn.Module): 137 | def forward(self, x): 138 | return x * torch.sigmoid(x) 139 | 140 | 141 | class HardSwish(nn.Module): # https://arxiv.org/pdf/1905.02244.pdf 142 | def 
forward(self, x): 143 | return x * F.hardtanh(x + 3, 0., 6., True) / 6. 144 | 145 | 146 | class Mish(nn.Module): # https://github.com/digantamisra98/Mish 147 | def forward(self, x): 148 | return x * F.softplus(x).tanh() 149 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | 6 | def parse_model_cfg(path): 7 | # Parse the yolo *.cfg file and return module definitions path may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3' 8 | if not path.endswith('.cfg'): # add .cfg suffix if omitted 9 | path += '.cfg' 10 | if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path): # add cfg/ prefix if omitted 11 | path = 'cfg' + os.sep + path 12 | 13 | with open(path, 'r') as f: 14 | lines = f.read().split('\n') 15 | lines = [x for x in lines if x and not x.startswith('#')] 16 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 17 | mdefs = [] # module definitions 18 | for line in lines: 19 | if line.startswith('['): # This marks the start of a new block 20 | mdefs.append({}) 21 | mdefs[-1]['type'] = line[1:-1].rstrip() 22 | if mdefs[-1]['type'] == 'convolutional': 23 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) 24 | else: 25 | key, val = line.split("=") 26 | key = key.rstrip() 27 | 28 | if key == 'anchors': # return nparray 29 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors 30 | elif (key in ['from', 'layers', 'mask']) or (key == 'size' and ',' in val): # return array 31 | mdefs[-1][key] = [int(x) for x in val.split(',')] 32 | else: 33 | val = val.strip() 34 | # TODO: .isnumeric() actually fails to get the float case 35 | if val.isnumeric(): # return int or float 36 | mdefs[-1][key] = int(val) if (int(val) - float(val)) == 0 else float(val) 37 | else: 38 | mdefs[-1][key] = val # return string 39 | 40 | # Check all fields are supported 41 | supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups', 42 | 'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random', 43 | 'stride_x', 'stride_y', 'weights_type', 'weights_normalization', 'scale_x_y', 'beta_nms', 'nms_kind', 44 | 'iou_loss', 'iou_normalizer', 'cls_normalizer', 'iou_thresh', 'probability'] 45 | 46 | f = [] # fields 47 | for x in mdefs[1:]: 48 | [f.append(k) for k in x if k not in f] 49 | u = [x for x in f if x not in supported] # unsupported fields 50 | assert not any(u), "Unsupported fields %s in %s. 
See https://github.com/ultralytics/yolov3/issues/631" % (u, path) 51 | 52 | return mdefs 53 | 54 | 55 | def parse_data_cfg(path): 56 | # Parses the data configuration file 57 | if not os.path.exists(path) and os.path.exists('data' + os.sep + path): # add data/ prefix if omitted 58 | path = 'data' + os.sep + path 59 | 60 | with open(path, 'r') as f: 61 | lines = f.readlines() 62 | 63 | options = dict() 64 | for line in lines: 65 | line = line.strip() 66 | if line == '' or line.startswith('#'): 67 | continue 68 | key, val = line.split('=') 69 | options[key.strip()] = val.strip() 70 | 71 | return options 72 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | import time 4 | from copy import deepcopy 5 | 6 | import torch 7 | import torch.backends.cudnn as cudnn 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | def init_seeds(seed=0): 13 | torch.manual_seed(seed) 14 | 15 | # Reduce randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html 16 | if seed == 0: 17 | cudnn.deterministic = False 18 | cudnn.benchmark = True 19 | 20 | 21 | def select_device(device='', apex=False, batch_size=None): 22 | # device = 'cpu' or '0' or '0,1,2,3' 23 | cpu_request = device.lower() == 'cpu' 24 | if device and not cpu_request: # if device requested other than 'cpu' 25 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 26 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity 27 | 28 | cuda = False if cpu_request else torch.cuda.is_available() 29 | if cuda: 30 | c = 1024 ** 2 # bytes to MB 31 | ng = torch.cuda.device_count() 32 | if ng > 1 and batch_size: # check that batch_size is compatible with device_count 33 | assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng) 34 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 35 | s = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex 36 | for i in range(0, ng): 37 | if i == 1: 38 | s = ' ' * len(s) 39 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 40 | (s, i, x[i].name, x[i].total_memory / c)) 41 | else: 42 | print('Using CPU') 43 | 44 | print('') # skip a line 45 | return torch.device('cuda:0' if cuda else 'cpu') 46 | 47 | 48 | def time_synchronized(): 49 | torch.cuda.synchronize() if torch.cuda.is_available() else None 50 | return time.time() 51 | 52 | 53 | def initialize_weights(model): 54 | for m in model.modules(): 55 | t = type(m) 56 | if t is nn.Conv2d: 57 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 58 | elif t is nn.BatchNorm2d: 59 | m.eps = 1e-4 60 | m.momentum = 0.03 61 | elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 62 | m.inplace = True 63 | 64 | 65 | def find_modules(model, mclass=nn.Conv2d): 66 | # finds layer indices matching module class 'mclass' 67 | return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] 68 | 69 | 70 | def fuse_conv_and_bn(conv, bn): 71 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 72 | with torch.no_grad(): 73 | # init 74 | fusedconv = torch.nn.Conv2d(conv.in_channels, 75 | conv.out_channels, 76 | kernel_size=conv.kernel_size, 77 | stride=conv.stride, 78 | padding=conv.padding, 79 | bias=True) 80 
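        # Equivalently, with gamma = bn.weight and beta = bn.bias, the fusion computes
        #   W_fused = diag(gamma / sqrt(running_var + eps)) @ W_conv
        #   b_fused = gamma / sqrt(running_var + eps) * (b_conv - running_mean) + beta
        # so the fused convolution reproduces conv followed by batch norm in eval mode;
        # the weight and bias preparation below implements exactly this.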
| 81 | # prepare filters 82 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 83 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 84 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 85 | 86 | # prepare spatial bias 87 | if conv.bias is not None: 88 | b_conv = conv.bias 89 | else: 90 | b_conv = torch.zeros(conv.weight.size(0)) 91 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 92 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 93 | 94 | return fusedconv 95 | 96 | 97 | def model_info(model, verbose=False): 98 | # Plots a line-by-line description of a PyTorch model 99 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 100 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 101 | if verbose: 102 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 103 | for i, (name, p) in enumerate(model.named_parameters()): 104 | name = name.replace('module_list.', '') 105 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 106 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 107 | 108 | try: # FLOPS 109 | from thop import profile 110 | macs, _ = profile(model, inputs=(torch.zeros(1, 3, 480, 640),), verbose=False) 111 | fs = ', %.1f GFLOPS' % (macs / 1E9 * 2) 112 | except: 113 | fs = '' 114 | 115 | print('Model Summary: %g layers, %g parameters, %g gradients%s' % (len(list(model.parameters())), n_p, n_g, fs)) 116 | 117 | 118 | def load_classifier(name='resnet101', n=2): 119 | # Loads a pretrained model reshaped to n-class output 120 | import pretrainedmodels # https://github.com/Cadene/pretrained-models.pytorch#torchvision 121 | model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet') 122 | 123 | # Display model properties 124 | for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']: 125 | print(x + ' =', eval(x)) 126 | 127 | # Reshape output to n classes 128 | filters = model.last_linear.weight.shape[1] 129 | model.last_linear.bias = torch.nn.Parameter(torch.zeros(n)) 130 | model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters)) 131 | model.last_linear.out_features = n 132 | return model 133 | 134 | 135 | def scale_img(img, ratio=1.0, same_shape=True): # img(16,3,256,416), r=ratio 136 | # scales img(bs,3,y,x) by ratio 137 | h, w = img.shape[2:] 138 | s = (int(h * ratio), int(w * ratio)) # new size 139 | img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize 140 | if not same_shape: # pad/crop img 141 | gs = 64 # (pixels) grid size 142 | h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] 143 | return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean 144 | 145 | 146 | class ModelEMA: 147 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 148 | Keep a moving average of everything in the model state_dict (parameters and buffers). 149 | This is intended to allow functionality like 150 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 151 | A smoothed version of the weights is necessary for some training schemes to perform well. 152 | E.g. 
Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc that use 153 | RMSprop with a short 2.4-3 epoch decay period and slow LR decay rate of .96-.99 requires EMA 154 | smoothing of weights to match results. Pay attention to the decay constant you are using 155 | relative to your update count per epoch. 156 | To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but 157 | disable validation of the EMA weights. Validation will have to be done manually in a separate 158 | process, or after the training stops converging. 159 | This class is sensitive where it is initialized in the sequence of model init, 160 | GPU assignment and distributed training wrappers. 161 | I've tested with the sequence in my own train.py for torch.DataParallel, apex.DDP, and single-GPU. 162 | """ 163 | 164 | def __init__(self, model, decay=0.9999, device=''): 165 | # make a copy of the model for accumulating moving average of weights 166 | self.ema = deepcopy(model) 167 | self.ema.eval() 168 | self.updates = 0 # number of EMA updates 169 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) 170 | self.device = device # perform ema on different device from model if set 171 | if device: 172 | self.ema.to(device=device) 173 | for p in self.ema.parameters(): 174 | p.requires_grad_(False) 175 | 176 | def update(self, model): 177 | self.updates += 1 178 | d = self.decay(self.updates) 179 | with torch.no_grad(): 180 | if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel): 181 | msd, esd = model.module.state_dict(), self.ema.module.state_dict() 182 | else: 183 | msd, esd = model.state_dict(), self.ema.state_dict() 184 | 185 | for k, v in esd.items(): 186 | if v.dtype.is_floating_point: 187 | v *= d 188 | v += (1. 
- d) * msd[k].detach() 189 | 190 | def update_attr(self, model): 191 | # Assign attributes (which may change during training) 192 | for k in model.__dict__.keys(): 193 | if not k.startswith('_'): 194 | setattr(self.ema, k, getattr(model, k)) 195 | -------------------------------------------------------------------------------- /knowledge_distillation/yolov3-master/weights/download_yolov3_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make '/weights' directory if it does not exist and cd into it 4 | # mkdir -p weights && cd weights 5 | 6 | # copy darknet weight files, continue '-c' if partially downloaded 7 | # wget -c https://pjreddie.com/media/files/yolov3.weights 8 | # wget -c https://pjreddie.com/media/files/yolov3-tiny.weights 9 | # wget -c https://pjreddie.com/media/files/yolov3-spp.weights 10 | 11 | # yolov3 pytorch weights 12 | # download from Google Drive: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI 13 | 14 | # darknet53 weights (first 75 layers only) 15 | # wget -c https://pjreddie.com/media/files/darknet53.conv.74 16 | 17 | # yolov3-tiny weights from darknet (first 16 layers only) 18 | # ./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15 19 | # mv yolov3-tiny.conv.15 ../ 20 | 21 | # new method 22 | python3 -c "from models import *; 23 | attempt_download('weights/yolov3.pt'); 24 | attempt_download('weights/yolov3-spp.pt')" 25 | -------------------------------------------------------------------------------- /models/yolo/README.md: -------------------------------------------------------------------------------- 1 | 2 | ### YOLO-V3 detector 3 | This directory `models/yolo` contains `src` files used to build and invert a pre-trained 4 | Yolo-V3 model, as well as utilities for dataset sampling and generation. 5 | 6 | 7 | ### LICENSE 8 | 9 | ------------------------------------------------------------------------------------ 10 | The original Yolo-V3 code is from https://github.com/ultralytics/yolov3 and the following files are covered under its original licence https://github.com/NVlabs/DIODE/blob/master/models/yolo/LICENSE 11 | 12 | ``` 13 | models/yolo/5k.txt 14 | models/yolo/cfg/* 15 | models/yolo/datasets.py 16 | models/yolo/models.py 17 | models/yolo/parse_config.py 18 | models/yolo/torch_utils.py 19 | models/yolo/utils.py 20 | ``` 21 | 22 | ------------ 23 | The remaining files `/models/yolo/__init__.py`, `models/yolo/yolostuff.py` and `models/yolo/5k_fullpath.txt` have been added by this repository and are made available under the following license: 24 | 25 | Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 26 | 27 | This work is made available under the Nvidia Source Code License (1-Way Commercial). 28 | To view a copy of this license, visit https://github.com/NVlabs/DIODE/blob/master/LICENSE 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /models/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. 
Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | # 9 | # Official PyTorch implementation of WACV2021 paper: 10 | # Data-Free Knowledge Distillation for Object Detection 11 | # A Chawla, H Yin, P Molchanov, J Alvarez 12 | # -------------------------------------------------------- -------------------------------------------------------------------------------- /models/yolo/cfg/yolov3-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | 
filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | 
[convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=18 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=1 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | 
[convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=18 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=1 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=18 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=1 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /models/yolo/cfg/yolov3-tiny-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | 
[convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=18 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=1 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=18 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=1 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /models/yolo/cfg/yolov3-tiny-3cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 
| stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=24 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=3 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=24 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=3 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /models/yolo/cfg/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | 
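# Note: the detection head follows. The [convolutional] layer feeding each [yolo] layer
# must have filters = (classes + 5) * anchors_per_scale, e.g. (80 + 5) * 3 = 255 for the
# 80-class COCO head below (the 1-class and 3-class tiny variants use 18 and 24).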
########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 1,2,3 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /models/yolo/cfg/yolov3-tiny3-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 200000 21 | policy=steps 22 | steps=180000,190000 23 | scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [convolutional] 39 | batch_normalize=1 40 | filters=32 41 | size=3 42 | stride=1 43 | pad=1 44 | activation=leaky 45 | 46 | [maxpool] 47 | size=2 48 | stride=2 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [maxpool] 59 | size=2 60 | stride=2 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=128 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [maxpool] 71 | size=2 72 | stride=2 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=256 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [maxpool] 95 | size=2 96 | stride=1 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=1024 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | ########### 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | size=1 126 | stride=1 127 | pad=1 128 | filters=18 129 | activation=linear 130 | 131 | 132 | 133 | [yolo] 134 | mask = 6,7,8 135 | anchors = 10,13, 16,30, 33,23, 
30,61, 62,45, 59,119, 116,90, 156,198, 373,326 136 | classes=1 137 | num=9 138 | jitter=.3 139 | ignore_thresh = .7 140 | truth_thresh = 1 141 | random=1 142 | 143 | [route] 144 | layers = -4 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=128 149 | size=1 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [upsample] 155 | stride=2 156 | 157 | [route] 158 | layers = -1, 8 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=256 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | size=1 170 | stride=1 171 | pad=1 172 | filters=18 173 | activation=linear 174 | 175 | [yolo] 176 | mask = 3,4,5 177 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 178 | classes=1 179 | num=9 180 | jitter=.3 181 | ignore_thresh = .7 182 | truth_thresh = 1 183 | random=1 184 | 185 | 186 | 187 | [route] 188 | layers = -3 189 | 190 | [convolutional] 191 | batch_normalize=1 192 | filters=128 193 | size=1 194 | stride=1 195 | pad=1 196 | activation=leaky 197 | 198 | [upsample] 199 | stride=2 200 | 201 | [route] 202 | layers = -1, 6 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=3 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=18 217 | activation=linear 218 | 219 | [yolo] 220 | mask = 0,1,2 221 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 222 | classes=1 223 | num=9 224 | jitter=.3 225 | ignore_thresh = .7 226 | truth_thresh = 1 227 | random=1 228 | -------------------------------------------------------------------------------- /models/yolo/cfg/yolov3-tiny3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 200000 21 | policy=steps 22 | steps=180000,190000 23 | scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [convolutional] 39 | batch_normalize=1 40 | filters=32 41 | size=3 42 | stride=1 43 | pad=1 44 | activation=leaky 45 | 46 | [maxpool] 47 | size=2 48 | stride=2 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [maxpool] 59 | size=2 60 | stride=2 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=128 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [maxpool] 71 | size=2 72 | stride=2 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=256 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [maxpool] 95 | size=2 96 | stride=1 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=1024 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | ########### 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | 
batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | size=1 126 | stride=1 127 | pad=1 128 | filters=255 129 | activation=linear 130 | 131 | 132 | 133 | [yolo] 134 | mask = 6,7,8 135 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 136 | classes=80 137 | num=9 138 | jitter=.3 139 | ignore_thresh = .7 140 | truth_thresh = 1 141 | random=1 142 | 143 | [route] 144 | layers = -4 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=128 149 | size=1 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [upsample] 155 | stride=2 156 | 157 | [route] 158 | layers = -1, 8 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=256 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | size=1 170 | stride=1 171 | pad=1 172 | filters=255 173 | activation=linear 174 | 175 | [yolo] 176 | mask = 3,4,5 177 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 178 | classes=80 179 | num=9 180 | jitter=.3 181 | ignore_thresh = .7 182 | truth_thresh = 1 183 | random=1 184 | 185 | 186 | 187 | [route] 188 | layers = -3 189 | 190 | [convolutional] 191 | batch_normalize=1 192 | filters=128 193 | size=1 194 | stride=1 195 | pad=1 196 | activation=leaky 197 | 198 | [upsample] 199 | stride=2 200 | 201 | [route] 202 | layers = -1, 6 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=3 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=255 217 | activation=linear 218 | 219 | [yolo] 220 | mask = 0,1,2 221 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 222 | classes=80 223 | num=9 224 | jitter=.3 225 | ignore_thresh = .7 226 | truth_thresh = 1 227 | random=1 228 | -------------------------------------------------------------------------------- /models/yolo/cfg/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | 
batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 
313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | 
stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | 
batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /models/yolo/cfg/yolov4-tiny-1cls.cfg: -------------------------------------------------------------------------------- 1 | # Generated by Glenn Jocher (glenn.jocher@ultralytics.com) for https://github.com/ultralytics/yolov3 2 | # def kmean_anchors(path='../coco/train2017.txt', n=12, img_size=(320, 640)): # from utils.utils import *; kmean_anchors() 3 | # Evolving anchors: 100%|██████████| 1000/1000 [41:15<00:00, 2.48s/it] 4 | # 0.20 iou_thr: 0.992 best possible recall, 4.25 anchors > thr 5 | # kmeans anchors (n=12, img_size=(320, 640), IoU=0.005/0.184/0.634-min/mean/best): 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 6 | 7 | [net] 8 | # Testing 9 | # batch=1 10 | # subdivisions=1 11 | # Training 12 | batch=64 13 | subdivisions=16 14 | width=608 15 | height=608 16 | channels=3 17 | momentum=0.9 18 | decay=0.0005 19 | angle=0 20 | saturation = 1.5 21 | exposure = 1.5 22 | hue=.1 23 | 24 | learning_rate=0.001 25 | burn_in=1000 26 | max_batches = 200000 27 | policy=steps 28 | steps=180000,190000 29 | scales=.1,.1 30 | 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=16 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=32 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=64 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=256 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=512 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | [maxpool] 101 | size=2 102 | stride=1 103 | 104 | [convolutional] 105 | batch_normalize=1 106 | filters=1024 107 | size=3 108 | stride=1 109 | pad=1 110 | activation=leaky 111 | 112 | ########### 113 | 114 | [convolutional] 115 | batch_normalize=1 116 | filters=256 117 | size=1 118 | stride=1 119 | pad=1 120 | activation=leaky 121 | 122 | [convolutional] 123 | batch_normalize=1 124 | filters=512 125 | size=3 126 | stride=1 127 | pad=1 128 | activation=leaky 129 | 130 | [convolutional] 131 | size=1 132 | stride=1 133 | pad=1 134 | filters=24 135 | activation=linear 136 | 137 | 138 | 139 | [yolo] 140 | mask = 8,9,10,11 141 | anchors = 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 
121,81, 112,169, 241,158, 195,310, 426,359 142 | classes=1 143 | num=12 144 | jitter=.3 145 | ignore_thresh = .7 146 | truth_thresh = 1 147 | random=1 148 | 149 | [route] 150 | layers = -4 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=128 155 | size=1 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [upsample] 161 | stride=2 162 | 163 | [route] 164 | layers = -1, 8 165 | 166 | [convolutional] 167 | batch_normalize=1 168 | filters=256 169 | size=3 170 | stride=1 171 | pad=1 172 | activation=leaky 173 | 174 | [convolutional] 175 | size=1 176 | stride=1 177 | pad=1 178 | filters=24 179 | activation=linear 180 | 181 | [yolo] 182 | mask = 4,5,6,7 183 | anchors = 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 184 | classes=1 185 | num=12 186 | jitter=.3 187 | ignore_thresh = .7 188 | truth_thresh = 1 189 | random=1 190 | 191 | 192 | 193 | [route] 194 | layers = -3 195 | 196 | [convolutional] 197 | batch_normalize=1 198 | filters=128 199 | size=1 200 | stride=1 201 | pad=1 202 | activation=leaky 203 | 204 | [upsample] 205 | stride=2 206 | 207 | [route] 208 | layers = -1, 6 209 | 210 | [convolutional] 211 | batch_normalize=1 212 | filters=128 213 | size=3 214 | stride=1 215 | pad=1 216 | activation=leaky 217 | 218 | [convolutional] 219 | size=1 220 | stride=1 221 | pad=1 222 | filters=24 223 | activation=linear 224 | 225 | [yolo] 226 | mask = 0,1,2,3 227 | anchors = 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 228 | classes=1 229 | num=12 230 | jitter=.3 231 | ignore_thresh = .7 232 | truth_thresh = 1 233 | random=1 234 | -------------------------------------------------------------------------------- /models/yolo/cfg/yolov4-tiny.cfg: -------------------------------------------------------------------------------- 1 | # Generated by Glenn Jocher (glenn.jocher@ultralytics.com) for https://github.com/ultralytics/yolov3 2 | # def kmean_anchors(path='../coco/train2017.txt', n=12, img_size=(320, 640)): # from utils.utils import *; kmean_anchors() 3 | # Evolving anchors: 100%|██████████| 1000/1000 [41:15<00:00, 2.48s/it] 4 | # 0.20 iou_thr: 0.992 best possible recall, 4.25 anchors > thr 5 | # kmeans anchors (n=12, img_size=(320, 640), IoU=0.005/0.184/0.634-min/mean/best): 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 6 | 7 | [net] 8 | # Testing 9 | # batch=1 10 | # subdivisions=1 11 | # Training 12 | batch=64 13 | subdivisions=16 14 | width=608 15 | height=608 16 | channels=3 17 | momentum=0.9 18 | decay=0.0005 19 | angle=0 20 | saturation = 1.5 21 | exposure = 1.5 22 | hue=.1 23 | 24 | learning_rate=0.001 25 | burn_in=1000 26 | max_batches = 200000 27 | policy=steps 28 | steps=180000,190000 29 | scales=.1,.1 30 | 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=16 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=32 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=64 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | 
filters=256 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=512 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | [maxpool] 101 | size=2 102 | stride=1 103 | 104 | [convolutional] 105 | batch_normalize=1 106 | filters=1024 107 | size=3 108 | stride=1 109 | pad=1 110 | activation=leaky 111 | 112 | ########### 113 | 114 | [convolutional] 115 | batch_normalize=1 116 | filters=256 117 | size=1 118 | stride=1 119 | pad=1 120 | activation=leaky 121 | 122 | [convolutional] 123 | batch_normalize=1 124 | filters=512 125 | size=3 126 | stride=1 127 | pad=1 128 | activation=leaky 129 | 130 | [convolutional] 131 | size=1 132 | stride=1 133 | pad=1 134 | filters=340 135 | activation=linear 136 | 137 | 138 | 139 | [yolo] 140 | mask = 8,9,10,11 141 | anchors = 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 142 | classes=80 143 | num=12 144 | jitter=.3 145 | ignore_thresh = .7 146 | truth_thresh = 1 147 | random=1 148 | 149 | [route] 150 | layers = -4 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=128 155 | size=1 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [upsample] 161 | stride=2 162 | 163 | [route] 164 | layers = -1, 8 165 | 166 | [convolutional] 167 | batch_normalize=1 168 | filters=256 169 | size=3 170 | stride=1 171 | pad=1 172 | activation=leaky 173 | 174 | [convolutional] 175 | size=1 176 | stride=1 177 | pad=1 178 | filters=340 179 | activation=linear 180 | 181 | [yolo] 182 | mask = 4,5,6,7 183 | anchors = 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 184 | classes=80 185 | num=12 186 | jitter=.3 187 | ignore_thresh = .7 188 | truth_thresh = 1 189 | random=1 190 | 191 | 192 | 193 | [route] 194 | layers = -3 195 | 196 | [convolutional] 197 | batch_normalize=1 198 | filters=128 199 | size=1 200 | stride=1 201 | pad=1 202 | activation=leaky 203 | 204 | [upsample] 205 | stride=2 206 | 207 | [route] 208 | layers = -1, 6 209 | 210 | [convolutional] 211 | batch_normalize=1 212 | filters=128 213 | size=3 214 | stride=1 215 | pad=1 216 | activation=leaky 217 | 218 | [convolutional] 219 | size=1 220 | stride=1 221 | pad=1 222 | filters=340 223 | activation=linear 224 | 225 | [yolo] 226 | mask = 0,1,2,3 227 | anchors = 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 228 | classes=80 229 | num=12 230 | jitter=.3 231 | ignore_thresh = .7 232 | truth_thresh = 1 233 | random=1 234 | -------------------------------------------------------------------------------- /models/yolo/parse_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | 6 | def parse_model_cfg(path): 7 | # Parse the yolo *.cfg file and return module definitions path may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3' 8 | if not path.endswith('.cfg'): # add .cfg suffix if omitted 9 | path += '.cfg' 10 | if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path): # add cfg/ prefix if omitted 11 | path = 'cfg' + os.sep + path 12 | 13 | with open(path, 'r') as f: 14 | lines = f.read().split('\n') 15 | lines = [x for x in lines if x and not x.startswith('#')] 16 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 17 | mdefs = [] # module definitions 18 | for line in lines: 19 | if line.startswith('['): # This marks the start of 
a new block 20 | mdefs.append({}) 21 | mdefs[-1]['type'] = line[1:-1].rstrip() 22 | if mdefs[-1]['type'] == 'convolutional': 23 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) 24 | else: 25 | key, val = line.split("=") 26 | key = key.rstrip() 27 | 28 | if key == 'anchors': # return nparray 29 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors 30 | elif key in ['from', 'layers', 'mask']: # return array 31 | mdefs[-1][key] = [int(x) for x in val.split(',')] 32 | else: 33 | val = val.strip() 34 | if val.isnumeric(): # return int or float 35 | mdefs[-1][key] = int(val) if (int(val) - float(val)) == 0 else float(val) 36 | else: 37 | mdefs[-1][key] = val # return string 38 | 39 | # Check all fields are supported 40 | supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups', 41 | 'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random', 42 | 'stride_x', 'stride_y', 'weights_type', 'weights_normalization', 'scale_x_y', 'beta_nms', 'nms_kind', 43 | 'iou_loss', 'iou_normalizer', 'cls_normalizer', 'iou_thresh'] 44 | 45 | f = [] # fields 46 | for x in mdefs[1:]: 47 | [f.append(k) for k in x if k not in f] 48 | u = [x for x in f if x not in supported] # unsupported fields 49 | assert not any(u), "Unsupported fields %s in %s. See https://github.com/ultralytics/yolov3/issues/631" % (u, path) 50 | 51 | return mdefs 52 | 53 | 54 | def parse_data_cfg(path): 55 | # Parses the data configuration file 56 | if not os.path.exists(path) and os.path.exists('data' + os.sep + path): # add data/ prefix if omitted 57 | path = 'data' + os.sep + path 58 | 59 | with open(path, 'r') as f: 60 | lines = f.readlines() 61 | 62 | options = dict() 63 | for line in lines: 64 | line = line.strip() 65 | if line == '' or line.startswith('#'): 66 | continue 67 | key, val = line.split('=') 68 | options[key.strip()] = val.strip() 69 | 70 | return options 71 | -------------------------------------------------------------------------------- /models/yolo/torch_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from copy import deepcopy 4 | 5 | import torch 6 | import torch.backends.cudnn as cudnn 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | def select_device(device='', apex=False, batch_size=None): 12 | # device = 'cpu' or '0' or '0,1,2,3' 13 | cpu_request = device.lower() == 'cpu' 14 | if device and not cpu_request: # if device requested other than 'cpu' 15 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 16 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity 17 | 18 | cuda = False if cpu_request else torch.cuda.is_available() 19 | if cuda: 20 | c = 1024 ** 2 # bytes to MB 21 | ng = torch.cuda.device_count() 22 | if ng > 1 and batch_size: # check that batch_size is compatible with device_count 23 | assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng) 24 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 25 | s = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex 26 | for i in range(0, ng): 27 | if i == 1: 28 | s = ' ' * len(s) 29 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 30 | (s, i, x[i].name, x[i].total_memory / c)) 31 | else: 32 | print('Using 
CPU') 33 | 34 | print('') # skip a line 35 | return torch.device('cuda:0' if cuda else 'cpu') 36 | 37 | 38 | def time_synchronized(): 39 | torch.cuda.synchronize() if torch.cuda.is_available() else None 40 | return time.time() 41 | 42 | 43 | def fuse_conv_and_bn(conv, bn): 44 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 45 | with torch.no_grad(): 46 | # init 47 | fusedconv = torch.nn.Conv2d(conv.in_channels, 48 | conv.out_channels, 49 | kernel_size=conv.kernel_size, 50 | stride=conv.stride, 51 | padding=conv.padding, 52 | bias=True) 53 | 54 | # prepare filters 55 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 56 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 57 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 58 | 59 | # prepare spatial bias 60 | if conv.bias is not None: 61 | b_conv = conv.bias 62 | else: 63 | b_conv = torch.zeros(conv.weight.size(0)) 64 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 65 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 66 | 67 | return fusedconv 68 | 69 | 70 | def model_info(model, verbose=False): 71 | # Plots a line-by-line description of a PyTorch model 72 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 73 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 74 | if verbose: 75 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 76 | for i, (name, p) in enumerate(model.named_parameters()): 77 | name = name.replace('module_list.', '') 78 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 79 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 80 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g)) 81 | 82 | 83 | def load_classifier(name='resnet101', n=2): 84 | # Loads a pretrained model reshaped to n-class output 85 | import pretrainedmodels # https://github.com/Cadene/pretrained-models.pytorch#torchvision 86 | model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet') 87 | 88 | # Display model properties 89 | for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']: 90 | print(x + ' =', eval(x)) 91 | 92 | # Reshape output to n classes 93 | filters = model.last_linear.weight.shape[1] 94 | model.last_linear.bias = torch.nn.Parameter(torch.zeros(n)) 95 | model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters)) 96 | model.last_linear.out_features = n 97 | return model 98 | 99 | 100 | def scale_img(img, r=1.0): # img(16,3,256,416), r=ratio 101 | # scales a batch of pytorch images while retaining same input shape (cropped or grey-padded) 102 | h, w = img.shape[2:] 103 | s = (int(h * r), int(w * r)) # new size 104 | p = h - s[0], w - s[1] # pad/crop pixels 105 | img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize 106 | return F.pad(img, [0, p[1], 0, p[0]], value=0.5) if r < 1.0 else img[:, :, :p[0], :p[1]] # pad/crop 107 | # cv2.imwrite('scaled.jpg', np.array(img[0].permute((1, 2, 0)) * 255.0)) 108 | 109 | 110 | class ModelEMA: 111 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 112 | Keep a moving average of everything in the model state_dict (parameters and buffers). 
113 | This is intended to allow functionality like 114 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 115 | A smoothed version of the weights is necessary for some training schemes to perform well. 116 | E.g. Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc that use 117 | RMSprop with a short 2.4-3 epoch decay period and slow LR decay rate of .96-.99 requires EMA 118 | smoothing of weights to match results. Pay attention to the decay constant you are using 119 | relative to your update count per epoch. 120 | To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but 121 | disable validation of the EMA weights. Validation will have to be done manually in a separate 122 | process, or after the training stops converging. 123 | This class is sensitive where it is initialized in the sequence of model init, 124 | GPU assignment and distributed training wrappers. 125 | I've tested with the sequence in my own train.py for torch.DataParallel, apex.DDP, and single-GPU. 126 | """ 127 | 128 | def __init__(self, model, decay=0.9998, device=''): 129 | # make a copy of the model for accumulating moving average of weights 130 | self.ema = deepcopy(model) 131 | self.ema.eval() 132 | self.decay = decay 133 | self.device = device # perform ema on different device from model if set 134 | if device: 135 | self.ema.to(device=device) 136 | for p in self.ema.parameters(): 137 | p.requires_grad_(False) 138 | 139 | def update(self, model): 140 | d = self.decay 141 | with torch.no_grad(): 142 | if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel): 143 | msd, esd = model.module.state_dict(), self.ema.module.state_dict() 144 | else: 145 | msd, esd = model.state_dict(), self.ema.state_dict() 146 | # self.ema.load_state_dict({k: esd[k] * d + (1 - d) * v.detach() for k, v in model.items() if v.dtype.is_floating_point}) 147 | for k, v in esd.items(): 148 | if v.dtype.is_floating_point: 149 | v *= d 150 | v += (1. - d) * msd[k].detach() 151 | 152 | def update_attr(self, model): 153 | # Copy attributes (which may change during training) from the model onto the EMA copy 154 | for k in model.__dict__.keys(): 155 | if not k.startswith('_'): 156 | setattr(self.ema, k, getattr(model, k)) 157 | -------------------------------------------------------------------------------- /scripts/LINE_looped_runner_yolo.sh: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | # 9 | # Official PyTorch implementation of WACV2021 paper: 10 | # Data-Free Knowledge Distillation for Object Detection 11 | # A Chawla, H Yin, P Molchanov, J Alvarez 12 | # -------------------------------------------------------- 13 | 14 | 15 | ############################################################################## 16 | # This script runs the main_yolo.py file multiple times with different inits 17 | # to generate multiple different batches of inverted images. 18 | # e.g.
$ ./LINE_looped_runner_yolo.sh 1 512 CUDA_GPU_ID 19 | # How: this will split lines [1-511] in provided manifest file into 4 subsets of 128 20 | # lines each and will generate images with labels initialized from each of those 21 | # subsets. 22 | # To generate large number of images, run this script on multiple GPUs as follows: 23 | # $ ./LINE_looped_runner_yolo.sh 1 512 0 24 | # $ ./LINE_looped_runner_yolo.sh 512 1024 1 25 | # $ ./LINE_looped_runner_yolo.sh 1024 1536 2 26 | # $ .... 27 | 28 | # To generate our datasets we ran this script on 28 gpus to generate a dataset in 48 29 | # hours. 30 | 31 | # How to use this script: 32 | # 1. use LINE_looped_runner_yolo.sh to generate 938 batches of 128 images 33 | # of 160x160 resolution each. 34 | # 2. Then coalesce images from 938 batches into a single dataset of 120064 images 35 | # 3. Upsample 120064 images from 160x160 to 320x320 using imagemagick or any other tool 36 | # 3. Then use this newly generated dataset as initialization for this script 37 | # with resolution=320 and batchsize=96 to fine-tune 320x320 images using DIODE. 38 | ############################################################################## 39 | 40 | STARTLINE=$1 41 | ENDLINE=$2 42 | 43 | export CUDA_VISIBLE_DEVICES=$3 44 | echo "Running on GPU: $CUDA_VISIBLE_DEVICES from [ $STARTLINE , $ENDLINE )" 45 | 46 | CURLINE=$STARTLINE 47 | CURENDLINE=0 48 | 49 | resolution=160 50 | batchsize=128 51 | 52 | ############################################################################## 53 | # Uncomment below to use res=320 with bs=96 54 | ############################################################################## 55 | # resolution=320 56 | # batchsize=96 57 | 58 | 59 | while [ $CURLINE -lt $ENDLINE ] 60 | do 61 | 62 | # CURLINE, CURENDLINE 63 | CURENDLINE=$( expr $CURLINE + $batchsize ) 64 | if [ $CURENDLINE -gt $ENDLINE ] 65 | then 66 | CURENDLINE=$ENDLINE 67 | batchsize=$( expr $CURENDLINE - $CURLINE ) 68 | fi 69 | echo "lines: [$CURLINE - $CURENDLINE ) | batchsize: $batchsize | resolution: $resolution" 70 | 71 | # extract subset trainvalno5k lines [$CURLINE - $CURENDLINE) 72 | # randstring=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1) 73 | SUBSETFILE="subset_${CURLINE}_${CURENDLINE}_bs${batchsize}_res${resolution}.txt" 74 | OUTDIR="subset_${CURLINE}_${CURENDLINE}_bs${batchsize}_res${resolution}" 75 | 76 | ####################################################################### 77 | # Modify path to dataset below 78 | # e.g /tmp/onebox/manifest.txt to get DIODE initialization labels from one-box dataset 79 | # e.g /tmp/coco/trainvalno5k.txt to get DIODE initialization labels from coco dataset 80 | ####################################################################### 81 | cat /tmp/onebox/manifest.txt | head -n $( expr $CURENDLINE - 1 ) | tail -n $batchsize > /tmp/$SUBSETFILE 82 | 83 | # Check that number of lines in file == batchsize 84 | nlines=$( cat /tmp/$SUBSETFILE | wc -l ) 85 | if [ $nlines -ne $batchsize ] 86 | then 87 | echo "Note: bs:${batchsize} doesn't match nlines:$nlines" 88 | fi 89 | 90 | echo $(date) 91 | 92 | # Resolution = 160 93 | python -u main_yolo.py --resolution=${resolution} --bs=${batchsize} \ 94 | --jitter=20 --do_flip --rand_brightness --rand_contrast --random_erase \ 95 | --path="/tmp/${OUTDIR}" \ 96 | --train_txt_path="/tmp/$SUBSETFILE" \ 97 | --iterations=2500 \ 98 | --r_feature=0.1 --p_norm=2 --alpha-mean=1.0 --alpha-var=1.0 --num_layers=-1 \ 99 | --first_bn_coef=2.0 \ 100 | --main_loss_multiplier=1.0 \ 101 | 
--alpha_img_stats=0.0 \ 102 | --tv_l1=75.0 \ 103 | --tv_l2=0.0 \ 104 | --lr=0.2 --min_lr=0.0 --beta1=0.0 --beta2=0.0 \ 105 | --wd=0.0 \ 106 | --save_every=1000 \ 107 | --seeds="0,0,23460" \ 108 | --display_every=100 --init_scale=1.0 --init_bias=0.0 --nms_conf_thres=0.05 --alpha-ssim=0.00 --save-coco > /dev/null 109 | 110 | # # Resolution = 320 111 | # python main_yolo.py --resolution=${resolution} --bs=${batchsize} \ 112 | # --jitter=40 --do_flip --rand_brightness --rand_contrast --random_erase \ 113 | # --path="/tmp/${OUTDIR}" \ 114 | # --train_txt_path="/tmp/$SUBSETFILE" \ 115 | # --iterations=1500 \ 116 | # --r_feature=0.1 --p_norm=2 --alpha-mean=1.0 --alpha-var=1.0 --num_layers=51 \ 117 | # --first_bn_coef=0.0 \ 118 | # --main_loss_multiplier=1.0 \ 119 | # --alpha_img_stats=0.0 \ 120 | # --tv_l1=75.0 \ 121 | # --tv_l2=0.0 \ 122 | # --lr=0.002 --min_lr=0.0005 \ 123 | # --wd=0.0 \ 124 | # --save_every=1000 \ 125 | # --seeds="0,0,23460" \ 126 | # --display_every=100 --init_scale=1.0 --init_bias=0.0 --nms_conf_thres=0.1 --alpha-ssim=0.0 --save-coco --real_mixin_alpha=1.0 \ 127 | # --box-sampler-warmup=4000 --box-sampler-conf=0.2 --box-sampler-overlap-iou=0.35 --box-sampler-minarea=0.01 --box-sampler-maxarea=0.85 --box-sampler-earlyexit=4000 > /dev/null 128 | 129 | # Clean up large unusable files 130 | rm /tmp/$OUTDIR/chkpt.pt 131 | rm /tmp/$OUTDIR/iteration_targets* 132 | rm /tmp/$OUTDIR/tracker.data 133 | mv /tmp/$SUBSETFILE /tmp/$OUTDIR 134 | cat /tmp/$OUTDIR/losses.log | grep "Initialization" 135 | cat /tmp/$OUTDIR/losses.log | grep "Verifier RealImage" 136 | cat /tmp/$OUTDIR/losses.log | grep "Verifier GeneratedImage" | tail -n1 137 | 138 | # tar this folder 139 | tar czf /tmp/${OUTDIR}.tgz -C /tmp $OUTDIR 140 | rm -r /tmp/$OUTDIR 141 | mv /tmp/${OUTDIR}.tgz /result/ 142 | 143 | # loop increment 144 | CURLINE=$CURENDLINE 145 | done 146 | 147 | echo "Finished" 148 | -------------------------------------------------------------------------------- /scripts/auto_gpu.sh: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | # 9 | # Official PyTorch implementation of WACV2021 paper: 10 | # Data-Free Knowledge Distillation for Object Detection 11 | # A Chawla, H Yin, P Molchanov, J Alvarez 12 | # -------------------------------------------------------- 13 | 14 | 15 | if [ "${CUDA_VISIBLE_DEVICES}" = "auto" ] 16 | then 17 | # number of gpus 18 | NUMGPUS=`nvidia-smi -q -d MEMORY | grep "Attached GPU" | grep -P -o "\d"` 19 | echo "NUMGPUS: $NUMGPUS" 20 | 21 | # extract free-memory for each gpu 22 | MEMLIST="ID FREEMEM" 23 | for (( DEVICE=0; DEVICE<${NUMGPUS}; DEVICE++ )) 24 | do 25 | echo "RUNNING for GPU: ${DEVICE}" 26 | FREEMEM=`nvidia-smi -q -d MEMORY -i ${DEVICE} | grep "Free" | head -n1 | grep -E -o "[0-9]+"` 27 | MEMLIST="${MEMLIST}\n${DEVICE} ${FREEMEM}" 28 | done 29 | echo "####################" 30 | echo -e $MEMLIST 31 | echo "####################" 32 | 33 | # MEMLIST --> remove first line --> sort on gpumem --> pick first line --> pick first GPU device-id 34 | export CUDA_VISIBLE_DEVICES=`echo -e ${MEMLIST} | tail -n +2 | sort -n -r -k2 | head -n1 | grep -E -o "^[0-9]"` 35 | 36 | fi 37 | echo "CUDA_VISIBLE_DEVICES set to: ${CUDA_VISIBLE_DEVICES}" 38 | -------------------------------------------------------------------------------- /scripts/draw_all_boxes_dataset.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | # 9 | # Official PyTorch implementation of WACV2021 paper: 10 | # Data-Free Knowledge Distillation for Object Detection 11 | # A Chawla, H Yin, P Molchanov, J Alvarez 12 | # -------------------------------------------------------- 13 | 14 | 15 | import os, sys 16 | from PIL import Image, ImageDraw 17 | import argparse 18 | import numpy as np 19 | from tqdm import tqdm 20 | import pickle 21 | 22 | def draw(args): 23 | 24 | with open(args.names, "rb") as f: 25 | names = pickle.load(f) 26 | 27 | os.makedirs(args.outdir) 28 | with open(args.manifest, "rt") as f: 29 | images = f.readlines() 30 | images = [img.strip() for img in images] 31 | 32 | labels = [image.replace("images","labels") for image in images] 33 | labels = [lbl.replace(os.path.splitext(lbl)[1], '.txt') for lbl in labels] 34 | 35 | for image,label in tqdm(zip(images, labels)): 36 | 37 | pilimage = Image.open(image).convert(mode='RGB') 38 | boxes = np.loadtxt(label).reshape(-1,5) 39 | draw = ImageDraw.Draw(pilimage) 40 | width, height = pilimage.size 41 | for box in boxes: 42 | cls,x,y,w,h = box 43 | x1 = (x - w/2.0) * width 44 | x2 = (x + w/2.0) * width 45 | y1 = (y - h/2.0) * height 46 | y2 = (y + h/2.0) * height 47 | pilbox = [x1,y1,x2,y2] 48 | pilbox = [int(atom) for atom in pilbox] 49 | try: 50 | draw.rectangle(xy=pilbox, outline=(254,0,0), width=2) 51 | draw.text(xy=pilbox[0:2], text="cls:{} {}".format(int(cls), names[int(cls)])) 52 | except: 53 | import pdb; pdb.set_trace() 54 | 55 | outfile = os.path.join(args.outdir, os.path.basename(image)) 56 | pilimage.save(outfile) 57 | 58 | 59 | 60 | if __name__ == '__main__': 61 | parser = argparse.ArgumentParser(description='draw boxes on images of dataset') 62 | parser.add_argument('--manifest', type=str, required=True, help='txt file containing list of images') 63 | parser.add_argument('--outdir', type=str, required=True, help='dir where labelled images will be stored') 64 | parser.add_argument('--names', type=str, required=True, help='path to names.pkl file') 65 | args = parser.parse_args() 66 | 67 | draw(args) 68 | 69 | 70 | -------------------------------------------------------------------------------- /scripts/runner_yolo_multiscale.sh: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | # 9 | # Official PyTorch implementation of WACV2021 paper: 10 | # Data-Free Knowledge Distillation for Object Detection 11 | # A Chawla, H Yin, P Molchanov, J Alvarez 12 | # -------------------------------------------------------- 13 | 14 | 15 | now=$(date +"day_%m_%d_%Y_time_%H_%M_%S") 16 | echo "CURDATETIME: ${now}" 17 | export CUDA_VISIBLE_DEVICES="auto" 18 | source scripts/auto_gpu.sh 19 | echo "${CUDA_VISIBLE_DEVICES}" 20 | 21 | ############################################################## 22 | # This command generates 128 images of resolution 160x160 23 | ############################################################## 24 | 25 | rootlocation="./diode_results/" 26 | python -u main_yolo.py --resolution=160 --bs=128 \ 27 | --jitter=20 --do_flip --rand_brightness --rand_contrast --random_erase \ 28 | --path="${rootlocation}/${now}_res160" \ 29 | --train_txt_path="/tmp/onebox/manifest.txt" \ 30 | --iterations=2500 \ 31 | --r_feature=0.1 --p_norm=2 --alpha-mean=1.0 --alpha-var=1.0 --num_layers=-1 \ 32 | --first_bn_coef=2.0 \ 33 | --main_loss_multiplier=0.5 \ 34 | --alpha_img_stats=0.0 \ 35 | --tv_l1=75.0 \ 36 | --tv_l2=0.0 \ 37 | --lr=0.2 --min_lr=0.0 --beta1=0.0 --beta2=0.0 \ 38 | --wd=0.0 \ 39 | --save_every=100 --display_every=100 \ 40 | --seeds="0,0,23456" \ 41 | --init_scale=1.0 --init_bias=0.0 --nms_conf_thres=0.05 --alpha-ssim=0.00 --save-coco 42 | 43 | ############################################################## 44 | # to enable fp-sampling add the following args to above: 45 | # --box-sampler --box-sampler-warmup=800 --box-sampler-conf=0.2 \ 46 | # --box-sampler-overlap-iou=0.35 --box-sampler-minarea=0.01 --box-sampler-maxarea=0.85 --box-sampler-earlyexit=2800 47 | ############################################################## 48 | 49 | ############################################################## 50 | # Uncomment the command block below to upsample and fine-tune 51 | # images generated by previous command to resolution 320x320 52 | ############################################################## 53 | 54 | # # create manifest file 55 | # find ${rootlocation}/${now}_res160/coco/images -name '*.png' > ${rootlocation}/${now}_res160/coco/manifest.txt 56 | # # upsample images from 160 --> 320 57 | # # WARNING: imagemagick must be installed before the next line is run 58 | # # to install imagemagick: $sudo apt-get update && sudo apt-get install -y imagemagick 59 | # find ${rootlocation}/${now}_res160/coco/images -name '*.png' | xargs -I {} convert {} -resize 320x320 {} 60 | 61 | # python main_yolo.py --resolution=320 --bs=96 \ 62 | # --jitter=40 --do_flip --rand_brightness --rand_contrast --random_erase \ 63 | # --path="${rootlocation}/${now}_res320" \ 64 | # --train_txt_path="${rootlocation}/${now}_res160/coco/manifest.txt" \ 65 | # --iterations=1500 \ 66 | # --r_feature=0.1 --p_norm=2 --alpha-mean=1.0 --alpha-var=1.0 --num_layers=51 \ 67 | # --first_bn_coef=0.0 \ 68 | # --main_loss_multiplier=1.0 \ 69 | # --alpha_img_stats=0.0 \ 70 | # --tv_l1=75.0 \ 71 | # --tv_l2=0.0 \ 72 | # --lr=0.002 --min_lr=0.0005 \ 73 | # --wd=0.0 \ 74 | # --save_every=100 \ 75 | # --seeds="0,0,23456" \ 76 | # --display_every=100 --init_scale=1.0 --init_bias=0.0 --nms_conf_thres=0.1 --alpha-ssim=0.0 --save-coco --real_mixin_alpha=1.0 -------------------------------------------------------------------------------- /scripts/singlebox_dataset.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 
2021, NVIDIA CORPORATION. All rights reserved. 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | # 9 | # Official PyTorch implementation of WACV2021 paper: 10 | # Data-Free Knowledge Distillation for Object Detection 11 | # A Chawla, H Yin, P Molchanov, J Alvarez 12 | # -------------------------------------------------------- 13 | 14 | """ 15 | Create randomized labels for COCO images 16 | COCO labels are structured as: 17 | [0-79] x y w h 18 | where x,y,w,h are normalized to 0-1 and have 6 places after decimal and 1 place before decimal (0/1) 19 | e.g: 20 | 1 0.128828 0.375258 0.249063 0.733333 21 | 0 0.476187 0.289613 0.028781 0.138099 22 | 23 | To randomize: 24 | First generate width and height dimensions 25 | Then jitter the x/y labels 26 | Then fix using max/min clipping 27 | """ 28 | import numpy as np 29 | import argparse 30 | import os 31 | from PIL import Image 32 | from tqdm import tqdm 33 | 34 | MINDIM=0.2 35 | MAXDIM=0.8 36 | 37 | def populate(args): 38 | 39 | # folder 40 | os.makedirs(os.path.join(args.outdir, "images", "train2014")) 41 | os.makedirs(os.path.join(args.outdir, "labels", "train2014")) 42 | 43 | for imgIdx in tqdm(range(args.numImages)): 44 | 45 | # box: w,h,x,y 46 | width = MINDIM + (MAXDIM-MINDIM) * np.random.rand() 47 | height = MINDIM + (MAXDIM-MINDIM) * np.random.rand() 48 | x = 0.5 + (0.5-width/2.0) * np.random.rand() * np.random.choice([1,-1]) 49 | y = 0.5 + (0.5-height/2.0) * np.random.rand() * np.random.choice([1,-1]) 50 | assert x+width/2.0 <= 1.0, "overflow width, x+width/2.0={}".format(x+width/2.0) 51 | assert y+height/2.0<= 1.0, "overflow height, y+height/2.0={}".format(y+height/2.0) 52 | 53 | # class 54 | cls = np.random.choice(np.arange(args.numClasses)) 55 | 56 | _label_str = "{} {:.6f} {:.6f} {:.6f} {:.6f}\n".format( 57 | int(cls), x, y, width, height 58 | ) 59 | 60 | im = Image.new(mode="RGB", size=(256,256), color=(127,127,127)) 61 | 62 | # save 63 | outfile = "COCO_train2014_{:012d}".format(imgIdx+1) 64 | im.save(os.path.join(args.outdir, "images", "train2014", outfile+".jpg")) 65 | with open(os.path.join(args.outdir, "labels", "train2014", outfile+".txt"), "wt") as f: 66 | f.write(_label_str) 67 | 68 | 69 | if __name__ == '__main__': 70 | parser = argparse.ArgumentParser(description='populate single box per image labels') 71 | parser.add_argument('--numImages', type=int, default=120000, help='number of images to generate') 72 | parser.add_argument('--numClasses', type=int, default=80, help='number of classes') 73 | parser.add_argument('--outdir', type=str, required=True, help='output directory') 74 | args = parser.parse_args() 75 | 76 | populate(args) 77 | 78 | -------------------------------------------------------------------------------- /utils_di.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. 
Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | # 9 | # Official PyTorch implementation of WACV2021 paper: 10 | # Data-Free Knowledge Distillation for Object Detection 11 | # A Chawla, H Yin, P Molchanov, J Alvarez 12 | # -------------------------------------------------------- 13 | 14 | 15 | import os 16 | import sys 17 | 18 | def create_folder(directory): 19 | if not os.path.exists(directory): 20 | os.makedirs(directory) 21 | --------------------------------------------------------------------------------
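For the multi-GPU workflow described in the header of scripts/LINE_looped_runner_yolo.sh (generate many 160x160 batches, coalesce them into one dataset, upsample to 320x320, then fine-tune at resolution=320 and batchsize=96), the coalesce and upsample steps can be sketched roughly as below. This is not a file from the repository: the /result/ tarball location and the imagemagick resize command come from the scripts above, while the coco/images layout inside each archive and the /tmp/coalesced working directory are assumptions.

# merge the per-batch tarballs written by LINE_looped_runner_yolo.sh into one directory
mkdir -p /tmp/coalesced
for f in /result/subset_*_res160.tgz; do
    tar xzf "$f" -C /tmp/coalesced   # each archive is assumed to unpack to subset_*/coco/{images,labels}
done

# list every generated image in a manifest file
find /tmp/coalesced -name '*.png' > /tmp/coalesced/manifest.txt

# upsample 160x160 -> 320x320 in place (imagemagick must be installed: apt-get install -y imagemagick)
find /tmp/coalesced -name '*.png' | xargs -I {} convert {} -resize 320x320 {}

# the 320x320 fine-tuning pass can then read /tmp/coalesced/manifest.txt as its train_txt_path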