├── .gitignore ├── LICENSE ├── README.md ├── clip_object_tracker.py ├── coco.names ├── cv.yml ├── data ├── coco.yaml ├── coco128.yaml ├── hyp.finetune.yaml ├── hyp.scratch.yaml ├── images │ ├── bus.jpg │ └── zidane.jpg ├── scripts │ ├── get_coco.sh │ └── get_voc.sh ├── video │ ├── cars.mp4 │ ├── fish.mp4 │ └── test.mp4 └── voc.yaml ├── deep_sort ├── __init__.py ├── detection.py ├── iou_matching.py ├── kalman_filter.py ├── linear_assignment.py ├── nn_matching.py ├── preprocessing.py ├── track.py └── tracker.py ├── example └── video │ └── fish.mp4 ├── model_data └── mars-small128.pb ├── models ├── __init__.py ├── common.py ├── experimental.py ├── export.py ├── hub │ ├── yolov3-spp.yaml │ ├── yolov3-tiny.yaml │ ├── yolov3.yaml │ ├── yolov5-fpn.yaml │ └── yolov5-panet.yaml ├── yolo.py ├── yolov5l.yaml ├── yolov5m.yaml ├── yolov5s.yaml └── yolov5x.yaml ├── requirements.txt ├── tool ├── config.py ├── darknet2pytorch.py ├── region_loss.py ├── torch_utils.py ├── utils.py ├── utils_iou.py └── yolo_layer.py ├── tools ├── freeze_model.py ├── generate_clip_detections.py └── generate_detections.py └── utils ├── __init__.py ├── activations.py ├── autoanchor.py ├── datasets.py ├── general.py ├── google_app_engine ├── Dockerfile ├── additional_requirements.txt └── app.yaml ├── google_utils.py ├── loss.py ├── metrics.py ├── models ├── __init__.py ├── common.py ├── experimental.py ├── export.py ├── hub │ ├── yolov3-spp.yaml │ ├── yolov3-tiny.yaml │ ├── yolov3.yaml │ ├── yolov5-fpn.yaml │ └── yolov5-panet.yaml ├── yolo.py ├── yolov5l.yaml ├── yolov5m.yaml ├── yolov5s.yaml └── yolov5x.yaml ├── plots.py ├── roboflow.py ├── torch_utils.py ├── yolov4.py ├── yolov5.py └── yolov7.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Repo-specific GitIgnore ---------------------------------------------------------------------------------------------- 2 | *.cfg 3 | !cfg/yolov3*.cfg 4 | 5 | storage.googleapis.com 6 | runs/* 7 | !data/images/zidane.jpg 8 | !data/images/bus.jpg 9 | !data/coco.names 10 | !data/coco_paper.names 11 | !data/coco.data 12 | !data/coco_*.data 13 | !data/coco_*.txt 14 | !data/trainvalno5k.shapes 15 | !data/*.sh 16 | 17 | pycocotools/* 18 | results*.txt 19 | gcp_test*.sh 20 | 21 | # Datasets ------------------------------------------------------------------------------------------------------------- 22 | coco/ 23 | coco128/ 24 | VOC/ 25 | 26 | # MATLAB GitIgnore ----------------------------------------------------------------------------------------------------- 27 | *.m~ 28 | *.mat 29 | !targets*.mat 30 | 31 | # Neural Network weights ----------------------------------------------------------------------------------------------- 32 | *.weights 33 | *.pt 34 | *.onnx 35 | *.mlmodel 36 | *.torchscript 37 | darknet53.conv.74 38 | yolov3-tiny.conv.15 39 | 40 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 41 | # Byte-compiled / optimized / DLL files 42 | __pycache__/ 43 | *.py[cod] 44 | *$py.class 45 | 46 | # C extensions 47 | *.so 48 | 49 | # Distribution / packaging 50 | .Python 51 | env/ 52 | build/ 53 | develop-eggs/ 54 | dist/ 55 | downloads/ 56 | eggs/ 57 | .eggs/ 58 | lib/ 59 | lib64/ 60 | parts/ 61 | sdist/ 62 | var/ 63 | wheels/ 64 | *.egg-info/ 65 | wandb/ 66 | .installed.cfg 67 | *.egg 68 | 69 | 70 | # PyInstaller 71 | # Usually these files are written by a python script from a template 72 | # before PyInstaller builds the exe, so as to inject date/other infos into 
it. 73 | *.manifest 74 | *.spec 75 | 76 | # Installer logs 77 | pip-log.txt 78 | pip-delete-this-directory.txt 79 | 80 | # Unit test / coverage reports 81 | htmlcov/ 82 | .tox/ 83 | .coverage 84 | .coverage.* 85 | .cache 86 | nosetests.xml 87 | coverage.xml 88 | *.cover 89 | .hypothesis/ 90 | 91 | # Translations 92 | *.mo 93 | *.pot 94 | 95 | # Django stuff: 96 | *.log 97 | local_settings.py 98 | 99 | # Flask stuff: 100 | instance/ 101 | .webassets-cache 102 | 103 | # Scrapy stuff: 104 | .scrapy 105 | 106 | # Sphinx documentation 107 | docs/_build/ 108 | 109 | # PyBuilder 110 | target/ 111 | 112 | # Jupyter Notebook 113 | .ipynb_checkpoints 114 | 115 | # pyenv 116 | .python-version 117 | 118 | # celery beat schedule file 119 | celerybeat-schedule 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # dotenv 125 | .env 126 | 127 | # virtualenv 128 | .venv* 129 | venv*/ 130 | ENV*/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | 145 | 146 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 147 | 148 | # General 149 | .DS_Store 150 | .AppleDouble 151 | .LSOverride 152 | 153 | # Icon must end with two \r 154 | Icon 155 | Icon? 156 | 157 | # Thumbnails 158 | ._* 159 | 160 | # Files that might appear in the root of a volume 161 | .DocumentRevisions-V100 162 | .fseventsd 163 | .Spotlight-V100 164 | .TemporaryItems 165 | .Trashes 166 | .VolumeIcon.icns 167 | .com.apple.timemachine.donotpresent 168 | 169 | # Directories potentially created on remote AFP share 170 | .AppleDB 171 | .AppleDesktop 172 | Network Trash Folder 173 | Temporary Items 174 | .apdisk 175 | 176 | 177 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 178 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 179 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 180 | 181 | # User-specific stuff: 182 | .idea/* 183 | .idea/**/workspace.xml 184 | .idea/**/tasks.xml 185 | .idea/dictionaries 186 | .html # Bokeh Plots 187 | .pg # TensorFlow Frozen Graphs 188 | .avi # videos 189 | 190 | # Sensitive or high-churn files: 191 | .idea/**/dataSources/ 192 | .idea/**/dataSources.ids 193 | .idea/**/dataSources.local.xml 194 | .idea/**/sqlDataSources.xml 195 | .idea/**/dynamic.xml 196 | .idea/**/uiDesigner.xml 197 | 198 | # Gradle: 199 | .idea/**/gradle.xml 200 | .idea/**/libraries 201 | 202 | # CMake 203 | cmake-build-debug/ 204 | cmake-build-release/ 205 | 206 | # Mongo Explorer plugin: 207 | .idea/**/mongoSettings.xml 208 | 209 | ## File-based project format: 210 | *.iws 211 | 212 | ## Plugin-specific files: 213 | 214 | # IntelliJ 215 | out/ 216 | 217 | # mpeltonen/sbt-idea plugin 218 | .idea_modules/ 219 | 220 | # JIRA plugin 221 | atlassian-ide-plugin.xml 222 | 223 | # Cursive Clojure plugin 224 | .idea/replstate.xml 225 | 226 | # Crashlytics plugin (for Android Studio and IntelliJ) 227 | com_crashlytics_export_strings.xml 228 | crashlytics.properties 229 | crashlytics-build.properties 230 | fabric.properties 231 | 232 | CLIP-repo/ 233 | clip/ 234 | 235 | pytorch-YOLOv4/ 236 | yolov4.weights 237 | yolov4.cfg -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Roboflow Object Tracking Example 2 | 3 | Object tracking using Roboflow Inference API and Zero-Shot (CLIP) Deep SORT. Read more in our 4 | [Zero-Shot Object Tracking announcement post](https://blog.roboflow.com/zero-shot-object-tracking/). 5 | 6 | ![Example fish tracking](https://user-images.githubusercontent.com/870796/130703648-8af62801-d66c-41f5-80ae-889301ae9b44.gif) 7 | 8 | Example object tracking courtesy of the [Roboflow Universe public Aquarium model and dataset](https://universe.roboflow.com/brad-dwyer/aquarium-combined). You can adapt this to your own dataset on Roboflow or any pre-trained model from [Roboflow Universe](https://universe.roboflow.com). 9 | 10 | # Overview 11 | 12 | Object tracking involves following individual objects of interest across frames. It 13 | combines the output of an [object detection](https://blog.roboflow.com/object-detection) model 14 | with a secondary algorithm to determine which detections are identifying "the same" 15 | object over time. 16 | 17 | Previously, this required training a special classification model to differentiate 18 | the instances of each different class. In this repository, we have used 19 | [OpenAI's CLIP zero-shot image classifier](https://blog.roboflow.com/clip-model-eli5-beginner-guide/) 20 | to create a universal object tracking repository. All you need is a trained object 21 | detection model and CLIP handles the instance identification for the object tracking 22 | algorithm. 23 | 24 | # Getting Started 25 | 26 | Colab Tutorial Here: 27 | 28 | Open In Colab 29 | 30 | ## Training your model 31 | 32 | To use the Roboflow Inference API as your detection engine: 33 | 34 | Upload, annotate, and train your model on Roboflow with [Roboflow Train](https://docs.roboflow.com/train). 35 | Your model will be hosted on an inference URL. 
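The tracker consumes that hosted model through its inference URL (passed with the `--url` flag shown under Run with Roboflow below). As an illustration of the URL shape only, with placeholder model ID and version rather than real values:

```
https://detect.roboflow.com/<model-id>/<version>
```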
36 | 37 | To use YOLOv7 as your detection engine: 38 | 39 | Follow Roboflow's [Train YOLOv7 on Custom Data Tutorial](https://blog.roboflow.com/yolov7-custom-dataset-training-tutorial/) 40 | 41 | The YOLOv7 implementation uses [this colab notebook](https://colab.research.google.com/drive/1X9A8odmK4k6l26NDviiT6dd6TgR-piOa) 42 | 43 | To use YOLOv5 as your detection engine: 44 | 45 | Follow Roboflow's [Train YOLOv5 on Custom Data Tutorial](https://blog.roboflow.com/how-to-train-yolov5-on-a-custom-dataset/) 46 | 47 | The YOLOv5 implementation uses [this colab notebook](https://colab.research.google.com/drive/1gDZ2xcTOgR39tGGs-EZ6i3RTs16wmzZQ) 48 | 49 | The YOLOv5 implementation is currently compatible with this commit hash of YOLOv5 `886f1c03d839575afecb059accf74296fad395b6` 50 | 51 | ## Performing Object Tracking 52 | 53 | ### Clone repositories 54 | 55 | ``` 56 | git clone https://github.com/roboflow-ai/zero-shot-object-tracking 57 | cd zero-shot-object-tracking 58 | git clone https://github.com/openai/CLIP.git CLIP-repo 59 | cp -r ./CLIP-repo/clip ./clip # Unix based 60 | robocopy CLIP-repo/clip clip\ # Windows (run instead of the cp command above) 61 | ``` 62 | 63 | ### Install requirements (python 3.7+) 64 | 65 | ```bash 66 | pip install --upgrade pip 67 | pip install -r requirements.txt 68 | ``` 69 | 70 | ### Install requirements (anaconda python 3.8) 71 | ``` 72 | conda install pytorch torchvision torchaudio -c pytorch 73 | conda install ftfy regex tqdm requests pandas seaborn 74 | pip install opencv-python pycocotools tensorflow 75 | ``` 76 | 77 | ### Run with Roboflow 78 | 79 | ```bash 80 | 81 | python clip_object_tracker.py --source data/video/fish.mp4 --url https://detect.roboflow.com/playing-cards-ow27d/1 --api_key ROBOFLOW_API_KEY --info 82 | ``` 83 | 84 | **NOTE:** you must provide a valid API key from [Roboflow](https://docs.roboflow.com). 85 | 86 | ### Run with YOLOv7 87 | ```bash 88 | 89 | python clip_object_tracker.py --weights models/yolov7.pt --source data/video/fish.mp4 --detection-engine yolov7 --info 90 | ``` 91 | 92 | ### Run with YOLOv5 93 | ```bash 94 | 95 | python clip_object_tracker.py --weights models/yolov5s.pt --source data/video/fish.mp4 --detection-engine yolov5 --info 96 | ``` 97 | 98 | ### Run with YOLOv4 99 | To use YOLOv4 for object detection you will need pretrained weights (.weights file), a model config for your weights (.cfg), and a class names file (.names). Test weights can be found here https://github.com/AlexeyAB/darknet. [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg) 100 | ``` 101 | python clip_object_tracker.py --weights yolov4.weights --cfg yolov4.cfg --names coco.names --source data/video/cars.mp4 --detection-engine yolov4 --info 102 | ``` 103 | (by default, output will be in runs/detect/exp[num]) 104 | 105 |
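The engine-specific commands above can be combined with the detection and tracking flags documented under Help below. The following is an illustrative sketch only; the threshold and budget values are arbitrary examples, not recommended defaults:

```bash
python clip_object_tracker.py --weights models/yolov5s.pt --source data/video/cars.mp4 \
    --detection-engine yolov5 --confidence 0.4 --overlap 0.3 \
    --max_cosine_distance 0.2 --nn_budget 100 --save-txt --info
```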
106 | 109 |
110 | 111 | Help 112 | 113 | ```bash 114 | python clip_object_tracker.py -h 115 | ``` 116 | ``` 117 | --weights WEIGHTS [WEIGHTS ...] model.pt path(s) 118 | --source SOURCE source (video/image) 119 | --img-size IMG_SIZE inference size (pixels) 120 | --confidence CONFIDENCE object confidence threshold 121 | --overlap OVERLAP IOU threshold for NMS 122 | --thickness THICKNESS Thickness of the bounding box strokes 123 | --device DEVICE cuda device, i.e. 0 or 0,1,2,3 or cpu 124 | --view-img display results 125 | --save-txt save results to *.txt 126 | --save-conf save confidences in --save-txt labels 127 | --classes CLASSES [CLASSES ...] filter by class: --class 0, or --class 0 2 3 128 | --agnostic-nms class-agnostic NMS 129 | --augment augmented inference 130 | --update update all models 131 | --project PROJECT save results to project/name 132 | --name NAME save results to project/name 133 | --exist-ok existing project/name ok, do not increment 134 | --nms_max_overlap Non-maxima suppression threshold: Maximum detection overlap. 135 | --max_cosine_distance Gating threshold for cosine distance metric (object appearance). 136 | --nn_budget NN_BUDGET Maximum size of the appearance descriptors gallery. If None, no budget is enforced. 137 | --api_key API_KEY Roboflow API Key. 138 | --url URL Roboflow Model URL. 139 | --info Print debugging info. 140 | --detection-engine Which engine you want to use for object detection (yolov7, yolov5, yolov4, roboflow). 141 | ``` 142 | ## Acknowledgements 143 | 144 | Huge thanks to: 145 | 146 | - [yolov4-deepsort by theAIGuysCode](https://github.com/theAIGuysCode/yolov4-deepsort) 147 | - [yolov5 by ultralytics](https://github.com/ultralytics/yolov5) 148 | - [yolov7 by WongKinYiu](https://github.com/WongKinYiu/yolov7) 149 | - [Deep SORT Repository by nwojke](https://github.com/nwojke/deep_sort) 150 | - [OpenAI for being awesome](https://openai.com/blog/clip/) 151 | -------------------------------------------------------------------------------- /coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /cv.yml: -------------------------------------------------------------------------------- 1 | name: cv 2 | 3 | channels: 4 | - conda-forge 5 | 6 | dependencies: 7 | - python==3.8.6 8 | - pip 9 | - cython 10 | - matplotlib>=3.2.2 11 | - numpy>=1.18.5 12 | - PyYAML>=5.3 13 | - scipy>=1.4.1 14 | - tensorboard>=2.2 15 | - torchvision>=0.8.1 16 | - 
tqdm>=4.41.0 17 | - requests==2.26.0 18 | - pandas==1.3.2 19 | - seaborn>=0.11.0 20 | - ftfy==6.0.3 21 | - pillow 22 | - opencv 23 | - regex 24 | - pip: 25 | - lxml 26 | - torch>=1.7.0 -------------------------------------------------------------------------------- /data/coco.yaml: -------------------------------------------------------------------------------- 1 | # COCO 2017 dataset http://cocodataset.org 2 | # Train command: python train.py --data coco.yaml 3 | # Default dataset location is next to /yolov5: 4 | # /parent_folder 5 | # /coco 6 | # /yolov5 7 | 8 | 9 | # download command/URL (optional) 10 | download: bash data/scripts/get_coco.sh 11 | 12 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 13 | train: ../coco/train2017.txt # 118287 images 14 | val: ../coco/val2017.txt # 5000 images 15 | test: ../coco/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 16 | 17 | # number of classes 18 | nc: 80 19 | 20 | # class names 21 | names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 22 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 23 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 24 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 25 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 26 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 27 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 28 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 29 | 'hair drier', 'toothbrush'] 30 | 31 | # Print classes 32 | # with open('data/coco.yaml') as f: 33 | # d = yaml.load(f, Loader=yaml.FullLoader) # dict 34 | # for i, x in enumerate(d['names']): 35 | # print(i, x) 36 | -------------------------------------------------------------------------------- /data/coco128.yaml: -------------------------------------------------------------------------------- 1 | # COCO 2017 dataset http://cocodataset.org - first 128 training images 2 | # Train command: python train.py --data coco128.yaml 3 | # Default dataset location is next to /yolov5: 4 | # /parent_folder 5 | # /coco128 6 | # /yolov5 7 | 8 | 9 | # download command/URL (optional) 10 | download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip 11 | 12 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 13 | train: ../coco128/images/train2017/ # 128 images 14 | val: ../coco128/images/train2017/ # 128 images 15 | 16 | # number of classes 17 | nc: 80 18 | 19 | # class names 20 | names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 21 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 22 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 23 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 24 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 
'banana', 'apple', 25 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 26 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 27 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 28 | 'hair drier', 'toothbrush'] 29 | -------------------------------------------------------------------------------- /data/hyp.finetune.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for VOC finetuning 2 | # python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | # Hyperparameter Evolution Results 7 | # Generations: 306 8 | # P R mAP.5 mAP.5:.95 box obj cls 9 | # Metrics: 0.6 0.936 0.896 0.684 0.0115 0.00805 0.00146 10 | 11 | lr0: 0.0032 12 | lrf: 0.12 13 | momentum: 0.843 14 | weight_decay: 0.00036 15 | warmup_epochs: 2.0 16 | warmup_momentum: 0.5 17 | warmup_bias_lr: 0.05 18 | box: 0.0296 19 | cls: 0.243 20 | cls_pw: 0.631 21 | obj: 0.301 22 | obj_pw: 0.911 23 | iou_t: 0.2 24 | anchor_t: 2.91 25 | # anchors: 3.63 26 | fl_gamma: 0.0 27 | hsv_h: 0.0138 28 | hsv_s: 0.664 29 | hsv_v: 0.464 30 | degrees: 0.373 31 | translate: 0.245 32 | scale: 0.898 33 | shear: 0.602 34 | perspective: 0.0 35 | flipud: 0.00856 36 | fliplr: 0.5 37 | mosaic: 1.0 38 | mixup: 0.243 39 | -------------------------------------------------------------------------------- /data/hyp.scratch.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for COCO training from scratch 2 | # python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.5 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 1.0 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | iou_t: 0.20 # IoU training threshold 19 | anchor_t: 4.0 # anchor-multiple threshold 20 | # anchors: 3 # anchors per output layer (0 to ignore) 21 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 22 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 23 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 24 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 25 | degrees: 0.0 # image rotation (+/- deg) 26 | translate: 0.1 # image translation (+/- fraction) 27 | scale: 0.5 # image scale (+/- gain) 28 | shear: 0.0 # image shear (+/- deg) 29 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 30 | flipud: 0.0 # image flip up-down (probability) 31 | fliplr: 0.5 # image flip left-right (probability) 32 | mosaic: 1.0 # image mosaic (probability) 33 | mixup: 0.0 # image mixup (probability) 34 | -------------------------------------------------------------------------------- /data/images/bus.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/data/images/bus.jpg -------------------------------------------------------------------------------- /data/images/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/data/images/zidane.jpg -------------------------------------------------------------------------------- /data/scripts/get_coco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # COCO 2017 dataset http://cocodataset.org 3 | # Download command: bash data/scripts/get_coco.sh 4 | # Train command: python train.py --data coco.yaml 5 | # Default dataset location is next to /yolov5: 6 | # /parent_folder 7 | # /coco 8 | # /yolov5 9 | 10 | # Download/unzip labels 11 | d='../' # unzip directory 12 | url=https://github.com/ultralytics/yolov5/releases/download/v1.0/ 13 | f='coco2017labels.zip' # 68 MB 14 | echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove 15 | 16 | # Download/unzip images 17 | d='../coco/images' # unzip directory 18 | url=http://images.cocodataset.org/zips/ 19 | f1='train2017.zip' # 19G, 118k images 20 | f2='val2017.zip' # 1G, 5k images 21 | f3='test2017.zip' # 7G, 41k images (optional) 22 | for f in $f1 $f2; do 23 | echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove 24 | done 25 | -------------------------------------------------------------------------------- /data/scripts/get_voc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/ 3 | # Download command: bash data/scripts/get_voc.sh 4 | # Train command: python train.py --data voc.yaml 5 | # Default dataset location is next to /yolov5: 6 | # /parent_folder 7 | # /VOC 8 | # /yolov5 9 | 10 | start=$(date +%s) 11 | mkdir -p ../tmp 12 | cd ../tmp/ 13 | 14 | # Download/unzip images and labels 15 | d='.' # unzip directory 16 | url=https://github.com/ultralytics/yolov5/releases/download/v1.0/ 17 | f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images 18 | f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images 19 | f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images 20 | for f in $f1 $f2 $f3; do 21 | echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove 22 | done 23 | 24 | end=$(date +%s) 25 | runtime=$((end - start)) 26 | echo "Completed in" $runtime "seconds" 27 | 28 | echo "Splitting dataset..." 29 | python3 - "$@" <train.txt 89 | cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt 90 | 91 | python3 - "$@" < 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. 
- iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /deep_sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 
105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(self._motion_mat, mean) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def update(self, mean, covariance, measurement): 155 | """Run Kalman filter correction step. 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The predicted state's mean vector (8 dimensional). 161 | covariance : ndarray 162 | The state's covariance matrix (8x8 dimensional). 163 | measurement : ndarray 164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 165 | is the center position, a the aspect ratio, and h the height of the 166 | bounding box. 167 | 168 | Returns 169 | ------- 170 | (ndarray, ndarray) 171 | Returns the measurement-corrected state distribution. 172 | 173 | """ 174 | projected_mean, projected_cov = self.project(mean, covariance) 175 | 176 | chol_factor, lower = scipy.linalg.cho_factor( 177 | projected_cov, lower=True, check_finite=False) 178 | kalman_gain = scipy.linalg.cho_solve( 179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 180 | check_finite=False).T 181 | innovation = measurement - projected_mean 182 | 183 | new_mean = mean + np.dot(innovation, kalman_gain.T) 184 | new_covariance = covariance - np.linalg.multi_dot(( 185 | kalman_gain, projected_cov, kalman_gain.T)) 186 | return new_mean, new_covariance 187 | 188 | def gating_distance(self, mean, covariance, measurements, 189 | only_position=False): 190 | """Compute gating distance between state distribution and measurements. 191 | 192 | A suitable distance threshold can be obtained from `chi2inv95`. If 193 | `only_position` is False, the chi-square distribution has 4 degrees of 194 | freedom, otherwise 2. 195 | 196 | Parameters 197 | ---------- 198 | mean : ndarray 199 | Mean vector over the state distribution (8 dimensional). 200 | covariance : ndarray 201 | Covariance of the state distribution (8x8 dimensional). 202 | measurements : ndarray 203 | An Nx4 dimensional matrix of N measurements, each in 204 | format (x, y, a, h) where (x, y) is the bounding box center 205 | position, a the aspect ratio, and h the height. 
206 | only_position : Optional[bool] 207 | If True, distance computation is done with respect to the bounding 208 | box center position only. 209 | 210 | Returns 211 | ------- 212 | ndarray 213 | Returns an array of length N, where the i-th element contains the 214 | squared Mahalanobis distance between (mean, covariance) and 215 | `measurements[i]`. 216 | 217 | """ 218 | mean, covariance = self.project(mean, covariance) 219 | if only_position: 220 | mean, covariance = mean[:2], covariance[:2, :2] 221 | measurements = measurements[:, :2] 222 | 223 | cholesky_factor = np.linalg.cholesky(covariance) 224 | d = measurements - mean 225 | z = scipy.linalg.solve_triangular( 226 | cholesky_factor, d.T, lower=True, check_finite=False, 227 | overwrite_b=True) 228 | squared_maha = np.sum(z * z, axis=0) 229 | return squared_maha 230 | -------------------------------------------------------------------------------- /deep_sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from scipy.optimize import linear_sum_assignment 5 | from . import kalman_filter 6 | 7 | 8 | INFTY_COST = 1e+5 9 | 10 | 11 | def min_cost_matching( 12 | distance_metric, max_distance, tracks, detections, track_indices=None, 13 | detection_indices=None): 14 | """Solve linear assignment problem. 15 | 16 | Parameters 17 | ---------- 18 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 19 | The distance metric is given a list of tracks and detections as well as 20 | a list of N track indices and M detection indices. The metric should 21 | return the NxM dimensional cost matrix, where element (i, j) is the 22 | association cost between the i-th track in the given track indices and 23 | the j-th detection in the given detection_indices. 24 | max_distance : float 25 | Gating threshold. Associations with cost larger than this value are 26 | disregarded. 27 | tracks : List[track.Track] 28 | A list of predicted tracks at the current time step. 29 | detections : List[detection.Detection] 30 | A list of detections at the current time step. 31 | track_indices : List[int] 32 | List of track indices that maps rows in `cost_matrix` to tracks in 33 | `tracks` (see description above). 34 | detection_indices : List[int] 35 | List of detection indices that maps columns in `cost_matrix` to 36 | detections in `detections` (see description above). 37 | 38 | Returns 39 | ------- 40 | (List[(int, int)], List[int], List[int]) 41 | Returns a tuple with the following three entries: 42 | * A list of matched track and detection indices. 43 | * A list of unmatched track indices. 44 | * A list of unmatched detection indices. 45 | 46 | """ 47 | if track_indices is None: 48 | track_indices = np.arange(len(tracks)) 49 | if detection_indices is None: 50 | detection_indices = np.arange(len(detections)) 51 | 52 | if len(detection_indices) == 0 or len(track_indices) == 0: 53 | return [], track_indices, detection_indices # Nothing to match. 54 | 55 | cost_matrix = distance_metric( 56 | tracks, detections, track_indices, detection_indices) 57 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 58 | 59 | # Start of Edited Block by Maxwell Stone 60 | cost_matrix = np.nan_to_num(cost_matrix, copy=True, nan=0.0, posinf=None, neginf=None) 61 | # This code is not from the original DeepSORT algorithm and should be considered if there are tracking issues. 
62 | # This line replaces Nan values, caused by incorrect CLIP detections, with 0's to stop from crashing. 63 | # **Warning. Issues may arise from this config. It is not completely tested. 64 | # End of Edited Block by Maxwell Stone 65 | 66 | indices = linear_sum_assignment(cost_matrix) 67 | indices = np.asarray(indices) 68 | indices = np.transpose(indices) 69 | matches, unmatched_tracks, unmatched_detections = [], [], [] 70 | for col, detection_idx in enumerate(detection_indices): 71 | if col not in indices[:, 1]: 72 | unmatched_detections.append(detection_idx) 73 | for row, track_idx in enumerate(track_indices): 74 | if row not in indices[:, 0]: 75 | unmatched_tracks.append(track_idx) 76 | for row, col in indices: 77 | track_idx = track_indices[row] 78 | detection_idx = detection_indices[col] 79 | if cost_matrix[row, col] > max_distance: 80 | unmatched_tracks.append(track_idx) 81 | unmatched_detections.append(detection_idx) 82 | else: 83 | matches.append((track_idx, detection_idx)) 84 | return matches, unmatched_tracks, unmatched_detections 85 | 86 | 87 | def matching_cascade( 88 | distance_metric, max_distance, cascade_depth, tracks, detections, 89 | track_indices=None, detection_indices=None): 90 | """Run matching cascade. 91 | 92 | Parameters 93 | ---------- 94 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 95 | The distance metric is given a list of tracks and detections as well as 96 | a list of N track indices and M detection indices. The metric should 97 | return the NxM dimensional cost matrix, where element (i, j) is the 98 | association cost between the i-th track in the given track indices and 99 | the j-th detection in the given detection indices. 100 | max_distance : float 101 | Gating threshold. Associations with cost larger than this value are 102 | disregarded. 103 | cascade_depth: int 104 | The cascade depth, should be set to the maximum track age. 105 | tracks : List[track.Track] 106 | A list of predicted tracks at the current time step. 107 | detections : List[detection.Detection] 108 | A list of detections at the current time step. 109 | track_indices : Optional[List[int]] 110 | List of track indices that maps rows in `cost_matrix` to tracks in 111 | `tracks` (see description above). Defaults to all tracks. 112 | detection_indices : Optional[List[int]] 113 | List of detection indices that maps columns in `cost_matrix` to 114 | detections in `detections` (see description above). Defaults to all 115 | detections. 116 | 117 | Returns 118 | ------- 119 | (List[(int, int)], List[int], List[int]) 120 | Returns a tuple with the following three entries: 121 | * A list of matched track and detection indices. 122 | * A list of unmatched track indices. 123 | * A list of unmatched detection indices.
124 | 125 | """ 126 | if track_indices is None: 127 | track_indices = list(range(len(tracks))) 128 | if detection_indices is None: 129 | detection_indices = list(range(len(detections))) 130 | 131 | unmatched_detections = detection_indices 132 | matches = [] 133 | for level in range(cascade_depth): 134 | if len(unmatched_detections) == 0: # No detections left 135 | break 136 | 137 | track_indices_l = [ 138 | k for k in track_indices 139 | if tracks[k].time_since_update == 1 + level 140 | ] 141 | if len(track_indices_l) == 0: # Nothing to match at this level 142 | continue 143 | 144 | matches_l, _, unmatched_detections = \ 145 | min_cost_matching( 146 | distance_metric, max_distance, tracks, detections, 147 | track_indices_l, unmatched_detections) 148 | matches += matches_l 149 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 150 | return matches, unmatched_tracks, unmatched_detections 151 | 152 | 153 | def gate_cost_matrix( 154 | kf, cost_matrix, tracks, detections, track_indices, detection_indices, 155 | gated_cost=INFTY_COST, only_position=False): 156 | """Invalidate infeasible entries in cost matrix based on the state 157 | distributions obtained by Kalman filtering. 158 | 159 | Parameters 160 | ---------- 161 | kf : The Kalman filter. 162 | cost_matrix : ndarray 163 | The NxM dimensional cost matrix, where N is the number of track indices 164 | and M is the number of detection indices, such that entry (i, j) is the 165 | association cost between `tracks[track_indices[i]]` and 166 | `detections[detection_indices[j]]`. 167 | tracks : List[track.Track] 168 | A list of predicted tracks at the current time step. 169 | detections : List[detection.Detection] 170 | A list of detections at the current time step. 171 | track_indices : List[int] 172 | List of track indices that maps rows in `cost_matrix` to tracks in 173 | `tracks` (see description above). 174 | detection_indices : List[int] 175 | List of detection indices that maps columns in `cost_matrix` to 176 | detections in `detections` (see description above). 177 | gated_cost : Optional[float] 178 | Entries in the cost matrix corresponding to infeasible associations are 179 | set this value. Defaults to a very large value. 180 | only_position : Optional[bool] 181 | If True, only the x, y position of the state distribution is considered 182 | during gating. Defaults to False. 183 | 184 | Returns 185 | ------- 186 | ndarray 187 | Returns the modified cost matrix. 188 | 189 | """ 190 | gating_dim = 2 if only_position else 4 191 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 192 | measurements = np.asarray( 193 | [detections[i].to_xyah() for i in detection_indices]) 194 | for row, track_idx in enumerate(track_indices): 195 | track = tracks[track_idx] 196 | gating_distance = kf.gating_distance( 197 | track.mean, track.covariance, measurements, only_position) 198 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 199 | return cost_matrix 200 | -------------------------------------------------------------------------------- /deep_sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | def _pdist(a, b): 6 | """Compute pair-wise squared distance between points in `a` and `b`. 7 | 8 | Parameters 9 | ---------- 10 | a : array_like 11 | An NxM matrix of N samples of dimensionality M. 12 | b : array_like 13 | An LxM matrix of L samples of dimensionality M. 
14 | 15 | Returns 16 | ------- 17 | ndarray 18 | Returns a matrix of size len(a), len(b) such that element (i, j) 19 | contains the squared distance between `a[i]` and `b[j]`. 20 | 21 | """ 22 | a, b = np.asarray(a), np.asarray(b) 23 | if len(a) == 0 or len(b) == 0: 24 | return np.zeros((len(a), len(b))) 25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 27 | r2 = np.clip(r2, 0., float(np.inf)) 28 | return r2 29 | 30 | 31 | def _cosine_distance(a, b, data_is_normalized=False): 32 | """Compute pair-wise cosine distance between points in `a` and `b`. 33 | 34 | Parameters 35 | ---------- 36 | a : array_like 37 | An NxM matrix of N samples of dimensionality M. 38 | b : array_like 39 | An LxM matrix of L samples of dimensionality M. 40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 42 | Otherwise, a and b are explicitly normalized to length 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that element (i, j) 48 | contains the cosine distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far.
120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | 126 | if metric == "euclidean": 127 | self._metric = _nn_euclidean_distance 128 | elif metric == "cosine": 129 | self._metric = _nn_cosine_distance 130 | else: 131 | raise ValueError( 132 | "Invalid metric; must be either 'euclidean' or 'cosine'") 133 | self.matching_threshold = matching_threshold 134 | self.budget = budget 135 | self.samples = {} 136 | 137 | def partial_fit(self, features, targets, active_targets): 138 | """Update the distance metric with new data. 139 | 140 | Parameters 141 | ---------- 142 | features : ndarray 143 | An NxM matrix of N features of dimensionality M. 144 | targets : ndarray 145 | An integer array of associated target identities. 146 | active_targets : List[int] 147 | A list of targets that are currently present in the scene. 148 | 149 | """ 150 | for feature, target in zip(features, targets): 151 | self.samples.setdefault(target, []).append(feature) 152 | if self.budget is not None: 153 | self.samples[target] = self.samples[target][-self.budget:] 154 | self.samples = {k: self.samples[k] for k in active_targets} 155 | 156 | def distance(self, features, targets): 157 | """Compute distance between features and targets. 158 | 159 | Parameters 160 | ---------- 161 | features : ndarray 162 | An NxM matrix of N features of dimensionality M. 163 | targets : List[int] 164 | A list of targets to match the given `features` against. 165 | 166 | Returns 167 | ------- 168 | ndarray 169 | Returns a cost matrix of shape len(targets), len(features), where 170 | element (i, j) contains the closest squared distance between 171 | `targets[i]` and `features[j]`. 172 | 173 | """ 174 | cost_matrix = np.zeros((len(targets), len(features))) 175 | for i, target in enumerate(targets): 176 | cost_matrix[i, :] = self._metric(self.samples[target], features) 177 | return cost_matrix 178 | -------------------------------------------------------------------------------- /deep_sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, classes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> classes = [d.classes for d in detections] 19 | >>> scores = [d.confidence for d in detections] 20 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 21 | >>> detections = [detections[i] for i in indices] 22 | 23 | Parameters 24 | ---------- 25 | boxes : ndarray 26 | Array of ROIs (x, y, width, height). 27 | max_bbox_overlap : float 28 | ROIs that overlap more than this values are suppressed. 29 | scores : Optional[array_like] 30 | Detector confidence score. 31 | 32 | Returns 33 | ------- 34 | List[int] 35 | Returns indices of detections that have survived non-maxima suppression. 
36 | 37 | """ 38 | if len(boxes) == 0: 39 | return [] 40 | 41 | boxes = boxes.astype(np.float) 42 | pick = [] 43 | 44 | x1 = boxes[:, 0] 45 | y1 = boxes[:, 1] 46 | x2 = boxes[:, 2] + boxes[:, 0] 47 | y2 = boxes[:, 3] + boxes[:, 1] 48 | 49 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 50 | if scores is not None: 51 | idxs = np.argsort(scores) 52 | else: 53 | idxs = np.argsort(y2) 54 | 55 | while len(idxs) > 0: 56 | last = len(idxs) - 1 57 | i = idxs[last] 58 | pick.append(i) 59 | 60 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 61 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 62 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 63 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 64 | 65 | w = np.maximum(0, xx2 - xx1 + 1) 66 | h = np.maximum(0, yy2 - yy1 + 1) 67 | 68 | overlap = (w * h) / area[idxs[:last]] 69 | 70 | idxs = np.delete( 71 | idxs, np.concatenate( 72 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 73 | 74 | return pick 75 | -------------------------------------------------------------------------------- /deep_sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | """ 21 | A single target track with state space `(x, y, a, h)` and associated 22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 23 | aspect ratio and `h` is the height. 24 | 25 | Parameters 26 | ---------- 27 | mean : ndarray 28 | Mean vector of the initial state distribution. 29 | covariance : ndarray 30 | Covariance matrix of the initial state distribution. 31 | track_id : int 32 | A unique track identifier. 33 | n_init : int 34 | Number of consecutive detections before the track is confirmed. The 35 | track state is set to `Deleted` if a miss occurs within the first 36 | `n_init` frames. 37 | max_age : int 38 | The maximum number of consecutive misses before the track state is 39 | set to `Deleted`. 40 | feature : Optional[ndarray] 41 | Feature vector of the detection this track originates from. If not None, 42 | this feature is added to the `features` cache. 43 | 44 | Attributes 45 | ---------- 46 | mean : ndarray 47 | Mean vector of the initial state distribution. 48 | covariance : ndarray 49 | Covariance matrix of the initial state distribution. 50 | track_id : int 51 | A unique track identifier. 52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurance. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list. 
63 | 64 | """ 65 | 66 | def __init__(self, mean, covariance, track_id, n_init, max_age, 67 | feature=None, class_num=None): 68 | self.mean = mean 69 | self.covariance = covariance 70 | self.track_id = track_id 71 | self.hits = 1 72 | self.age = 1 73 | self.time_since_update = 0 74 | 75 | self.state = TrackState.Tentative 76 | self.features = [] 77 | if feature is not None: 78 | self.features.append(feature) 79 | 80 | self._n_init = n_init 81 | self._max_age = max_age 82 | self.class_num = class_num 83 | 84 | def to_tlwh(self): 85 | """Get current position in bounding box format `(top left x, top left y, 86 | width, height)`. 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | The bounding box. 92 | 93 | """ 94 | ret = self.mean[:4].copy() 95 | ret[2] *= ret[3] 96 | ret[:2] -= ret[2:] / 2 97 | return ret 98 | 99 | def to_tlbr(self): 100 | """Get current position in bounding box format `(min x, miny, max x, 101 | max y)`. 102 | 103 | Returns 104 | ------- 105 | ndarray 106 | The bounding box. 107 | 108 | """ 109 | ret = self.to_tlwh() 110 | ret[2:] = ret[:2] + ret[2:] 111 | return ret 112 | 113 | def predict(self, kf): 114 | """Propagate the state distribution to the current time step using a 115 | Kalman filter prediction step. 116 | 117 | Parameters 118 | ---------- 119 | kf : kalman_filter.KalmanFilter 120 | The Kalman filter. 121 | 122 | """ 123 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 124 | self.age += 1 125 | self.time_since_update += 1 126 | 127 | def update(self, kf, detection): 128 | """Perform Kalman filter measurement update step and update the feature 129 | cache. 130 | 131 | Parameters 132 | ---------- 133 | kf : kalman_filter.KalmanFilter 134 | The Kalman filter. 135 | detection : Detection 136 | The associated detection. 137 | 138 | """ 139 | self.mean, self.covariance = kf.update( 140 | self.mean, self.covariance, detection.to_xyah()) 141 | self.features.append(detection.feature) 142 | 143 | self.hits += 1 144 | self.time_since_update = 0 145 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 146 | self.state = TrackState.Confirmed 147 | 148 | def mark_missed(self): 149 | """Mark this track as missed (no association at the current time step). 150 | """ 151 | if self.state == TrackState.Tentative: 152 | self.state = TrackState.Deleted 153 | elif self.time_since_update > self._max_age: 154 | self.state = TrackState.Deleted 155 | 156 | def is_tentative(self): 157 | """Returns True if this track is tentative (unconfirmed). 158 | """ 159 | return self.state == TrackState.Tentative 160 | 161 | def is_confirmed(self): 162 | """Returns True if this track is confirmed.""" 163 | return self.state == TrackState.Confirmed 164 | 165 | def is_deleted(self): 166 | """Returns True if this track is dead and should be deleted.""" 167 | return self.state == TrackState.Deleted 168 | -------------------------------------------------------------------------------- /deep_sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 
18 | max_age : int 19 | Maximum number of consecutive misses before a track is deleted. 20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of consecutive misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=60, n_init=3): 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | def update(self, detections): 59 | """Perform measurement update and track management. 60 | 61 | Parameters 62 | ---------- 63 | detections : List[deep_sort.detection.Detection] 64 | A list of detections at the current time step. 65 | 66 | """ 67 | # Run matching cascade. 68 | matches, unmatched_tracks, unmatched_detections = \ 69 | self._match(detections) 70 | 71 | # Update track set. 72 | for track_idx, detection_idx in matches: 73 | self.tracks[track_idx].update( 74 | self.kf, detections[detection_idx]) 75 | for track_idx in unmatched_tracks: 76 | self.tracks[track_idx].mark_missed() 77 | for detection_idx in unmatched_detections: 78 | self._initiate_track(detections[detection_idx]) 79 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 80 | 81 | # Update distance metric. 82 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 83 | features, targets = [], [] 84 | for track in self.tracks: 85 | if not track.is_confirmed(): 86 | continue 87 | features += track.features 88 | targets += [track.track_id for _ in track.features] 89 | track.features = [] 90 | self.metric.partial_fit( 91 | np.asarray(features), np.asarray(targets), active_targets) 92 | 93 | def _match(self, detections): 94 | 95 | def gated_metric(tracks, dets, track_indices, detection_indices): 96 | features = np.array([dets[i].feature for i in detection_indices]) 97 | targets = np.array([tracks[i].track_id for i in track_indices]) 98 | cost_matrix = self.metric.distance(features, targets) 99 | cost_matrix = linear_assignment.gate_cost_matrix( 100 | self.kf, cost_matrix, tracks, dets, track_indices, 101 | detection_indices) 102 | 103 | return cost_matrix 104 | 105 | # Split track set into confirmed and unconfirmed tracks. 106 | confirmed_tracks = [ 107 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 108 | unconfirmed_tracks = [ 109 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 110 | 111 | # Associate confirmed tracks using appearance features.
112 | matches_a, unmatched_tracks_a, unmatched_detections = \ 113 | linear_assignment.matching_cascade( 114 | gated_metric, self.metric.matching_threshold, self.max_age, 115 | self.tracks, detections, confirmed_tracks) 116 | 117 | # Associate remaining tracks together with unconfirmed tracks using IOU. 118 | iou_track_candidates = unconfirmed_tracks + [ 119 | k for k in unmatched_tracks_a if 120 | self.tracks[k].time_since_update == 1] 121 | unmatched_tracks_a = [ 122 | k for k in unmatched_tracks_a if 123 | self.tracks[k].time_since_update != 1] 124 | matches_b, unmatched_tracks_b, unmatched_detections = \ 125 | linear_assignment.min_cost_matching( 126 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 127 | detections, iou_track_candidates, unmatched_detections) 128 | 129 | matches = matches_a + matches_b 130 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 131 | return matches, unmatched_tracks, unmatched_detections 132 | 133 | def _initiate_track(self, detection): 134 | mean, covariance = self.kf.initiate(detection.to_xyah()) 135 | class_num = detection.class_num 136 | self.tracks.append(Track( 137 | mean, covariance, self._next_id, self.n_init, self.max_age, 138 | detection.feature, class_num)) 139 | self._next_id += 1 140 | -------------------------------------------------------------------------------- /example/video/fish.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/example/video/fish.mp4 -------------------------------------------------------------------------------- /model_data/mars-small128.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/model_data/mars-small128.pb -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/models/__init__.py -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from models.common import Conv, DWConv 8 | from utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | 25 | class Sum(nn.Module): 26 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 27 | def __init__(self, n, weight=False): # n: number of inputs 28 | super(Sum, self).__init__() 29 | self.weight = weight # apply weights boolean 30 | self.iter = range(n - 1) # iter object 31 | if weight: 32 | self.w = nn.Parameter(-torch.arange(1., n) / 2, 
requires_grad=True) # layer weights 33 | 34 | def forward(self, x): 35 | y = x[0] # no weight 36 | if self.weight: 37 | w = torch.sigmoid(self.w) * 2 38 | for i in self.iter: 39 | y = y + x[i + 1] * w[i] 40 | else: 41 | for i in self.iter: 42 | y = y + x[i + 1] 43 | return y 44 | 45 | 46 | class GhostConv(nn.Module): 47 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 48 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 49 | super(GhostConv, self).__init__() 50 | c_ = c2 // 2 # hidden channels 51 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 52 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 53 | 54 | def forward(self, x): 55 | y = self.cv1(x) 56 | return torch.cat([y, self.cv2(y)], 1) 57 | 58 | 59 | class GhostBottleneck(nn.Module): 60 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 61 | def __init__(self, c1, c2, k, s): 62 | super(GhostBottleneck, self).__init__() 63 | c_ = c2 // 2 64 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 65 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 66 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 67 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 68 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 69 | 70 | def forward(self, x): 71 | return self.conv(x) + self.shortcut(x) 72 | 73 | 74 | class MixConv2d(nn.Module): 75 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 76 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 77 | super(MixConv2d, self).__init__() 78 | groups = len(k) 79 | if equal_ch: # equal c_ per group 80 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 81 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 82 | else: # equal weight.numel() per group 83 | b = [c2] + [0] * groups 84 | a = np.eye(groups + 1, groups, k=-1) 85 | a -= np.roll(a, 1, axis=1) 86 | a *= np.array(k) ** 2 87 | a[0] = 1 88 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 89 | 90 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 91 | self.bn = nn.BatchNorm2d(c2) 92 | self.act = nn.LeakyReLU(0.1, inplace=True) 93 | 94 | def forward(self, x): 95 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 96 | 97 | 98 | class Ensemble(nn.ModuleList): 99 | # Ensemble of models 100 | def __init__(self): 101 | super(Ensemble, self).__init__() 102 | 103 | def forward(self, x, augment=False): 104 | y = [] 105 | for module in self: 106 | y.append(module(x, augment)[0]) 107 | # y = torch.stack(y).max(0)[0] # max ensemble 108 | # y = torch.cat(y, 1) # nms ensemble 109 | y = torch.stack(y).mean(0) # mean ensemble 110 | return y, None # inference, train output 111 | 112 | 113 | def attempt_load(weights, map_location=None): 114 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 115 | model = Ensemble() 116 | for w in weights if isinstance(weights, list) else [weights]: 117 | attempt_download(w) 118 | model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model 119 | 120 | # Compatibility updates 121 | for m in model.modules(): 122 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 123 | m.inplace = True # pytorch 1.7.0 compatibility 124 | elif type(m) is Conv: 125 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 126 | 127 | if len(model) == 1: 128 | return model[-1] # return 
model 129 | else: 130 | print('Ensemble created with %s\n' % weights) 131 | for k in ['names', 'stride']: 132 | setattr(model, k, getattr(model[-1], k)) 133 | return model # return ensemble 134 | -------------------------------------------------------------------------------- /models/export.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental import attempt_load 18 | from utils.activations import Hardswish, SiLU 19 | from utils.general import set_logging, check_img_size 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ 24 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 25 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 26 | opt = parser.parse_args() 27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 28 | print(opt) 29 | set_logging() 30 | t = time.time() 31 | 32 | # Load PyTorch model 33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model 34 | labels = model.names 35 | 36 | # Checks 37 | gs = int(max(model.stride)) # grid size (max stride) 38 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples 39 | 40 | # Input 41 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection 42 | 43 | # Update model 44 | for k, m in model.named_modules(): 45 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 46 | if isinstance(m, models.common.Conv): # assign export-friendly activations 47 | if isinstance(m.act, nn.Hardswish): 48 | m.act = Hardswish() 49 | elif isinstance(m.act, nn.SiLU): 50 | m.act = SiLU() 51 | # elif isinstance(m, models.yolo.Detect): 52 | # m.forward = m.forward_export # assign forward (optional) 53 | model.model[-1].export = True # set Detect() layer export=True 54 | y = model(img) # dry run 55 | 56 | # TorchScript export 57 | try: 58 | print('\nStarting TorchScript export with torch %s...' % torch.__version__) 59 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename 60 | ts = torch.jit.trace(model, img) 61 | ts.save(f) 62 | print('TorchScript export success, saved as %s' % f) 63 | except Exception as e: 64 | print('TorchScript export failure: %s' % e) 65 | 66 | # ONNX export 67 | try: 68 | import onnx 69 | 70 | print('\nStarting ONNX export with onnx %s...' 
% onnx.__version__) 71 | f = opt.weights.replace('.pt', '.onnx') # filename 72 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 73 | output_names=['classes', 'boxes'] if y is None else ['output']) 74 | 75 | # Checks 76 | onnx_model = onnx.load(f) # load onnx model 77 | onnx.checker.check_model(onnx_model) # check onnx model 78 | # print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 79 | print('ONNX export success, saved as %s' % f) 80 | except Exception as e: 81 | print('ONNX export failure: %s' % e) 82 | 83 | # CoreML export 84 | try: 85 | import coremltools as ct 86 | 87 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__) 88 | # convert model from torchscript and apply pixel scaling as per detect.py 89 | model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) 90 | f = opt.weights.replace('.pt', '.mlmodel') # filename 91 | model.save(f) 92 | print('CoreML export success, saved as %s' % f) 93 | except Exception as e: 94 | print('CoreML export failure: %s' % e) 95 | 96 | # Finish 97 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t)) 98 | -------------------------------------------------------------------------------- /models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,14, 23,27, 37,58] # P4/16 9 | - [81,82, 135,169, 344,319] # 
P5/32 10 | 11 | # YOLOv3-tiny backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Conv, [16, 3, 1]], # 0 15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 16 | [-1, 1, Conv, [32, 3, 1]], 17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 18 | [-1, 1, Conv, [64, 3, 1]], 19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 20 | [-1, 1, Conv, [128, 3, 1]], 21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 22 | [-1, 1, Conv, [256, 3, 1]], 23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 24 | [-1, 1, Conv, [512, 3, 1]], 25 | [-1, 1, nn.ZeroPad2d, [0, 1, 0, 1]], # 11 26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 27 | ] 28 | 29 | # YOLOv3-tiny head 30 | head: 31 | [[-1, 1, Conv, [1024, 3, 1]], 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 34 | 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 39 | 40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) 41 | ] 42 | -------------------------------------------------------------------------------- /models/hub/yolov3.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3 head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, Conv, [512, [1, 1]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, 
Bottleneck, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 6, BottleneckCSP, [1024]], # 9 25 | ] 26 | 27 | # YOLOv5 FPN head 28 | head: 29 | [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large) 30 | 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium) 35 | 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small) 40 | 41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | -------------------------------------------------------------------------------- /models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 PANet head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, 
SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | 
[-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | width_multiple: 1.25 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # pip install -r requirements.txt 2 | 3 | # base ---------------------------------------- 4 | Cython 5 | matplotlib>=3.2.2 6 | numpy>=1.18.5 7 | opencv-python>=4.1.2 8 | Pillow 9 | PyYAML>=5.3 10 | scipy>=1.4.1 11 | tensorboard>=2.2 12 | torch>=1.7.0 13 | torchvision>=0.8.1 14 | tqdm>=4.41.0 15 | requests==2.26.0 16 | pyyaml==5.4.1 17 | pandas==1.3.2 18 | 19 | # plotting ------------------------------------ 20 | seaborn>=0.11.0 21 | pandas 22 | 23 | # clip 24 | ftfy==6.0.3 25 | regex==2.5.86 -------------------------------------------------------------------------------- /tool/region_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from 
tool.torch_utils import * 4 | 5 | 6 | def build_targets(pred_boxes, target, anchors, num_anchors, num_classes, nH, nW, noobject_scale, object_scale, 7 | sil_thresh, seen): 8 | nB = target.size(0) 9 | nA = num_anchors 10 | nC = num_classes 11 | anchor_step = len(anchors) / num_anchors 12 | conf_mask = torch.ones(nB, nA, nH, nW) * noobject_scale 13 | coord_mask = torch.zeros(nB, nA, nH, nW) 14 | cls_mask = torch.zeros(nB, nA, nH, nW) 15 | tx = torch.zeros(nB, nA, nH, nW) 16 | ty = torch.zeros(nB, nA, nH, nW) 17 | tw = torch.zeros(nB, nA, nH, nW) 18 | th = torch.zeros(nB, nA, nH, nW) 19 | tconf = torch.zeros(nB, nA, nH, nW) 20 | tcls = torch.zeros(nB, nA, nH, nW) 21 | 22 | nAnchors = nA * nH * nW 23 | nPixels = nH * nW 24 | for b in range(nB): 25 | cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t() 26 | cur_ious = torch.zeros(nAnchors) 27 | for t in range(50): 28 | if target[b][t * 5 + 1] == 0: 29 | break 30 | gx = target[b][t * 5 + 1] * nW 31 | gy = target[b][t * 5 + 2] * nH 32 | gw = target[b][t * 5 + 3] * nW 33 | gh = target[b][t * 5 + 4] * nH 34 | cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t() 35 | cur_ious = torch.max(cur_ious, bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False)) 36 | conf_mask[b][cur_ious > sil_thresh] = 0 37 | if seen < 12800: 38 | if anchor_step == 4: 39 | tx = torch.FloatTensor(anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([2])).view(1, nA, 1, 40 | 1).repeat( 41 | nB, 1, nH, nW) 42 | ty = torch.FloatTensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([2])).view( 43 | 1, nA, 1, 1).repeat(nB, 1, nH, nW) 44 | else: 45 | tx.fill_(0.5) 46 | ty.fill_(0.5) 47 | tw.zero_() 48 | th.zero_() 49 | coord_mask.fill_(1) 50 | 51 | nGT = 0 52 | nCorrect = 0 53 | for b in range(nB): 54 | for t in range(50): 55 | if target[b][t * 5 + 1] == 0: 56 | break 57 | nGT = nGT + 1 58 | best_iou = 0.0 59 | best_n = -1 60 | min_dist = 10000 61 | gx = target[b][t * 5 + 1] * nW 62 | gy = target[b][t * 5 + 2] * nH 63 | gi = int(gx) 64 | gj = int(gy) 65 | gw = target[b][t * 5 + 3] * nW 66 | gh = target[b][t * 5 + 4] * nH 67 | gt_box = [0, 0, gw, gh] 68 | for n in range(nA): 69 | aw = anchors[anchor_step * n] 70 | ah = anchors[anchor_step * n + 1] 71 | anchor_box = [0, 0, aw, ah] 72 | iou = bbox_iou(anchor_box, gt_box, x1y1x2y2=False) 73 | if anchor_step == 4: 74 | ax = anchors[anchor_step * n + 2] 75 | ay = anchors[anchor_step * n + 3] 76 | dist = pow(((gi + ax) - gx), 2) + pow(((gj + ay) - gy), 2) 77 | if iou > best_iou: 78 | best_iou = iou 79 | best_n = n 80 | elif anchor_step == 4 and iou == best_iou and dist < min_dist: 81 | best_iou = iou 82 | best_n = n 83 | min_dist = dist 84 | 85 | gt_box = [gx, gy, gw, gh] 86 | pred_box = pred_boxes[b * nAnchors + best_n * nPixels + gj * nW + gi] 87 | 88 | coord_mask[b][best_n][gj][gi] = 1 89 | cls_mask[b][best_n][gj][gi] = 1 90 | conf_mask[b][best_n][gj][gi] = object_scale 91 | tx[b][best_n][gj][gi] = target[b][t * 5 + 1] * nW - gi 92 | ty[b][best_n][gj][gi] = target[b][t * 5 + 2] * nH - gj 93 | tw[b][best_n][gj][gi] = math.log(gw / anchors[anchor_step * best_n]) 94 | th[b][best_n][gj][gi] = math.log(gh / anchors[anchor_step * best_n + 1]) 95 | iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) # best_iou 96 | tconf[b][best_n][gj][gi] = iou 97 | tcls[b][best_n][gj][gi] = target[b][t * 5] 98 | if iou > 0.5: 99 | nCorrect = nCorrect + 1 100 | 101 | return nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls 102 | 103 | 104 | class 
RegionLoss(nn.Module): 105 | def __init__(self, num_classes=0, anchors=[], num_anchors=1): 106 | super(RegionLoss, self).__init__() 107 | self.num_classes = num_classes 108 | self.anchors = anchors 109 | self.num_anchors = num_anchors 110 | self.anchor_step = len(anchors) / num_anchors 111 | self.coord_scale = 1 112 | self.noobject_scale = 1 113 | self.object_scale = 5 114 | self.class_scale = 1 115 | self.thresh = 0.6 116 | self.seen = 0 117 | 118 | def forward(self, output, target): 119 | # output : BxAs*(4+1+num_classes)*H*W 120 | t0 = time.time() 121 | nB = output.data.size(0) 122 | nA = self.num_anchors 123 | nC = self.num_classes 124 | nH = output.data.size(2) 125 | nW = output.data.size(3) 126 | 127 | output = output.view(nB, nA, (5 + nC), nH, nW) 128 | x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW)) 129 | y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW)) 130 | w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW) 131 | h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW) 132 | conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW)) 133 | cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda())) 134 | cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC) 135 | t1 = time.time() 136 | 137 | pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW) 138 | grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda() 139 | grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda() 140 | anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda() 141 | anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda() 142 | anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW) 143 | anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW) 144 | pred_boxes[0] = x.data + grid_x 145 | pred_boxes[1] = y.data + grid_y 146 | pred_boxes[2] = torch.exp(w.data) * anchor_w 147 | pred_boxes[3] = torch.exp(h.data) * anchor_h 148 | pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4)) 149 | t2 = time.time() 150 | 151 | nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes, 152 | target.data, 153 | self.anchors, nA, 154 | nC, \ 155 | nH, nW, 156 | self.noobject_scale, 157 | self.object_scale, 158 | self.thresh, 159 | self.seen) 160 | cls_mask = (cls_mask == 1) 161 | nProposals = int((conf > 0.25).sum().data[0]) 162 | 163 | tx = Variable(tx.cuda()) 164 | ty = Variable(ty.cuda()) 165 | tw = Variable(tw.cuda()) 166 | th = Variable(th.cuda()) 167 | tconf = Variable(tconf.cuda()) 168 | tcls = Variable(tcls.view(-1)[cls_mask].long().cuda()) 169 | 170 | coord_mask = Variable(coord_mask.cuda()) 171 | conf_mask = Variable(conf_mask.cuda().sqrt()) 172 | cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda()) 173 | cls = cls[cls_mask].view(-1, nC) 174 | 175 | t3 = time.time() 176 | 177 | loss_x = self.coord_scale * nn.MSELoss(reduction='sum')(x * coord_mask, tx * coord_mask) / 2.0 178 | loss_y = self.coord_scale * nn.MSELoss(reduction='sum')(y * coord_mask, ty * coord_mask) / 2.0 179 | loss_w = self.coord_scale * 
nn.MSELoss(reduction='sum')(w * coord_mask, tw * coord_mask) / 2.0 180 | loss_h = self.coord_scale * nn.MSELoss(reduction='sum')(h * coord_mask, th * coord_mask) / 2.0 181 | loss_conf = nn.MSELoss(reduction='sum')(conf * conf_mask, tconf * conf_mask) / 2.0 182 | loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls) 183 | loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls 184 | t4 = time.time() 185 | if False: 186 | print('-----------------------------------') 187 | print(' activation : %f' % (t1 - t0)) 188 | print(' create pred_boxes : %f' % (t2 - t1)) 189 | print(' build targets : %f' % (t3 - t2)) 190 | print(' create loss : %f' % (t4 - t3)) 191 | print(' total : %f' % (t4 - t0)) 192 | print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % ( 193 | self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_w.data[0], loss_h.data[0], 194 | loss_conf.data[0], loss_cls.data[0], loss.data[0])) 195 | return loss 196 | -------------------------------------------------------------------------------- /tool/torch_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import time 4 | import math 5 | import torch 6 | import numpy as np 7 | from torch.autograd import Variable 8 | 9 | import itertools 10 | import struct # get_image_size 11 | import imghdr # get_image_size 12 | 13 | from tool import utils 14 | 15 | 16 | def bbox_ious(boxes1, boxes2, x1y1x2y2=True): 17 | if x1y1x2y2: 18 | mx = torch.min(boxes1[0], boxes2[0]) 19 | Mx = torch.max(boxes1[2], boxes2[2]) 20 | my = torch.min(boxes1[1], boxes2[1]) 21 | My = torch.max(boxes1[3], boxes2[3]) 22 | w1 = boxes1[2] - boxes1[0] 23 | h1 = boxes1[3] - boxes1[1] 24 | w2 = boxes2[2] - boxes2[0] 25 | h2 = boxes2[3] - boxes2[1] 26 | else: 27 | mx = torch.min(boxes1[0] - boxes1[2] / 2.0, boxes2[0] - boxes2[2] / 2.0) 28 | Mx = torch.max(boxes1[0] + boxes1[2] / 2.0, boxes2[0] + boxes2[2] / 2.0) 29 | my = torch.min(boxes1[1] - boxes1[3] / 2.0, boxes2[1] - boxes2[3] / 2.0) 30 | My = torch.max(boxes1[1] + boxes1[3] / 2.0, boxes2[1] + boxes2[3] / 2.0) 31 | w1 = boxes1[2] 32 | h1 = boxes1[3] 33 | w2 = boxes2[2] 34 | h2 = boxes2[3] 35 | uw = Mx - mx 36 | uh = My - my 37 | cw = w1 + w2 - uw 38 | ch = h1 + h2 - uh 39 | mask = ((cw <= 0) + (ch <= 0) > 0) 40 | area1 = w1 * h1 41 | area2 = w2 * h2 42 | carea = cw * ch 43 | carea[mask] = 0 44 | uarea = area1 + area2 - carea 45 | return carea / uarea 46 | 47 | 48 | def get_region_boxes(boxes_and_confs): 49 | 50 | # print('Getting boxes from boxes and confs ...') 51 | 52 | boxes_list = [] 53 | confs_list = [] 54 | 55 | for item in boxes_and_confs: 56 | boxes_list.append(item[0]) 57 | confs_list.append(item[1]) 58 | 59 | # boxes: [batch, num1 + num2 + num3, 1, 4] 60 | # confs: [batch, num1 + num2 + num3, num_classes] 61 | boxes = torch.cat(boxes_list, dim=1) 62 | confs = torch.cat(confs_list, dim=1) 63 | 64 | return [boxes, confs] 65 | 66 | 67 | def convert2cpu(gpu_matrix): 68 | return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) 69 | 70 | 71 | def convert2cpu_long(gpu_matrix): 72 | return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix) 73 | 74 | 75 | 76 | def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=1): 77 | model.eval() 78 | t0 = time.time() 79 | 80 | if type(img) == np.ndarray and len(img.shape) == 3: # cv2 image 81 | img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0) 82 | elif type(img) == 
np.ndarray and len(img.shape) == 4: 83 | img = torch.from_numpy(img.transpose(0, 3, 1, 2)).float().div(255.0) 84 | else: 85 | print("unknow image type") 86 | exit(-1) 87 | 88 | if use_cuda: 89 | img = img.cuda() 90 | img = torch.autograd.Variable(img) 91 | 92 | t1 = time.time() 93 | 94 | output = model(img) 95 | 96 | t2 = time.time() 97 | 98 | print('-----------------------------------') 99 | print(' Preprocess : %f' % (t1 - t0)) 100 | print(' Model Inference : %f' % (t2 - t1)) 101 | print('-----------------------------------') 102 | 103 | return utils.post_processing(img, conf_thresh, nms_thresh, output) 104 | 105 | -------------------------------------------------------------------------------- /tool/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import time 4 | import math 5 | import numpy as np 6 | 7 | import itertools 8 | import struct # get_image_size 9 | import imghdr # get_image_size 10 | 11 | 12 | def sigmoid(x): 13 | return 1.0 / (np.exp(-x) + 1.) 14 | 15 | 16 | def softmax(x): 17 | x = np.exp(x - np.expand_dims(np.max(x, axis=1), axis=1)) 18 | x = x / np.expand_dims(x.sum(axis=1), axis=1) 19 | return x 20 | 21 | 22 | def bbox_iou(box1, box2, x1y1x2y2=True): 23 | 24 | # print('iou box1:', box1) 25 | # print('iou box2:', box2) 26 | 27 | if x1y1x2y2: 28 | mx = min(box1[0], box2[0]) 29 | Mx = max(box1[2], box2[2]) 30 | my = min(box1[1], box2[1]) 31 | My = max(box1[3], box2[3]) 32 | w1 = box1[2] - box1[0] 33 | h1 = box1[3] - box1[1] 34 | w2 = box2[2] - box2[0] 35 | h2 = box2[3] - box2[1] 36 | else: 37 | w1 = box1[2] 38 | h1 = box1[3] 39 | w2 = box2[2] 40 | h2 = box2[3] 41 | 42 | mx = min(box1[0], box2[0]) 43 | Mx = max(box1[0] + w1, box2[0] + w2) 44 | my = min(box1[1], box2[1]) 45 | My = max(box1[1] + h1, box2[1] + h2) 46 | uw = Mx - mx 47 | uh = My - my 48 | cw = w1 + w2 - uw 49 | ch = h1 + h2 - uh 50 | carea = 0 51 | if cw <= 0 or ch <= 0: 52 | return 0.0 53 | 54 | area1 = w1 * h1 55 | area2 = w2 * h2 56 | carea = cw * ch 57 | uarea = area1 + area2 - carea 58 | return carea / uarea 59 | 60 | 61 | def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False): 62 | # print(boxes.shape) 63 | x1 = boxes[:, 0] 64 | y1 = boxes[:, 1] 65 | x2 = boxes[:, 2] 66 | y2 = boxes[:, 3] 67 | 68 | areas = (x2 - x1) * (y2 - y1) 69 | order = confs.argsort()[::-1] 70 | 71 | keep = [] 72 | while order.size > 0: 73 | idx_self = order[0] 74 | idx_other = order[1:] 75 | 76 | keep.append(idx_self) 77 | 78 | xx1 = np.maximum(x1[idx_self], x1[idx_other]) 79 | yy1 = np.maximum(y1[idx_self], y1[idx_other]) 80 | xx2 = np.minimum(x2[idx_self], x2[idx_other]) 81 | yy2 = np.minimum(y2[idx_self], y2[idx_other]) 82 | 83 | w = np.maximum(0.0, xx2 - xx1) 84 | h = np.maximum(0.0, yy2 - yy1) 85 | inter = w * h 86 | 87 | if min_mode: 88 | over = inter / np.minimum(areas[order[0]], areas[order[1:]]) 89 | else: 90 | over = inter / (areas[order[0]] + areas[order[1:]] - inter) 91 | 92 | inds = np.where(over <= nms_thresh)[0] 93 | order = order[inds + 1] 94 | 95 | return np.array(keep) 96 | 97 | 98 | 99 | def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None): 100 | import cv2 101 | img = np.copy(img) 102 | colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32) 103 | 104 | def get_color(c, x, max_val): 105 | ratio = float(x) / max_val * 5 106 | i = int(math.floor(ratio)) 107 | j = int(math.ceil(ratio)) 108 | ratio = ratio - i 109 | r = (1 - ratio) * colors[i][c] + ratio * colors[j][c] 
110 | return int(r * 255) 111 | 112 | width = img.shape[1] 113 | height = img.shape[0] 114 | for i in range(len(boxes)): 115 | box = boxes[i] 116 | x1 = int(box[0] * width) 117 | y1 = int(box[1] * height) 118 | x2 = int(box[2] * width) 119 | y2 = int(box[3] * height) 120 | 121 | if color: 122 | rgb = color 123 | else: 124 | rgb = (255, 0, 0) 125 | if len(box) >= 7 and class_names: 126 | cls_conf = box[5] 127 | cls_id = box[6] 128 | print('%s: %f' % (class_names[cls_id], cls_conf)) 129 | classes = len(class_names) 130 | offset = cls_id * 123457 % classes 131 | red = get_color(2, offset, classes) 132 | green = get_color(1, offset, classes) 133 | blue = get_color(0, offset, classes) 134 | if color is None: 135 | rgb = (red, green, blue) 136 | img = cv2.putText(img, class_names[cls_id], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 1) 137 | img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 1) 138 | if savename: 139 | print("save plot results to %s" % savename) 140 | cv2.imwrite(savename, img) 141 | return img 142 | 143 | 144 | def read_truths(lab_path): 145 | if not os.path.exists(lab_path): 146 | return np.array([]) 147 | if os.path.getsize(lab_path): 148 | truths = np.loadtxt(lab_path) 149 | truths = truths.reshape(truths.size / 5, 5) # to avoid single truth problem 150 | return truths 151 | else: 152 | return np.array([]) 153 | 154 | 155 | def load_class_names(namesfile): 156 | class_names = [] 157 | with open(namesfile, 'r') as fp: 158 | lines = fp.readlines() 159 | for line in lines: 160 | line = line.rstrip() 161 | class_names.append(line) 162 | return class_names 163 | 164 | 165 | 166 | def post_processing(img, conf_thresh, nms_thresh, output): 167 | 168 | # anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401] 169 | # num_anchors = 9 170 | # anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]] 171 | # strides = [8, 16, 32] 172 | # anchor_step = len(anchors) // num_anchors 173 | 174 | # [batch, num, 1, 4] 175 | box_array = output[0] 176 | # [batch, num, num_classes] 177 | confs = output[1] 178 | 179 | t1 = time.time() 180 | 181 | if type(box_array).__name__ != 'ndarray': 182 | box_array = box_array.cpu().detach().numpy() 183 | confs = confs.cpu().detach().numpy() 184 | 185 | num_classes = confs.shape[2] 186 | 187 | # [batch, num, 4] 188 | box_array = box_array[:, :, 0] 189 | 190 | # [batch, num, num_classes] --> [batch, num] 191 | max_conf = np.max(confs, axis=2) 192 | max_id = np.argmax(confs, axis=2) 193 | 194 | t2 = time.time() 195 | 196 | bboxes_batch = [] 197 | for i in range(box_array.shape[0]): 198 | 199 | argwhere = max_conf[i] > conf_thresh 200 | l_box_array = box_array[i, argwhere, :] 201 | l_max_conf = max_conf[i, argwhere] 202 | l_max_id = max_id[i, argwhere] 203 | 204 | bboxes = [] 205 | # nms for each class 206 | for j in range(num_classes): 207 | 208 | cls_argwhere = l_max_id == j 209 | ll_box_array = l_box_array[cls_argwhere, :] 210 | ll_max_conf = l_max_conf[cls_argwhere] 211 | ll_max_id = l_max_id[cls_argwhere] 212 | 213 | keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh) 214 | 215 | if (keep.size > 0): 216 | ll_box_array = ll_box_array[keep, :] 217 | ll_max_conf = ll_max_conf[keep] 218 | ll_max_id = ll_max_id[keep] 219 | 220 | for k in range(ll_box_array.shape[0]): 221 | bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]]) 222 | 223 | bboxes_batch.append(bboxes) 224 | 225 | t3 = time.time() 226 | 227 | 
print('-----------------------------------') 228 | print(' max and argmax : %f' % (t2 - t1)) 229 | print(' nms : %f' % (t3 - t2)) 230 | print('Post processing total : %f' % (t3 - t1)) 231 | print('-----------------------------------') 232 | 233 | return bboxes_batch 234 | -------------------------------------------------------------------------------- /tool/utils_iou.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | 4 | ''' 5 | import torch 6 | import os, sys 7 | from torch.nn import functional as F 8 | 9 | import numpy as np 10 | from packaging import version 11 | 12 | 13 | __all__ = [ 14 | "bboxes_iou", 15 | "bboxes_giou", 16 | "bboxes_diou", 17 | "bboxes_ciou", 18 | ] 19 | 20 | 21 | if version.parse(torch.__version__) >= version.parse('1.5.0'): 22 | def _true_divide(dividend, divisor): 23 | return torch.true_divide(dividend, divisor) 24 | else: 25 | def _true_divide(dividend, divisor): 26 | return dividend / divisor 27 | 28 | def bboxes_iou(bboxes_a, bboxes_b, fmt='voc', iou_type='iou'): 29 | """Calculate the Intersection of Unions (IoUs) between bounding boxes. 30 | IoU is calculated as a ratio of area of the intersection 31 | and area of the union. 32 | 33 | Args: 34 | bbox_a (array): An array whose shape is :math:`(N, 4)`. 35 | :math:`N` is the number of bounding boxes. 36 | The dtype should be :obj:`numpy.float32`. 37 | bbox_b (array): An array similar to :obj:`bbox_a`, 38 | whose shape is :math:`(K, 4)`. 39 | The dtype should be :obj:`numpy.float32`. 40 | Returns: 41 | array: 42 | An array whose shape is :math:`(N, K)`. \ 43 | An element at index :math:`(n, k)` contains IoUs between \ 44 | :math:`n` th bounding box in :obj:`bbox_a` and :math:`k` th bounding \ 45 | box in :obj:`bbox_b`. 
46 | 47 | from: https://github.com/chainer/chainercv 48 | """ 49 | if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4: 50 | raise IndexError 51 | 52 | N, K = bboxes_a.shape[0], bboxes_b.shape[0] 53 | 54 | if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax 55 | # top left 56 | tl_intersect = torch.max( 57 | bboxes_a[:, np.newaxis, :2], 58 | bboxes_b[:, :2] 59 | ) # of shape `(N,K,2)` 60 | # bottom right 61 | br_intersect = torch.min( 62 | bboxes_a[:, np.newaxis, 2:], 63 | bboxes_b[:, 2:] 64 | ) 65 | bb_a = bboxes_a[:, 2:] - bboxes_a[:, :2] 66 | bb_b = bboxes_b[:, 2:] - bboxes_b[:, :2] 67 | # bb_* can also be seen vectors representing box_width, box_height 68 | elif fmt.lower() == 'yolo': # xcen, ycen, w, h 69 | # top left 70 | tl_intersect = torch.max( 71 | bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2, 72 | bboxes_b[:, :2] - bboxes_b[:, 2:] / 2 73 | ) 74 | # bottom right 75 | br_intersect = torch.min( 76 | bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2, 77 | bboxes_b[:, :2] + bboxes_b[:, 2:] / 2 78 | ) 79 | bb_a = bboxes_a[:, 2:] 80 | bb_b = bboxes_b[:, 2:] 81 | elif fmt.lower() == 'coco': # xmin, ymin, w, h 82 | # top left 83 | tl_intersect = torch.max( 84 | bboxes_a[:, np.newaxis, :2], 85 | bboxes_b[:, :2] 86 | ) 87 | # bottom right 88 | br_intersect = torch.min( 89 | bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:], 90 | bboxes_b[:, :2] + bboxes_b[:, 2:] 91 | ) 92 | bb_a = bboxes_a[:, 2:] 93 | bb_b = bboxes_b[:, 2:] 94 | 95 | area_a = torch.prod(bb_a, 1) 96 | area_b = torch.prod(bb_b, 1) 97 | 98 | # torch.prod(input, dim, keepdim=False, dtype=None) → Tensor 99 | # Returns the product of each row of the input tensor in the given dimension dim 100 | # if tl, br does not form a nondegenerate squre, then the corr. 
element in the `prod` would be 0 101 | en = (tl_intersect < br_intersect).type(tl_intersect.type()).prod(dim=2) # shape `(N,K,2)` ---> shape `(N,K)` 102 | 103 | area_intersect = torch.prod(br_intersect - tl_intersect, 2) * en # * ((tl < br).all()) 104 | area_union = (area_a[:, np.newaxis] + area_b - area_intersect) 105 | 106 | iou = _true_divide(area_intersect, area_union) 107 | 108 | if iou_type.lower() == 'iou': 109 | return iou 110 | 111 | if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax 112 | # top left 113 | tl_union = torch.min( 114 | bboxes_a[:, np.newaxis, :2], 115 | bboxes_b[:, :2] 116 | ) # of shape `(N,K,2)` 117 | # bottom right 118 | br_union = torch.max( 119 | bboxes_a[:, np.newaxis, 2:], 120 | bboxes_b[:, 2:] 121 | ) 122 | elif fmt.lower() == 'yolo': # xcen, ycen, w, h 123 | # top left 124 | tl_union = torch.min( 125 | bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2, 126 | bboxes_b[:, :2] - bboxes_b[:, 2:] / 2 127 | ) 128 | # bottom right 129 | br_union = torch.max( 130 | bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2, 131 | bboxes_b[:, :2] + bboxes_b[:, 2:] / 2 132 | ) 133 | elif fmt.lower() == 'coco': # xmin, ymin, w, h 134 | # top left 135 | tl_union = torch.min( 136 | bboxes_a[:, np.newaxis, :2], 137 | bboxes_b[:, :2] 138 | ) 139 | # bottom right 140 | br_union = torch.max( 141 | bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:], 142 | bboxes_b[:, :2] + bboxes_b[:, 2:] 143 | ) 144 | 145 | # c for covering, of shape `(N,K,2)` 146 | # the last dim is box width, box hight 147 | bboxes_c = br_union - tl_union 148 | 149 | area_covering = torch.prod(bboxes_c, 2) # shape `(N,K)` 150 | 151 | giou = iou - _true_divide(area_covering - area_union, area_covering) 152 | 153 | if iou_type.lower() == 'giou': 154 | return giou 155 | 156 | if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax 157 | centre_a = (bboxes_a[..., 2 :] + bboxes_a[..., : 2]) / 2 158 | centre_b = (bboxes_b[..., 2 :] + bboxes_b[..., : 2]) / 2 159 | elif fmt.lower() == 'yolo': # xcen, ycen, w, h 160 | centre_a = bboxes_a[..., : 2] 161 | centre_b = bboxes_b[..., : 2] 162 | elif fmt.lower() == 'coco': # xmin, ymin, w, h 163 | centre_a = bboxes_a[..., 2 :] + bboxes_a[..., : 2]/2 164 | centre_b = bboxes_b[..., 2 :] + bboxes_b[..., : 2]/2 165 | 166 | centre_dist = torch.norm(centre_a[:, np.newaxis] - centre_b, p='fro', dim=2) 167 | diag_len = torch.norm(bboxes_c, p='fro', dim=2) 168 | 169 | diou = iou - _true_divide(centre_dist.pow(2), diag_len.pow(2)) 170 | 171 | if iou_type.lower() == 'diou': 172 | return diou 173 | 174 | """ the legacy custom cosine similarity: 175 | 176 | # bb_a of shape `(N,2)`, bb_b of shape `(K,2)` 177 | v = torch.einsum('nm,km->nk', bb_a, bb_b) 178 | v = _true_divide(v, (torch.norm(bb_a, p='fro', dim=1)[:,np.newaxis] * torch.norm(bb_b, p='fro', dim=1))) 179 | # avoid nan for torch.acos near \pm 1 180 | # https://github.com/pytorch/pytorch/issues/8069 181 | eps = 1e-7 182 | v = torch.clamp(v, -1+eps, 1-eps) 183 | """ 184 | v = F.cosine_similarity(bb_a[:,np.newaxis,:], bb_b, dim=-1) 185 | v = (_true_divide(2*torch.acos(v), np.pi)).pow(2) 186 | with torch.no_grad(): 187 | alpha = (_true_divide(v, 1-iou+v)) * ((iou>=0.5).type(iou.type())) 188 | 189 | ciou = diou - alpha * v 190 | 191 | if iou_type.lower() == 'ciou': 192 | return ciou 193 | 194 | 195 | def bboxes_giou(bboxes_a, bboxes_b, fmt='voc'): 196 | return bboxes_iou(bboxes_a, bboxes_b, fmt, 'giou') 197 | 198 | 199 | def bboxes_diou(bboxes_a, bboxes_b, fmt='voc'): 200 | return bboxes_iou(bboxes_a, bboxes_b, fmt, 
'diou') 201 | 202 | 203 | def bboxes_ciou(bboxes_a, bboxes_b, fmt='voc'): 204 | return bboxes_iou(bboxes_a, bboxes_b, fmt, 'ciou') 205 | -------------------------------------------------------------------------------- /tools/freeze_model.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import argparse 3 | import tensorflow as tf 4 | import tensorflow.contrib.slim as slim 5 | 6 | 7 | def _batch_norm_fn(x, scope=None): 8 | if scope is None: 9 | scope = tf.get_variable_scope().name + "/bn" 10 | return slim.batch_norm(x, scope=scope) 11 | 12 | 13 | def create_link( 14 | incoming, network_builder, scope, nonlinearity=tf.nn.elu, 15 | weights_initializer=tf.truncated_normal_initializer(stddev=1e-3), 16 | regularizer=None, is_first=False, summarize_activations=True): 17 | if is_first: 18 | network = incoming 19 | else: 20 | network = _batch_norm_fn(incoming, scope=scope + "/bn") 21 | network = nonlinearity(network) 22 | if summarize_activations: 23 | tf.summary.histogram(scope+"/activations", network) 24 | 25 | pre_block_network = network 26 | post_block_network = network_builder(pre_block_network, scope) 27 | 28 | incoming_dim = pre_block_network.get_shape().as_list()[-1] 29 | outgoing_dim = post_block_network.get_shape().as_list()[-1] 30 | if incoming_dim != outgoing_dim: 31 | assert outgoing_dim == 2 * incoming_dim, \ 32 | "%d != %d" % (outgoing_dim, 2 * incoming) 33 | projection = slim.conv2d( 34 | incoming, outgoing_dim, 1, 2, padding="SAME", activation_fn=None, 35 | scope=scope+"/projection", weights_initializer=weights_initializer, 36 | biases_initializer=None, weights_regularizer=regularizer) 37 | network = projection + post_block_network 38 | else: 39 | network = incoming + post_block_network 40 | return network 41 | 42 | 43 | def create_inner_block( 44 | incoming, scope, nonlinearity=tf.nn.elu, 45 | weights_initializer=tf.truncated_normal_initializer(1e-3), 46 | bias_initializer=tf.zeros_initializer(), regularizer=None, 47 | increase_dim=False, summarize_activations=True): 48 | n = incoming.get_shape().as_list()[-1] 49 | stride = 1 50 | if increase_dim: 51 | n *= 2 52 | stride = 2 53 | 54 | incoming = slim.conv2d( 55 | incoming, n, [3, 3], stride, activation_fn=nonlinearity, padding="SAME", 56 | normalizer_fn=_batch_norm_fn, weights_initializer=weights_initializer, 57 | biases_initializer=bias_initializer, weights_regularizer=regularizer, 58 | scope=scope + "/1") 59 | if summarize_activations: 60 | tf.summary.histogram(incoming.name + "/activations", incoming) 61 | 62 | incoming = slim.dropout(incoming, keep_prob=0.6) 63 | 64 | incoming = slim.conv2d( 65 | incoming, n, [3, 3], 1, activation_fn=None, padding="SAME", 66 | normalizer_fn=None, weights_initializer=weights_initializer, 67 | biases_initializer=bias_initializer, weights_regularizer=regularizer, 68 | scope=scope + "/2") 69 | return incoming 70 | 71 | 72 | def residual_block(incoming, scope, nonlinearity=tf.nn.elu, 73 | weights_initializer=tf.truncated_normal_initializer(1e3), 74 | bias_initializer=tf.zeros_initializer(), regularizer=None, 75 | increase_dim=False, is_first=False, 76 | summarize_activations=True): 77 | 78 | def network_builder(x, s): 79 | return create_inner_block( 80 | x, s, nonlinearity, weights_initializer, bias_initializer, 81 | regularizer, increase_dim, summarize_activations) 82 | 83 | return create_link( 84 | incoming, network_builder, scope, nonlinearity, weights_initializer, 85 | regularizer, is_first, summarize_activations) 86 | 87 | 
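# --- Editorial usage sketch (not part of the original repository) ----------
# The frozen graph written by this script (for example
# model_data/mars-small128.pb) exposes the tensors created in main() below:
# a uint8 "images" placeholder of shape (None, 128, 64, 3) and a "features"
# output (128-dimensional for the mars-small128 checkpoint). The helper
# below is a minimal TF1-style loading sketch under those assumptions; the
# name `load_frozen_encoder` is illustrative only and is not used elsewhere
# in this codebase. It relies on the `import tensorflow as tf` at the top of
# this file.
def load_frozen_encoder(graphdef_path, session):
    """Import a frozen appearance graph and return a patch-batch encoder."""
    with tf.gfile.GFile(graphdef_path, "rb") as file_handle:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(file_handle.read())
    with session.graph.as_default():
        tf.import_graph_def(graph_def, name="net")
    input_var = session.graph.get_tensor_by_name("net/images:0")
    output_var = session.graph.get_tensor_by_name("net/features:0")
    # Feed uint8 patches of shape (N, 128, 64, 3); one feature row per patch.
    return lambda patches: session.run(output_var, feed_dict={input_var: patches})
# Typical call site (sketch), inside `with tf.Session(graph=tf.Graph()) as sess:`:
#     encode = load_frozen_encoder("model_data/mars-small128.pb", sess)
#     features = encode(patch_batch)  # patch_batch: uint8 ndarray, (N, 128, 64, 3)
# ----------------------------------------------------------------------------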
88 | def _create_network(incoming, reuse=None, weight_decay=1e-8): 89 | nonlinearity = tf.nn.elu 90 | conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3) 91 | conv_bias_init = tf.zeros_initializer() 92 | conv_regularizer = slim.l2_regularizer(weight_decay) 93 | fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3) 94 | fc_bias_init = tf.zeros_initializer() 95 | fc_regularizer = slim.l2_regularizer(weight_decay) 96 | 97 | def batch_norm_fn(x): 98 | return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn") 99 | 100 | network = incoming 101 | network = slim.conv2d( 102 | network, 32, [3, 3], stride=1, activation_fn=nonlinearity, 103 | padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_1", 104 | weights_initializer=conv_weight_init, biases_initializer=conv_bias_init, 105 | weights_regularizer=conv_regularizer) 106 | network = slim.conv2d( 107 | network, 32, [3, 3], stride=1, activation_fn=nonlinearity, 108 | padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_2", 109 | weights_initializer=conv_weight_init, biases_initializer=conv_bias_init, 110 | weights_regularizer=conv_regularizer) 111 | 112 | # NOTE(nwojke): This is missing a padding="SAME" to match the CNN 113 | # architecture in Table 1 of the paper. Information on how this affects 114 | # performance on MOT 16 training sequences can be found in 115 | # issue 10 https://github.com/nwojke/deep_sort/issues/10 116 | network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1") 117 | 118 | network = residual_block( 119 | network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init, 120 | conv_regularizer, increase_dim=False, is_first=True) 121 | network = residual_block( 122 | network, "conv2_3", nonlinearity, conv_weight_init, conv_bias_init, 123 | conv_regularizer, increase_dim=False) 124 | 125 | network = residual_block( 126 | network, "conv3_1", nonlinearity, conv_weight_init, conv_bias_init, 127 | conv_regularizer, increase_dim=True) 128 | network = residual_block( 129 | network, "conv3_3", nonlinearity, conv_weight_init, conv_bias_init, 130 | conv_regularizer, increase_dim=False) 131 | 132 | network = residual_block( 133 | network, "conv4_1", nonlinearity, conv_weight_init, conv_bias_init, 134 | conv_regularizer, increase_dim=True) 135 | network = residual_block( 136 | network, "conv4_3", nonlinearity, conv_weight_init, conv_bias_init, 137 | conv_regularizer, increase_dim=False) 138 | 139 | feature_dim = network.get_shape().as_list()[-1] 140 | network = slim.flatten(network) 141 | 142 | network = slim.dropout(network, keep_prob=0.6) 143 | network = slim.fully_connected( 144 | network, feature_dim, activation_fn=nonlinearity, 145 | normalizer_fn=batch_norm_fn, weights_regularizer=fc_regularizer, 146 | scope="fc1", weights_initializer=fc_weight_init, 147 | biases_initializer=fc_bias_init) 148 | 149 | features = network 150 | 151 | # Features in rows, normalize axis 1. 
152 | features = slim.batch_norm(features, scope="ball", reuse=reuse) 153 | feature_norm = tf.sqrt( 154 | tf.constant(1e-8, tf.float32) + 155 | tf.reduce_sum(tf.square(features), [1], keepdims=True)) 156 | features = features / feature_norm 157 | return features, None 158 | 159 | 160 | def _network_factory(weight_decay=1e-8): 161 | 162 | def factory_fn(image, reuse): 163 | with slim.arg_scope([slim.batch_norm, slim.dropout], 164 | is_training=False): 165 | with slim.arg_scope([slim.conv2d, slim.fully_connected, 166 | slim.batch_norm, slim.layer_norm], 167 | reuse=reuse): 168 | features, logits = _create_network( 169 | image, reuse=reuse, weight_decay=weight_decay) 170 | return features, logits 171 | 172 | return factory_fn 173 | 174 | 175 | def _preprocess(image): 176 | image = image[:, :, ::-1] # BGR to RGB 177 | return image 178 | 179 | 180 | def parse_args(): 181 | """Parse command line arguments. 182 | """ 183 | parser = argparse.ArgumentParser(description="Freeze old model") 184 | parser.add_argument( 185 | "--checkpoint_in", 186 | default="resources/networks/mars-small128.ckpt-68577", 187 | help="Path to checkpoint file") 188 | parser.add_argument( 189 | "--graphdef_out", 190 | default="resources/networks/mars-small128.pb") 191 | return parser.parse_args() 192 | 193 | 194 | def main(): 195 | args = parse_args() 196 | 197 | with tf.Session(graph=tf.Graph()) as session: 198 | input_var = tf.placeholder( 199 | tf.uint8, (None, 128, 64, 3), name="images") 200 | image_var = tf.map_fn( 201 | lambda x: _preprocess(x), tf.cast(input_var, tf.float32), 202 | back_prop=False) 203 | 204 | factory_fn = _network_factory() 205 | features, _ = factory_fn(image_var, reuse=None) 206 | features = tf.identity(features, name="features") 207 | 208 | saver = tf.train.Saver(slim.get_variables_to_restore()) 209 | saver.restore(session, args.checkpoint_in) 210 | 211 | output_graph_def = tf.graph_util.convert_variables_to_constants( 212 | session, tf.get_default_graph().as_graph_def(), 213 | [features.name.split(":")[0]]) 214 | with tf.gfile.GFile(args.graphdef_out, "wb") as file_handle: 215 | file_handle.write(output_graph_def.SerializeToString()) 216 | 217 | 218 | if __name__ == "__main__": 219 | main() 220 | -------------------------------------------------------------------------------- /tools/generate_clip_detections.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import os 3 | import errno 4 | import argparse 5 | import numpy as np 6 | import cv2 7 | import torch 8 | from PIL import Image 9 | 10 | 11 | def _run_in_batches(f, data_dict, out, batch_size): 12 | data_len = len(out) 13 | num_batches = int(data_len / batch_size) 14 | 15 | s, e = 0, 0 16 | for i in range(num_batches): 17 | s, e = i * batch_size, (i + 1) * batch_size 18 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()} 19 | out[s:e] = f(batch_data_dict) 20 | if e < len(out): 21 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()} 22 | out[e:] = f(batch_data_dict) 23 | 24 | 25 | def extract_image_patch(image, bbox, patch_shape=None): 26 | """Extract image patch from bounding box. 27 | 28 | Parameters 29 | ---------- 30 | image : ndarray 31 | The full image. 32 | bbox : array_like 33 | The bounding box in format (x, y, width, height). 34 | patch_shape : Optional[array_like] 35 | This parameter can be used to enforce a desired patch shape 36 | (height, width). 
First, the `bbox` is adapted to the aspect ratio 37 | of the patch shape, then it is clipped at the image boundaries. 38 | If None, the shape is computed from :arg:`bbox`. 39 | 40 | Returns 41 | ------- 42 | ndarray | NoneType 43 | An image patch showing the :arg:`bbox`, optionally reshaped to 44 | :arg:`patch_shape`. 45 | Returns None if the bounding box is empty or fully outside of the image 46 | boundaries. 47 | 48 | """ 49 | bbox = np.array(bbox.cpu()) 50 | if patch_shape is not None: 51 | # correct aspect ratio to patch shape 52 | target_aspect = float(patch_shape[1]) / patch_shape[0] 53 | new_width = target_aspect * bbox[3] 54 | bbox[0] -= (new_width - bbox[2]) / 2 55 | bbox[2] = new_width 56 | 57 | # convert to top left, bottom right 58 | bbox[2:] += bbox[:2] 59 | bbox = bbox.astype(np.int) 60 | 61 | # clip at image boundaries 62 | bbox[:2] = np.maximum(0, bbox[:2]) 63 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:]) 64 | if np.any(bbox[:2] >= bbox[2:]): 65 | return None 66 | sx, sy, ex, ey = bbox 67 | image = image[sy:ey, sx:ex] 68 | 69 | #image = cv2.resize(image, tuple(patch_shape[::-1])) 70 | return image 71 | 72 | 73 | class ImageEncoder(object): 74 | 75 | def __init__(self, model, transform, device): 76 | 77 | 78 | self.model = model 79 | self.transform = transform 80 | self.device = device 81 | 82 | def __call__(self, data_x, batch_size=32): 83 | out = [] 84 | #data_x = [i for i in data_x if i is not None] 85 | 86 | #print("[ZSOT ImageEncoder] num_none: {}".format(len(num_none))) 87 | for patch in range(len(data_x)): 88 | if self.device == "cpu": 89 | img = self.transform(Image.fromarray(data_x[patch])) 90 | else: 91 | img = self.transform(Image.fromarray(data_x[patch])).cuda() 92 | out.append(img) 93 | 94 | features = self.model.encode_image(torch.stack(out)).cpu().numpy() 95 | for idx, i in enumerate(features): 96 | if np.isnan(i[0]): 97 | print("nan values") 98 | # features[idx] = np.zeros(512) 99 | # cv2.imshow("image", data_x[idx]) 100 | # cv2.waitKey(0) 101 | 102 | return features 103 | 104 | 105 | def create_box_encoder(model, transform, batch_size=32, device="cpu"): 106 | image_encoder = ImageEncoder(model, transform, device) 107 | 108 | def encoder(image, boxes): 109 | image_patches = [] 110 | for box in boxes: 111 | #print("extracting box {} from image {}".format(box, image.shape)) 112 | patch = extract_image_patch(image, box) 113 | 114 | if patch is None: 115 | print("WARNING: Failed to extract image patch: %s." % str(box)) 116 | patch = np.random.uniform( 117 | 0., 255., image.shape).astype(np.uint8) 118 | image_patches.append(patch) 119 | #image_patches = np.array(image_patches) 120 | return image_encoder(image_patches, batch_size) 121 | 122 | return encoder 123 | 124 | 125 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None): 126 | """Generate detections with features. 127 | 128 | Parameters 129 | ---------- 130 | encoder : Callable[image, ndarray] -> ndarray 131 | The encoder function takes as input a BGR color image and a matrix of 132 | bounding boxes in format `(x, y, w, h)` and returns a matrix of 133 | corresponding feature vectors. 134 | mot_dir : str 135 | Path to the MOTChallenge directory (can be either train or test). 136 | output_dir 137 | Path to the output directory. Will be created if it does not exist. 138 | detection_dir 139 | Path to custom detections. The directory structure should be the default 140 | MOTChallenge structure: `[sequence]/det/det.txt`. 
If None, uses the 141 | standard MOTChallenge detections. 142 | 143 | """ 144 | if detection_dir is None: 145 | detection_dir = mot_dir 146 | try: 147 | os.makedirs(output_dir) 148 | except OSError as exception: 149 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir): 150 | pass 151 | else: 152 | raise ValueError( 153 | "Failed to create output directory '%s'" % output_dir) 154 | 155 | for sequence in os.listdir(mot_dir): 156 | print("Processing %s" % sequence) 157 | sequence_dir = os.path.join(mot_dir, sequence) 158 | 159 | image_dir = os.path.join(sequence_dir, "img1") 160 | image_filenames = { 161 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f) 162 | for f in os.listdir(image_dir)} 163 | 164 | detection_file = os.path.join( 165 | detection_dir, sequence, "det/det.txt") 166 | detections_in = np.loadtxt(detection_file, delimiter=',') 167 | detections_out = [] 168 | 169 | frame_indices = detections_in[:, 0].astype(np.int) 170 | min_frame_idx = frame_indices.astype(np.int).min() 171 | max_frame_idx = frame_indices.astype(np.int).max() 172 | for frame_idx in range(min_frame_idx, max_frame_idx + 1): 173 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx)) 174 | mask = frame_indices == frame_idx 175 | rows = detections_in[mask] 176 | 177 | if frame_idx not in image_filenames: 178 | print("WARNING could not find image for frame %d" % frame_idx) 179 | continue 180 | bgr_image = cv2.imread( 181 | image_filenames[frame_idx], cv2.IMREAD_COLOR) 182 | features = encoder(bgr_image, rows[:, 2:6].copy()) 183 | detections_out += [np.r_[(row, feature)] for row, feature 184 | in zip(rows, features)] 185 | 186 | output_filename = os.path.join(output_dir, "%s.npy" % sequence) 187 | np.save( 188 | output_filename, np.asarray(detections_out), allow_pickle=False) 189 | 190 | 191 | def parse_args(): 192 | """Parse command line arguments. 193 | """ 194 | parser = argparse.ArgumentParser(description="Re-ID feature extractor") 195 | parser.add_argument( 196 | "--model", 197 | default="resources/networks/mars-small128.pb", 198 | help="Path to frozen inference graph protobuf.") 199 | parser.add_argument( 200 | "--mot_dir", help="Path to MOTChallenge directory (train or test)", 201 | required=True) 202 | parser.add_argument( 203 | "--detection_dir", help="Path to custom detections. Defaults to " 204 | "standard MOT detections. Directory structure should be the default " 205 | "MOTChallenge structure: [sequence]/det/det.txt", default=None) 206 | parser.add_argument( 207 | "--output_dir", help="Output directory. 
Will be created if it does not" 208 | " exist.", default="detections") 209 | return parser.parse_args() 210 | 211 | 212 | def main(): 213 | args = parse_args() 214 | encoder = create_box_encoder(args.model, batch_size=32) 215 | generate_detections(encoder, args.mot_dir, args.output_dir, 216 | args.detection_dir) 217 | 218 | 219 | if __name__ == "__main__": 220 | main() 221 | -------------------------------------------------------------------------------- /tools/generate_detections.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import os 3 | import errno 4 | import argparse 5 | import numpy as np 6 | import cv2 7 | import tensorflow.compat.v1 as tf 8 | 9 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 10 | if len(physical_devices) > 0: 11 | tf.config.experimental.set_memory_growth(physical_devices[0], True) 12 | 13 | def _run_in_batches(f, data_dict, out, batch_size): 14 | data_len = len(out) 15 | num_batches = int(data_len / batch_size) 16 | 17 | s, e = 0, 0 18 | for i in range(num_batches): 19 | s, e = i * batch_size, (i + 1) * batch_size 20 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()} 21 | out[s:e] = f(batch_data_dict) 22 | if e < len(out): 23 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()} 24 | out[e:] = f(batch_data_dict) 25 | 26 | 27 | def extract_image_patch(image, bbox, patch_shape): 28 | """Extract image patch from bounding box. 29 | 30 | Parameters 31 | ---------- 32 | image : ndarray 33 | The full image. 34 | bbox : array_like 35 | The bounding box in format (x, y, width, height). 36 | patch_shape : Optional[array_like] 37 | This parameter can be used to enforce a desired patch shape 38 | (height, width). First, the `bbox` is adapted to the aspect ratio 39 | of the patch shape, then it is clipped at the image boundaries. 40 | If None, the shape is computed from :arg:`bbox`. 41 | 42 | Returns 43 | ------- 44 | ndarray | NoneType 45 | An image patch showing the :arg:`bbox`, optionally reshaped to 46 | :arg:`patch_shape`. 47 | Returns None if the bounding box is empty or fully outside of the image 48 | boundaries. 
49 | 50 | """ 51 | bbox = np.array(bbox) 52 | if patch_shape is not None: 53 | # correct aspect ratio to patch shape 54 | target_aspect = float(patch_shape[1]) / patch_shape[0] 55 | new_width = target_aspect * bbox[3] 56 | bbox[0] -= (new_width - bbox[2]) / 2 57 | bbox[2] = new_width 58 | 59 | # convert to top left, bottom right 60 | bbox[2:] += bbox[:2] 61 | bbox = bbox.astype(np.int) 62 | 63 | # clip at image boundaries 64 | bbox[:2] = np.maximum(0, bbox[:2]) 65 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:]) 66 | if np.any(bbox[:2] >= bbox[2:]): 67 | return None 68 | sx, sy, ex, ey = bbox 69 | image = image[sy:ey, sx:ex] 70 | image = cv2.resize(image, tuple(patch_shape[::-1])) 71 | return image 72 | 73 | 74 | class ImageEncoder(object): 75 | 76 | def __init__(self, checkpoint_filename, input_name="images", 77 | output_name="features"): 78 | self.session = tf.Session() 79 | with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle: 80 | graph_def = tf.GraphDef() 81 | graph_def.ParseFromString(file_handle.read()) 82 | tf.import_graph_def(graph_def, name="net") 83 | self.input_var = tf.get_default_graph().get_tensor_by_name( 84 | "%s:0" % input_name) 85 | self.output_var = tf.get_default_graph().get_tensor_by_name( 86 | "%s:0" % output_name) 87 | 88 | assert len(self.output_var.get_shape()) == 2 89 | assert len(self.input_var.get_shape()) == 4 90 | self.feature_dim = self.output_var.get_shape().as_list()[-1] 91 | self.image_shape = self.input_var.get_shape().as_list()[1:] 92 | 93 | def __call__(self, data_x, batch_size=32): 94 | out = np.zeros((len(data_x), self.feature_dim), np.float32) 95 | _run_in_batches( 96 | lambda x: self.session.run(self.output_var, feed_dict=x), 97 | {self.input_var: data_x}, out, batch_size) 98 | return out 99 | 100 | 101 | def create_box_encoder(model_filename, input_name="images", 102 | output_name="features", batch_size=32): 103 | image_encoder = ImageEncoder(model_filename, input_name, output_name) 104 | image_shape = image_encoder.image_shape 105 | 106 | def encoder(image, boxes): 107 | image_patches = [] 108 | for box in boxes: 109 | patch = extract_image_patch(image, box, image_shape[:2]) 110 | if patch is None: 111 | print("WARNING: Failed to extract image patch: %s." % str(box)) 112 | patch = np.random.uniform( 113 | 0., 255., image_shape).astype(np.uint8) 114 | image_patches.append(patch) 115 | image_patches = np.asarray(image_patches) 116 | return image_encoder(image_patches, batch_size) 117 | 118 | return encoder 119 | 120 | 121 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None): 122 | """Generate detections with features. 123 | 124 | Parameters 125 | ---------- 126 | encoder : Callable[image, ndarray] -> ndarray 127 | The encoder function takes as input a BGR color image and a matrix of 128 | bounding boxes in format `(x, y, w, h)` and returns a matrix of 129 | corresponding feature vectors. 130 | mot_dir : str 131 | Path to the MOTChallenge directory (can be either train or test). 132 | output_dir 133 | Path to the output directory. Will be created if it does not exist. 134 | detection_dir 135 | Path to custom detections. The directory structure should be the default 136 | MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the 137 | standard MOTChallenge detections. 
138 | 139 | """ 140 | if detection_dir is None: 141 | detection_dir = mot_dir 142 | try: 143 | os.makedirs(output_dir) 144 | except OSError as exception: 145 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir): 146 | pass 147 | else: 148 | raise ValueError( 149 | "Failed to create output directory '%s'" % output_dir) 150 | 151 | for sequence in os.listdir(mot_dir): 152 | print("Processing %s" % sequence) 153 | sequence_dir = os.path.join(mot_dir, sequence) 154 | 155 | image_dir = os.path.join(sequence_dir, "img1") 156 | image_filenames = { 157 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f) 158 | for f in os.listdir(image_dir)} 159 | 160 | detection_file = os.path.join( 161 | detection_dir, sequence, "det/det.txt") 162 | detections_in = np.loadtxt(detection_file, delimiter=',') 163 | detections_out = [] 164 | 165 | frame_indices = detections_in[:, 0].astype(np.int) 166 | min_frame_idx = frame_indices.astype(np.int).min() 167 | max_frame_idx = frame_indices.astype(np.int).max() 168 | for frame_idx in range(min_frame_idx, max_frame_idx + 1): 169 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx)) 170 | mask = frame_indices == frame_idx 171 | rows = detections_in[mask] 172 | 173 | if frame_idx not in image_filenames: 174 | print("WARNING could not find image for frame %d" % frame_idx) 175 | continue 176 | bgr_image = cv2.imread( 177 | image_filenames[frame_idx], cv2.IMREAD_COLOR) 178 | features = encoder(bgr_image, rows[:, 2:6].copy()) 179 | detections_out += [np.r_[(row, feature)] for row, feature 180 | in zip(rows, features)] 181 | 182 | output_filename = os.path.join(output_dir, "%s.npy" % sequence) 183 | np.save( 184 | output_filename, np.asarray(detections_out), allow_pickle=False) 185 | 186 | 187 | def parse_args(): 188 | """Parse command line arguments. 189 | """ 190 | parser = argparse.ArgumentParser(description="Re-ID feature extractor") 191 | parser.add_argument( 192 | "--model", 193 | default="resources/networks/mars-small128.pb", 194 | help="Path to frozen inference graph protobuf.") 195 | parser.add_argument( 196 | "--mot_dir", help="Path to MOTChallenge directory (train or test)", 197 | required=True) 198 | parser.add_argument( 199 | "--detection_dir", help="Path to custom detections. Defaults to " 200 | "standard MOT detections. Directory structure should be the default " 201 | "MOTChallenge structure: [sequence]/det/det.txt", default=None) 202 | parser.add_argument( 203 | "--output_dir", help="Output directory. 
Will be created if it does not" 204 | " exist.", default="detections") 205 | return parser.parse_args() 206 | 207 | 208 | def main(): 209 | args = parse_args() 210 | encoder = create_box_encoder(args.model, batch_size=32) 211 | generate_detections(encoder, args.mot_dir, args.output_dir, 212 | args.detection_dir) 213 | 214 | 215 | if __name__ == "__main__": 216 | main() 217 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/utils/__init__.py -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # Auto-anchor utils 2 | 3 | import numpy as np 4 | import torch 5 | import yaml 6 | from scipy.cluster.vq import kmeans 7 | from tqdm import tqdm 8 | 9 | 10 | def check_anchor_order(m): 11 | # Check anchor order against stride order for 
YOLOv5 Detect() module m, and correct if necessary 12 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 13 | da = a[-1] - a[0] # delta a 14 | ds = m.stride[-1] - m.stride[0] # delta s 15 | if da.sign() != ds.sign(): # same order 16 | print('Reversing anchor order') 17 | m.anchors[:] = m.anchors.flip(0) 18 | m.anchor_grid[:] = m.anchor_grid.flip(0) 19 | 20 | 21 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 22 | # Check anchor fit to data, recompute if necessary 23 | print('\nAnalyzing anchors... ', end='') 24 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 25 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 26 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 27 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 28 | 29 | def metric(k): # compute metric 30 | r = wh[:, None] / k[None] 31 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 32 | best = x.max(1)[0] # best_x 33 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 34 | bpr = (best > 1. / thr).float().mean() # best possible recall 35 | return bpr, aat 36 | 37 | bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2)) 38 | print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='') 39 | if bpr < 0.98: # threshold to recompute 40 | print('. Attempting to improve anchors, please wait...') 41 | na = m.anchor_grid.numel() // 2 # number of anchors 42 | new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 43 | new_bpr = metric(new_anchors.reshape(-1, 2))[0] 44 | if new_bpr > bpr: # replace anchors 45 | new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors) 46 | m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference 47 | m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 48 | check_anchor_order(m) 49 | print('New anchors saved to model. Update model *.yaml to use these anchors in the future.') 50 | else: 51 | print('Original anchors better than new anchors. Proceeding with original anchors.') 52 | print('') # newline 53 | 54 | 55 | def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 56 | """ Creates kmeans-evolved anchors from training dataset 57 | 58 | Arguments: 59 | path: path to dataset *.yaml, or a loaded dataset 60 | n: number of anchors 61 | img_size: image size used for training 62 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 63 | gen: generations to evolve anchors using genetic algorithm 64 | verbose: print all results 65 | 66 | Return: 67 | k: kmeans evolved anchors 68 | 69 | Usage: 70 | from utils.autoanchor import *; _ = kmean_anchors() 71 | """ 72 | thr = 1. / thr 73 | 74 | def metric(k, wh): # compute metrics 75 | r = wh[:, None] / k[None] 76 | x = torch.min(r, 1. 
/ r).min(2)[0] # ratio metric 77 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 78 | return x, x.max(1)[0] # x, best_x 79 | 80 | def anchor_fitness(k): # mutation fitness 81 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 82 | return (best * (best > thr).float()).mean() # fitness 83 | 84 | def print_results(k): 85 | k = k[np.argsort(k.prod(1))] # sort small to large 86 | x, best = metric(k, wh0) 87 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 88 | print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat)) 89 | print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' % 90 | (n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='') 91 | for i, x in enumerate(k): 92 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 93 | return k 94 | 95 | if isinstance(path, str): # *.yaml file 96 | with open(path) as f: 97 | data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict 98 | from utils.datasets import LoadImagesAndLabels 99 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 100 | else: 101 | dataset = path # dataset 102 | 103 | # Get label wh 104 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 105 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 106 | 107 | # Filter 108 | i = (wh0 < 3.0).any(1).sum() 109 | if i: 110 | print('WARNING: Extremely small objects found. ' 111 | '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0))) 112 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 113 | 114 | # Kmeans calculation 115 | print('Running kmeans for %g anchors on %g points...' 
% (n, len(wh))) 116 | s = wh.std(0) # sigmas for whitening 117 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 118 | k *= s 119 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 120 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 121 | k = print_results(k) 122 | 123 | # Plot 124 | # k, d = [None] * 20, [None] * 20 125 | # for i in tqdm(range(1, 21)): 126 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 127 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 128 | # ax = ax.ravel() 129 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 130 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 131 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 132 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 133 | # fig.savefig('wh.png', dpi=200) 134 | 135 | # Evolve 136 | npr = np.random 137 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 138 | pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm') # progress bar 139 | for _ in pbar: 140 | v = np.ones(sh) 141 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 142 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 143 | kg = (k.copy() * v).clip(min=2.0) 144 | fg = anchor_fitness(kg) 145 | if fg > f: 146 | f, k = fg, kg.copy() 147 | pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f 148 | if verbose: 149 | print_results(k) 150 | 151 | return print_results(k) 152 | -------------------------------------------------------------------------------- /utils/google_app_engine/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/google-appengine/python 2 | 3 | # Create a virtualenv for dependencies. This isolates these packages from 4 | # system-level packages. 5 | # Use -p python3 or -p python3.7 to select python version. Default is version 2. 6 | RUN virtualenv /env -p python3 7 | 8 | # Setting these environment variables are the same as running 9 | # source /env/bin/activate. 10 | ENV VIRTUAL_ENV /env 11 | ENV PATH /env/bin:$PATH 12 | 13 | RUN apt-get update && apt-get install -y python-opencv 14 | 15 | # Copy the application's requirements.txt and run pip to install all 16 | # dependencies into the virtualenv. 17 | ADD requirements.txt /app/requirements.txt 18 | RUN pip install -r /app/requirements.txt 19 | 20 | # Add the application source code. 21 | ADD . /app 22 | 23 | # Run a WSGI server to serve the application. gunicorn must be declared as 24 | # a dependency in requirements.txt. 
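# NOTE (annotation, not part of the upstream Dockerfile): gunicorn is not
# installed by this Dockerfile itself; the application's requirements.txt has
# to pin it, e.g. the gunicorn==19.9.0 line from additional_requirements.txt
# below. $PORT is provided by App Engine at runtime, and "main:app" assumes
# the WSGI application object is named "app" inside main.py.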
25 | CMD gunicorn -b :$PORT main:app 26 | -------------------------------------------------------------------------------- /utils/google_app_engine/additional_requirements.txt: -------------------------------------------------------------------------------- 1 | # add these requirements in your app on top of the existing ones 2 | pip==19.2 3 | Flask==2.3.2 4 | gunicorn==19.9.0 5 | -------------------------------------------------------------------------------- /utils/google_app_engine/app.yaml: -------------------------------------------------------------------------------- 1 | runtime: custom 2 | env: flex 3 | 4 | service: yolov5app 5 | 6 | liveness_check: 7 | initial_delay_sec: 600 8 | 9 | manual_scaling: 10 | instances: 1 11 | resources: 12 | cpu: 1 13 | memory_gb: 4 14 | disk_size_gb: 20 -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | import time 7 | from pathlib import Path 8 | 9 | import torch 10 | 11 | 12 | def gsutil_getsize(url=''): 13 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 14 | s = subprocess.check_output('gsutil du %s' % url, shell=True).decode('utf-8') 15 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 16 | 17 | 18 | def attempt_download(weights): 19 | # Attempt to download pretrained weights if not found locally 20 | weights = str(weights).strip().replace("'", '') 21 | file = Path(weights).name.lower() 22 | 23 | msg = weights + ' missing, try downloading from https://github.com/ultralytics/yolov5/releases/' 24 | models = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt'] # available models 25 | redundant = False # offer second download option 26 | 27 | if file in models and not os.path.isfile(weights): 28 | # Google Drive 29 | # d = {'yolov5s.pt': '1R5T6rIyy3lLwgFXNms8whc-387H0tMQO', 30 | # 'yolov5m.pt': '1vobuEExpWQVpXExsJ2w-Mbf3HJjWkQJr', 31 | # 'yolov5l.pt': '1hrlqD1Wdei7UT4OgT785BEk1JwnSvNEV', 32 | # 'yolov5x.pt': '1mM8aZJlWTxOg7BZJvNUMrTnA2AbeCVzS'} 33 | # r = gdrive_download(id=d[file], name=weights) if file in d else 1 34 | # if r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6: # check 35 | # return 36 | 37 | try: # GitHub 38 | url = 'https://github.com/ultralytics/yolov5/releases/download/v3.1/' + file 39 | print('Downloading %s to %s...' % (url, weights)) 40 | torch.hub.download_url_to_file(url, weights) 41 | assert os.path.exists(weights) and os.path.getsize(weights) > 1E6 # check 42 | except Exception as e: # GCP 43 | print('Download error: %s' % e) 44 | assert redundant, 'No secondary mirror' 45 | url = 'https://storage.googleapis.com/ultralytics/yolov5/ckpt/' + file 46 | print('Downloading %s to %s...' % (url, weights)) 47 | r = os.system('curl -L %s -o %s' % (url, weights)) # torch.hub.download_url_to_file(url, weights) 48 | finally: 49 | if not (os.path.exists(weights) and os.path.getsize(weights) > 1E6): # check 50 | os.remove(weights) if os.path.exists(weights) else None # remove partial downloads 51 | print('ERROR: Download failure: %s' % msg) 52 | print('') 53 | return 54 | 55 | 56 | def gdrive_download(id='1uH2BylpFxHKEGXKL6wJJlsgMU2YEjxuc', name='tmp.zip'): 57 | # Downloads a file from Google Drive. 
from utils.google_utils import *; gdrive_download() 58 | t = time.time() 59 | 60 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') 61 | os.remove(name) if os.path.exists(name) else None # remove existing 62 | os.remove('cookie') if os.path.exists('cookie') else None 63 | 64 | # Attempt file download 65 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 66 | os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out)) 67 | if os.path.exists('cookie'): # large file 68 | s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name) 69 | else: # small file 70 | s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id) 71 | r = os.system(s) # execute, capture return 72 | os.remove('cookie') if os.path.exists('cookie') else None 73 | 74 | # Error check 75 | if r != 0: 76 | os.remove(name) if os.path.exists(name) else None # remove partial 77 | print('Download error ') # raise Exception('Download error') 78 | return r 79 | 80 | # Unzip if archive 81 | if name.endswith('.zip'): 82 | print('unzipping... ', end='') 83 | os.system('unzip -q %s' % name) # unzip 84 | os.remove(name) # remove zip to free space 85 | 86 | print('Done (%.1fs)' % (time.time() - t)) 87 | return r 88 | 89 | 90 | def get_token(cookie="./cookie"): 91 | with open(cookie) as f: 92 | for line in f: 93 | if "download" in line: 94 | return line.split()[-1] 95 | return "" 96 | 97 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 98 | # # Uploads a file to a bucket 99 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 100 | # 101 | # storage_client = storage.Client() 102 | # bucket = storage_client.get_bucket(bucket_name) 103 | # blob = bucket.blob(destination_blob_name) 104 | # 105 | # blob.upload_from_filename(source_file_name) 106 | # 107 | # print('File {} uploaded to {}.'.format( 108 | # source_file_name, 109 | # destination_blob_name)) 110 | # 111 | # 112 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 113 | # # Uploads a blob from a bucket 114 | # storage_client = storage.Client() 115 | # bucket = storage_client.get_bucket(bucket_name) 116 | # blob = bucket.blob(source_blob_name) 117 | # 118 | # blob.download_to_filename(destination_file_name) 119 | # 120 | # print('Blob {} downloaded to {}.'.format( 121 | # source_blob_name, 122 | # destination_file_name)) 123 | -------------------------------------------------------------------------------- /utils/loss.py: -------------------------------------------------------------------------------- 1 | # Loss functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from utils.general import bbox_iou 7 | from utils.torch_utils import is_parallel 8 | 9 | 10 | def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 11 | # return positive, negative label smoothing BCE targets 12 | return 1.0 - 0.5 * eps, 0.5 * eps 13 | 14 | 15 | class BCEBlurWithLogitsLoss(nn.Module): 16 | # BCEwithLogitLoss() with reduced missing label effects. 
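# NOTE (annotation, not part of the upstream file): in forward() below,
# dx = sigmoid(pred) - true, and the element-wise BCE loss is scaled by
# 1 - exp((dx - 1) / (alpha + 1e-4)). That factor stays close to 1 for most
# errors but falls toward 0 as dx approaches 1, i.e. when the model is highly
# confident about an object that has no label, so probable missing labels are
# largely ignored instead of being penalized.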
17 | def __init__(self, alpha=0.05): 18 | super(BCEBlurWithLogitsLoss, self).__init__() 19 | self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() 20 | self.alpha = alpha 21 | 22 | def forward(self, pred, true): 23 | loss = self.loss_fcn(pred, true) 24 | pred = torch.sigmoid(pred) # prob from logits 25 | dx = pred - true # reduce only missing label effects 26 | # dx = (pred - true).abs() # reduce missing label and false label effects 27 | alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) 28 | loss *= alpha_factor 29 | return loss.mean() 30 | 31 | 32 | class FocalLoss(nn.Module): 33 | # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 34 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 35 | super(FocalLoss, self).__init__() 36 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 37 | self.gamma = gamma 38 | self.alpha = alpha 39 | self.reduction = loss_fcn.reduction 40 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 41 | 42 | def forward(self, pred, true): 43 | loss = self.loss_fcn(pred, true) 44 | # p_t = torch.exp(-loss) 45 | # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability 46 | 47 | # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py 48 | pred_prob = torch.sigmoid(pred) # prob from logits 49 | p_t = true * pred_prob + (1 - true) * (1 - pred_prob) 50 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 51 | modulating_factor = (1.0 - p_t) ** self.gamma 52 | loss *= alpha_factor * modulating_factor 53 | 54 | if self.reduction == 'mean': 55 | return loss.mean() 56 | elif self.reduction == 'sum': 57 | return loss.sum() 58 | else: # 'none' 59 | return loss 60 | 61 | 62 | class QFocalLoss(nn.Module): 63 | # Wraps Quality focal loss around existing loss_fcn(), i.e. 
criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 64 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 65 | super(QFocalLoss, self).__init__() 66 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 67 | self.gamma = gamma 68 | self.alpha = alpha 69 | self.reduction = loss_fcn.reduction 70 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 71 | 72 | def forward(self, pred, true): 73 | loss = self.loss_fcn(pred, true) 74 | 75 | pred_prob = torch.sigmoid(pred) # prob from logits 76 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 77 | modulating_factor = torch.abs(true - pred_prob) ** self.gamma 78 | loss *= alpha_factor * modulating_factor 79 | 80 | if self.reduction == 'mean': 81 | return loss.mean() 82 | elif self.reduction == 'sum': 83 | return loss.sum() 84 | else: # 'none' 85 | return loss 86 | 87 | 88 | def compute_loss(p, targets, model): # predictions, targets, model 89 | device = targets.device 90 | lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) 91 | tcls, tbox, indices, anchors = build_targets(p, targets, model) # targets 92 | h = model.hyp # hyperparameters 93 | 94 | # Define criteria 95 | BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) # weight=model.class_weights) 96 | BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) 97 | 98 | # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 99 | cp, cn = smooth_BCE(eps=0.0) 100 | 101 | # Focal loss 102 | g = h['fl_gamma'] # focal loss gamma 103 | if g > 0: 104 | BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) 105 | 106 | # Losses 107 | nt = 0 # number of targets 108 | no = len(p) # number of outputs 109 | balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6 110 | for i, pi in enumerate(p): # layer index, layer predictions 111 | b, a, gj, gi = indices[i] # image, anchor, gridy, gridx 112 | tobj = torch.zeros_like(pi[..., 0], device=device) # target obj 113 | 114 | n = b.shape[0] # number of targets 115 | if n: 116 | nt += n # cumulative targets 117 | ps = pi[b, a, gj, gi] # prediction subset corresponding to targets 118 | 119 | # Regression 120 | pxy = ps[:, :2].sigmoid() * 2. - 0.5 121 | pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] 122 | pbox = torch.cat((pxy, pwh), 1) # predicted box 123 | iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) 124 | lbox += (1.0 - iou).mean() # iou loss 125 | 126 | # Objectness 127 | tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio 128 | 129 | # Classification 130 | if model.nc > 1: # cls loss (only if multiple classes) 131 | t = torch.full_like(ps[:, 5:], cn, device=device) # targets 132 | t[range(n), tcls[i]] = cp 133 | lcls += BCEcls(ps[:, 5:], t) # BCE 134 | 135 | # Append targets to text file 136 | # with open('targets.txt', 'a') as file: 137 | # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] 138 | 139 | lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss 140 | 141 | s = 3 / no # output count scaling 142 | lbox *= h['box'] * s 143 | lobj *= h['obj'] * s * (1.4 if no == 4 else 1.) 
144 | lcls *= h['cls'] * s 145 | bs = tobj.shape[0] # batch size 146 | 147 | loss = lbox + lobj + lcls 148 | return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach() 149 | 150 | 151 | def build_targets(p, targets, model): 152 | # Build targets for compute_loss(), input targets(image,class,x,y,w,h) 153 | det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module 154 | na, nt = det.na, targets.shape[0] # number of anchors, targets 155 | tcls, tbox, indices, anch = [], [], [], [] 156 | gain = torch.ones(7, device=targets.device) # normalized to gridspace gain 157 | ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) 158 | targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices 159 | 160 | g = 0.5 # bias 161 | off = torch.tensor([[0, 0], 162 | [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m 163 | # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm 164 | ], device=targets.device).float() * g # offsets 165 | 166 | for i in range(det.nl): 167 | anchors = det.anchors[i] 168 | gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain 169 | 170 | # Match targets to anchors 171 | t = targets * gain 172 | if nt: 173 | # Matches 174 | r = t[:, :, 4:6] / anchors[:, None] # wh ratio 175 | j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t'] # compare 176 | # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) 177 | t = t[j] # filter 178 | 179 | # Offsets 180 | gxy = t[:, 2:4] # grid xy 181 | gxi = gain[[2, 3]] - gxy # inverse 182 | j, k = ((gxy % 1. < g) & (gxy > 1.)).T 183 | l, m = ((gxi % 1. < g) & (gxi > 1.)).T 184 | j = torch.stack((torch.ones_like(j), j, k, l, m)) 185 | t = t.repeat((5, 1, 1))[j] 186 | offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] 187 | else: 188 | t = targets[0] 189 | offsets = 0 190 | 191 | # Define 192 | b, c = t[:, :2].long().T # image, class 193 | gxy = t[:, 2:4] # grid xy 194 | gwh = t[:, 4:6] # grid wh 195 | gij = (gxy - offsets).long() 196 | gi, gj = gij.T # grid xy indices 197 | 198 | # Append 199 | a = t[:, 6].long() # anchor indices 200 | indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices 201 | tbox.append(torch.cat((gxy - gij, gwh), 1)) # box 202 | anch.append(anchors[a]) # anchors 203 | tcls.append(c) # class 204 | 205 | return tcls, tbox, indices, anch 206 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | # Model validation metrics 2 | 3 | from pathlib import Path 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import torch 8 | 9 | from . import general 10 | 11 | 12 | def fitness(x): 13 | # Model fitness as a weighted combination of metrics 14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] 15 | return (x[:, :4] * w).sum(1) 16 | 17 | 18 | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]): 19 | """ Compute the average precision, given the recall and precision curves. 20 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 21 | # Arguments 22 | tp: True positives (nparray, nx1 or nx10). 23 | conf: Objectness value from 0-1 (nparray). 24 | pred_cls: Predicted object classes (nparray). 25 | target_cls: True object classes (nparray). 
26 | plot: Plot precision-recall curve at mAP@0.5 27 | save_dir: Plot save directory 28 | # Returns 29 | The average precision as computed in py-faster-rcnn. 30 | """ 31 | 32 | # Sort by objectness 33 | i = np.argsort(-conf) 34 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 35 | 36 | # Find unique classes 37 | unique_classes = np.unique(target_cls) 38 | 39 | # Create Precision-Recall curve and compute AP for each class 40 | px, py = np.linspace(0, 1, 1000), [] # for plotting 41 | pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898 42 | s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95) 43 | ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s) 44 | for ci, c in enumerate(unique_classes): 45 | i = pred_cls == c 46 | n_l = (target_cls == c).sum() # number of labels 47 | n_p = i.sum() # number of predictions 48 | 49 | if n_p == 0 or n_l == 0: 50 | continue 51 | else: 52 | # Accumulate FPs and TPs 53 | fpc = (1 - tp[i]).cumsum(0) 54 | tpc = tp[i].cumsum(0) 55 | 56 | # Recall 57 | recall = tpc / (n_l + 1e-16) # recall curve 58 | r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) # r at pr_score, negative x, xp because xp decreases 59 | 60 | # Precision 61 | precision = tpc / (tpc + fpc) # precision curve 62 | p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score 63 | 64 | # AP from recall-precision curve 65 | for j in range(tp.shape[1]): 66 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) 67 | if plot and (j == 0): 68 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 69 | 70 | # Compute F1 score (harmonic mean of precision and recall) 71 | f1 = 2 * p * r / (p + r + 1e-16) 72 | 73 | if plot: 74 | plot_pr_curve(px, py, ap, save_dir, names) 75 | 76 | return p, r, ap, f1, unique_classes.astype('int32') 77 | 78 | 79 | def compute_ap(recall, precision): 80 | """ Compute the average precision, given the recall and precision curves 81 | # Arguments 82 | recall: The recall curve (list) 83 | precision: The precision curve (list) 84 | # Returns 85 | Average precision, precision curve, recall curve 86 | """ 87 | 88 | # Append sentinel values to beginning and end 89 | mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01])) 90 | mpre = np.concatenate(([1.], precision, [0.])) 91 | 92 | # Compute the precision envelope 93 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) 94 | 95 | # Integrate area under curve 96 | method = 'interp' # methods: 'continuous', 'interp' 97 | if method == 'interp': 98 | x = np.linspace(0, 1, 101) # 101-point interp (COCO) 99 | ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate 100 | else: # 'continuous' 101 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes 102 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve 103 | 104 | return ap, mpre, mrec 105 | 106 | 107 | class ConfusionMatrix: 108 | # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix 109 | def __init__(self, nc, conf=0.25, iou_thres=0.45): 110 | self.matrix = np.zeros((nc + 1, nc + 1)) 111 | self.nc = nc # number of classes 112 | self.conf = conf 113 | self.iou_thres = iou_thres 114 | 115 | def process_batch(self, detections, labels): 116 | """ 117 | Return intersection-over-union (Jaccard index) of boxes. 118 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
119 | Arguments: 120 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class 121 | labels (Array[M, 5]), class, x1, y1, x2, y2 122 | Returns: 123 | None, updates confusion matrix accordingly 124 | """ 125 | detections = detections[detections[:, 4] > self.conf] 126 | gt_classes = labels[:, 0].int() 127 | detection_classes = detections[:, 5].int() 128 | iou = general.box_iou(labels[:, 1:], detections[:, :4]) 129 | 130 | x = torch.where(iou > self.iou_thres) 131 | if x[0].shape[0]: 132 | matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() 133 | if x[0].shape[0] > 1: 134 | matches = matches[matches[:, 2].argsort()[::-1]] 135 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]] 136 | matches = matches[matches[:, 2].argsort()[::-1]] 137 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]] 138 | else: 139 | matches = np.zeros((0, 3)) 140 | 141 | n = matches.shape[0] > 0 142 | m0, m1, _ = matches.transpose().astype(np.int16) 143 | for i, gc in enumerate(gt_classes): 144 | j = m0 == i 145 | if n and sum(j) == 1: 146 | self.matrix[gc, detection_classes[m1[j]]] += 1 # correct 147 | else: 148 | self.matrix[gc, self.nc] += 1 # background FP 149 | 150 | if n: 151 | for i, dc in enumerate(detection_classes): 152 | if not any(m1 == i): 153 | self.matrix[self.nc, dc] += 1 # background FN 154 | 155 | def matrix(self): 156 | return self.matrix 157 | 158 | def plot(self, save_dir='', names=()): 159 | try: 160 | import seaborn as sn 161 | 162 | array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize 163 | array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) 164 | 165 | fig = plt.figure(figsize=(12, 9), tight_layout=True) 166 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size 167 | labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels 168 | sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True, 169 | xticklabels=names + ['background FN'] if labels else "auto", 170 | yticklabels=names + ['background FP'] if labels else "auto").set_facecolor((1, 1, 1)) 171 | fig.axes[0].set_xlabel('True') 172 | fig.axes[0].set_ylabel('Predicted') 173 | fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) 174 | except Exception as e: 175 | pass 176 | 177 | def print(self): 178 | for i in range(self.nc + 1): 179 | print(' '.join(map(str, self.matrix[i]))) 180 | 181 | 182 | # Plots ---------------------------------------------------------------------------------------------------------------- 183 | 184 | def plot_pr_curve(px, py, ap, save_dir='.', names=()): 185 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 186 | py = np.stack(py, axis=1) 187 | 188 | if 0 < len(names) < 21: # show mAP in legend if < 10 classes 189 | for i, y in enumerate(py.T): 190 | ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0]) # plot(recall, precision) 191 | else: 192 | ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) 193 | 194 | ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) 195 | ax.set_xlabel('Recall') 196 | ax.set_ylabel('Precision') 197 | ax.set_xlim(0, 1) 198 | ax.set_ylim(0, 1) 199 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 200 | fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250) 201 | -------------------------------------------------------------------------------- /utils/models/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/utils/models/__init__.py -------------------------------------------------------------------------------- /utils/models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from models.common import Conv, DWConv 8 | from utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | 25 | class Sum(nn.Module): 26 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 27 | def __init__(self, n, weight=False): # n: number of inputs 28 | super(Sum, self).__init__() 29 | self.weight = weight # apply weights boolean 30 | self.iter = range(n - 1) # iter object 31 | if weight: 32 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 33 | 34 | def forward(self, x): 35 | y = x[0] # no weight 36 | if self.weight: 37 | w = torch.sigmoid(self.w) * 2 38 | for i in self.iter: 39 | y = y + x[i + 1] * w[i] 40 | else: 41 | for i in self.iter: 42 | y = y + x[i + 1] 43 | return y 44 | 45 | 46 | class GhostConv(nn.Module): 47 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 48 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 49 | super(GhostConv, self).__init__() 50 | c_ = c2 // 2 # hidden channels 51 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 52 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 53 | 54 | def forward(self, x): 55 | y = self.cv1(x) 56 | return torch.cat([y, self.cv2(y)], 1) 57 | 58 | 59 | class GhostBottleneck(nn.Module): 60 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 61 | def __init__(self, c1, c2, k, s): 62 | super(GhostBottleneck, self).__init__() 63 | c_ = c2 // 2 64 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 65 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 66 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 67 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 68 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 69 | 70 | def forward(self, x): 71 | return self.conv(x) + self.shortcut(x) 72 | 73 | 74 | class MixConv2d(nn.Module): 75 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 76 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 77 | super(MixConv2d, self).__init__() 78 | groups = len(k) 79 | if equal_ch: # equal c_ per group 80 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 81 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 82 | else: # equal weight.numel() per group 83 | b = [c2] + [0] * groups 84 | a = np.eye(groups + 1, groups, k=-1) 85 | a -= np.roll(a, 1, axis=1) 86 | a *= np.array(k) ** 2 87 | a[0] = 1 88 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for 
equal weight indices, ax = b 89 | 90 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 91 | self.bn = nn.BatchNorm2d(c2) 92 | self.act = nn.LeakyReLU(0.1, inplace=True) 93 | 94 | def forward(self, x): 95 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 96 | 97 | 98 | class Ensemble(nn.ModuleList): 99 | # Ensemble of models 100 | def __init__(self): 101 | super(Ensemble, self).__init__() 102 | 103 | def forward(self, x, augment=False): 104 | y = [] 105 | for module in self: 106 | y.append(module(x, augment)[0]) 107 | # y = torch.stack(y).max(0)[0] # max ensemble 108 | # y = torch.cat(y, 1) # nms ensemble 109 | y = torch.stack(y).mean(0) # mean ensemble 110 | return y, None # inference, train output 111 | 112 | 113 | def attempt_load(weights, map_location=None): 114 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 115 | model = Ensemble() 116 | for w in weights if isinstance(weights, list) else [weights]: 117 | attempt_download(w) 118 | model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model 119 | 120 | # Compatibility updates 121 | for m in model.modules(): 122 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 123 | m.inplace = True # pytorch 1.7.0 compatibility 124 | elif type(m) is Conv: 125 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 126 | 127 | if len(model) == 1: 128 | return model[-1] # return model 129 | else: 130 | print('Ensemble created with %s\n' % weights) 131 | for k in ['names', 'stride']: 132 | setattr(model, k, getattr(model[-1], k)) 133 | return model # return ensemble 134 | -------------------------------------------------------------------------------- /utils/models/export.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental import attempt_load 18 | from utils.activations import Hardswish, SiLU 19 | from utils.general import set_logging, check_img_size 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ 24 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 25 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 26 | opt = parser.parse_args() 27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 28 | print(opt) 29 | set_logging() 30 | t = time.time() 31 | 32 | # Load PyTorch model 33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model 34 | labels = model.names 35 | 36 | # Checks 37 | gs = int(max(model.stride)) # grid size (max stride) 38 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples 39 | 40 | # Input 41 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection 42 | 43 | # Update model 44 | for k, m in model.named_modules(): 45 | 
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 46 | if isinstance(m, models.common.Conv): # assign export-friendly activations 47 | if isinstance(m.act, nn.Hardswish): 48 | m.act = Hardswish() 49 | elif isinstance(m.act, nn.SiLU): 50 | m.act = SiLU() 51 | # elif isinstance(m, models.yolo.Detect): 52 | # m.forward = m.forward_export # assign forward (optional) 53 | model.model[-1].export = True # set Detect() layer export=True 54 | y = model(img) # dry run 55 | 56 | # TorchScript export 57 | try: 58 | print('\nStarting TorchScript export with torch %s...' % torch.__version__) 59 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename 60 | ts = torch.jit.trace(model, img) 61 | ts.save(f) 62 | print('TorchScript export success, saved as %s' % f) 63 | except Exception as e: 64 | print('TorchScript export failure: %s' % e) 65 | 66 | # ONNX export 67 | try: 68 | import onnx 69 | 70 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 71 | f = opt.weights.replace('.pt', '.onnx') # filename 72 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 73 | output_names=['classes', 'boxes'] if y is None else ['output']) 74 | 75 | # Checks 76 | onnx_model = onnx.load(f) # load onnx model 77 | onnx.checker.check_model(onnx_model) # check onnx model 78 | # print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 79 | print('ONNX export success, saved as %s' % f) 80 | except Exception as e: 81 | print('ONNX export failure: %s' % e) 82 | 83 | # CoreML export 84 | try: 85 | import coremltools as ct 86 | 87 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__) 88 | # convert model from torchscript and apply pixel scaling as per detect.py 89 | model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) 90 | f = opt.weights.replace('.pt', '.mlmodel') # filename 91 | model.save(f) 92 | print('CoreML export success, saved as %s' % f) 93 | except Exception as e: 94 | print('CoreML export failure: %s' % e) 95 | 96 | # Finish 97 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' 
% (time.time() - t)) 98 | -------------------------------------------------------------------------------- /utils/models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /utils/models/hub/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,14, 23,27, 37,58] # P4/16 9 | - [81,82, 135,169, 344,319] # P5/32 10 | 11 | # YOLOv3-tiny backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Conv, [16, 3, 1]], # 0 15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 16 | [-1, 1, Conv, [32, 3, 1]], 17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 18 | [-1, 1, Conv, [64, 3, 1]], 19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 20 | [-1, 1, Conv, [128, 3, 1]], 21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 22 | [-1, 1, Conv, [256, 3, 1]], 23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 24 | [-1, 1, Conv, [512, 3, 1]], 25 | [-1, 1, nn.ZeroPad2d, [0, 1, 0, 1]], # 11 26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 27 | ] 28 | 29 | # YOLOv3-tiny head 30 | head: 31 | [[-1, 1, Conv, [1024, 3, 1]], 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 34 | 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 39 | 40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) 41 | ] 42 | -------------------------------------------------------------------------------- /utils/models/hub/yolov3.yaml: 
-------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3 head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, Conv, [512, [1, 1]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /utils/models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, Bottleneck, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 6, BottleneckCSP, [1024]], # 9 25 | ] 26 | 27 | # YOLOv5 FPN head 28 | head: 29 | [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large) 30 | 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium) 35 | 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small) 40 | 41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | -------------------------------------------------------------------------------- /utils/models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 
1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 PANet head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /utils/models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /utils/models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.67 # model 
depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /utils/models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /utils/models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | 
width_multiple: 1.25 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /utils/roboflow.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import base64 3 | import io 4 | import cv2 5 | from PIL import Image 6 | import numpy as np 7 | 8 | 9 | def predict_image(image, api_key, url, confidence, overlap, idx): 10 | retval, buffer = cv2.imencode('.jpg', image) 11 | img_str = base64.b64encode(buffer) 12 | img_str = img_str.decode("ascii") 13 | 14 | # Construct the URL 15 | upload_url = "".join([ 16 | url, 17 | "?api_key=", 18 | api_key, 19 | "&confidence=", 20 | str(confidence), 21 | "&overlap=", 22 | str(overlap), 23 | "&name=", 24 | str(idx), 25 | ".jpg" 26 | ]) 27 | 28 | # POST to the API 29 | r = requests.post(upload_url, data=img_str, headers={ 30 | "Content-Type": "application/x-www-form-urlencoded" 31 | }) 32 | 33 | json = r.json() 34 | 35 | predictions = json["predictions"] 36 | formatted_predictions = [] 37 | classes = [] 38 | 39 | for pred in predictions: 40 | formatted_pred = [pred["x"], pred["y"], pred["width"], pred["height"], pred["confidence"]] 41 | 42 | # convert to top-left x/y from center 43 | formatted_pred[0] -= formatted_pred[2]/2 44 | formatted_pred[1] -= formatted_pred[3]/2 45 | 46 | formatted_predictions.append(formatted_pred) 47 | classes.append(pred["class"]) 48 | 49 | #print(formatted_predictions) 50 | 51 | return formatted_predictions, classes 52 | -------------------------------------------------------------------------------- /utils/yolov4.py: -------------------------------------------------------------------------------- 1 | from tool.utils import * 2 | from tool.torch_utils import * 3 | from tool.darknet2pytorch import Darknet 4 | from utils.general import non_max_suppression, xyxy2xywh 5 | import cv2 6 | import torch 7 | 8 | class Yolov4Engine: 9 | def __init__(self, weights, cfgfile, device, names, classes, conf_thres, iou_thres, agnostic_nms, augment, half): 10 | self.model = Darknet(cfgfile) 11 | self.model.load_weights(weights[0]) 12 | 
self.device = device 13 | 14 | if self.device != "cpu": 15 | self.model.cuda() 16 | 17 | self.classes = classes 18 | self.names = load_class_names(names) 19 | self.conf_thres = conf_thres 20 | self.iou_thres = iou_thres 21 | self.augment = augment 22 | self.agnostic_nms = agnostic_nms 23 | 24 | def infer(self, img): 25 | img_resized = cv2.resize(img, (self.model.width, self.model.height)) 26 | pred = do_detect(self.model, img_resized, self.conf_thres, self.iou_thres, self.device != "cpu")[0] 27 | return np.array(pred) 28 | 29 | def postprocess(self, pred, img_shape): 30 | height = img_shape[0] 31 | width = img_shape[1] 32 | classes = pred[:, 6].tolist() 33 | for i, cls in enumerate(classes): 34 | classes[i] = self.names[int(cls)] 35 | 36 | dets = pred[:, :5] 37 | for i, det in enumerate(dets): 38 | box = det 39 | x1 = int(box[0] * width) 40 | y1 = int(box[1] * height) 41 | x2 = int(box[2] * width) 42 | y2 = int(box[3] * height) 43 | newDet = [x1, y1, x2 - x1, y2 - y1, det[4]] # top-left x, top-left y, width, height, confidence 44 | dets[i] = newDet 45 | return pred, classes 46 | 47 | 48 | def nms(self, pred): 49 | out = non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms) 50 | return out 51 | -------------------------------------------------------------------------------- /utils/yolov5.py: -------------------------------------------------------------------------------- 1 | from models.experimental import attempt_load 2 | from utils.general import non_max_suppression 3 | 4 | class Yolov5Engine: 5 | def __init__(self, weights, device, classes, conf_thres, iou_thres, agnostic_nms, augment, half): 6 | self.model = attempt_load(weights, map_location=device) 7 | if half: 8 | self.model.half() 9 | self.classes = classes 10 | self.conf_thres = conf_thres 11 | self.iou_thres = iou_thres 12 | self.augment = augment 13 | self.agnostic_nms = agnostic_nms 14 | 15 | def infer(self, img): 16 | pred = self.model(img, augment=self.augment)[0] 17 | pred = self.nms(pred) 18 | return pred 19 | 20 | def nms(self, pred): 21 | out = non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms) 22 | return out 23 | 24 | def get_names(self): 25 | return self.model.module.names if hasattr(self.model, 'module') else self.model.names -------------------------------------------------------------------------------- /utils/yolov7.py: -------------------------------------------------------------------------------- 1 | from models.experimental import attempt_load 2 | from utils.general import non_max_suppression 3 | 4 | class Yolov7Engine: 5 | def __init__(self, weights, device, classes, conf_thres, iou_thres, agnostic_nms, augment, half): 6 | self.model = attempt_load(weights, map_location=device) 7 | if half: 8 | self.model.half() 9 | self.classes = classes 10 | self.conf_thres = conf_thres 11 | self.iou_thres = iou_thres 12 | self.augment = augment 13 | self.agnostic_nms = agnostic_nms 14 | 15 | def infer(self, img): 16 | pred = self.model(img, augment=self.augment)[0] 17 | pred = self.nms(pred) 18 | return pred 19 | 20 | def nms(self, pred): 21 | out = non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms) 22 | return out 23 | 24 | def get_names(self): 25 | return self.model.module.names if hasattr(self.model, 'module') else self.model.names --------------------------------------------------------------------------------
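A minimal usage sketch for the two inference paths defined above. Yolov5Engine (Yolov7Engine is identical apart from its name) wraps local *.pt weights, while predict_image in utils/roboflow.py calls a hosted Roboflow endpoint. Everything concrete below is an illustrative assumption rather than something taken from this repository: the weights file 'yolov5s.pt', the input file 'frame.jpg', the 640x640 size, the threshold values, the placeholder API key and endpoint URL, and the plain cv2.resize that stands in for whatever letterbox-style preprocessing the full tracking pipeline performs.

import cv2
import torch

from utils.roboflow import predict_image
from utils.yolov5 import Yolov5Engine

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
half = device.type != 'cpu'  # FP16 is only useful on GPU

# Local-weights path: load the model once, then run preprocessed frames through it.
engine = Yolov5Engine('yolov5s.pt', device, None, 0.4, 0.45, False, False, half)
names = engine.get_names()

frame = cv2.imread('frame.jpg')                   # BGR, HWC uint8 (hypothetical input)
img = cv2.resize(frame, (640, 640))               # simplified stand-in for letterbox resizing
img = img[:, :, ::-1].transpose(2, 0, 1).copy()   # BGR -> RGB, HWC -> CHW
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float()
img /= 255.0                                      # 0-255 -> 0.0-1.0
img = img.unsqueeze(0)                            # add batch dimension

dets = engine.infer(img)[0]                       # NMS output for the single image in the batch
if dets is not None:                              # some NMS versions return None when nothing is found
    for *xyxy, conf, cls in dets.tolist():        # each row: x1, y1, x2, y2, confidence, class index
        print(names[int(cls)], round(conf, 2), [int(v) for v in xyxy])

# Hosted path: the same frame can instead be sent to a Roboflow model endpoint.
# The API key, endpoint URL, and 0-100 threshold convention are placeholders/assumptions.
boxes, labels = predict_image(
    frame,
    api_key='YOUR_API_KEY',
    url='https://detect.roboflow.com/your-model/1',
    confidence=40,
    overlap=30,
    idx=0)                                        # frame index, only used to name the upload

Note that the two paths return different box conventions: the engine path yields x1/y1/x2/y2 boxes in the coordinates of the resized input after NMS, while predict_image converts the hosted response to top-left x/y plus width/height before returning it.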