├── .gitignore ├── LICENSE ├── README.md ├── app.py ├── asserts ├── demo1.gif ├── demo1.mp4 ├── demo2.gif ├── demo2.mp4 ├── demo3.gif ├── demo3.mp4 └── test.mp4 ├── config.py ├── config ├── coco.data ├── coco.names ├── yolov3-tiny.cfg └── yolov3.cfg ├── counter.py ├── gui.py ├── gui.ui ├── models.py ├── predict.py ├── requirements.txt └── utils ├── .config ├── __init__.py ├── augmentations.py ├── datasets.py ├── logger.py ├── parse_config.py ├── sort.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | __pycache__/ 3 | weights/* 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 wsh122333 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multi-type_vehicles_flow_statistics 2 | Counts vehicles of multiple types using the YOLOv3 and SORT algorithms; implemented in PyTorch. 3 | Detects and tracks vehicles of the classes \["bicycle","bus","car","motorbike","truck"]. 4 | 5 | ## References 6 | - yolov3-darknet https://github.com/pjreddie/darknet 7 | - yolov3-pytorch https://github.com/eriklindernoren/PyTorch-YOLOv3 8 | - sort https://github.com/abewley/sort 9 | 10 | ## Dependencies 11 | - Ubuntu / Windows 12 | - CUDA >= 10.0 13 | - Python >= 3.6 14 | - `pip3 install -r requirements.txt` 15 | 16 | ## Usage 17 | 1. Download the pre-trained YOLOv3 weights file [here](https://pjreddie.com/media/files/yolov3.weights) and put it into the `weights` directory; 18 | 2. Run `python3 app.py`; 19 | 3. Open a video, double-click the image to pick the corners of the counting area, then start; 20 | 4. After detection and tracking finish, the result video and `results.txt` are saved under the `results` directory; each line of `results.txt` has the format \[videoName, id, objectName], one line per counted vehicle.
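   For example, a run on the bundled `asserts/test.mp4` clip would append space-separated lines such as the following to `results/results.txt` (the IDs and classes shown are illustrative):

   ```
   test.mp4 1 car
   test.mp4 2 truck
   test.mp4 3 car
   ```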
21 | 22 | ## Demo 23 | ![avatar](https://github.com/wsh122333/Multi-type_vehicles_flow_statistics/raw/master/asserts/demo1.gif) 24 | 25 | ![avatar](https://github.com/wsh122333/Multi-type_vehicles_flow_statistics/raw/master/asserts/demo2.gif) 26 | 27 | ![avatar](https://github.com/wsh122333/Multi-type_vehicles_flow_statistics/raw/master/asserts/demo3.gif) 28 | 29 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog 3 | from PyQt5.QtGui import QImage, QPixmap 4 | from gui import * 5 | import copy 6 | from counter import CounterThread 7 | from utils.sort import * 8 | from models import * 9 | from utils.utils import * 10 | from utils.datasets import * 11 | from config import * 12 | 13 | class App(QMainWindow,Ui_mainWindow): 14 | def __init__(self): 15 | super(App,self).__init__() 16 | self.setupUi(self) 17 | self.label_image_size = (self.label_image.geometry().width(),self.label_image.geometry().height()) 18 | self.video = None 19 | self.exampleImage = None 20 | self.imgScale = None 21 | self.get_points_flag = 0 22 | self.countArea = [] 23 | self.road_code = None 24 | self.time_code = None 25 | self.show_label = names 26 | 27 | #button function 28 | self.pushButton_selectArea.clicked.connect(self.select_area) 29 | self.pushButton_openVideo.clicked.connect(self.open_video) 30 | self.pushButton_start.clicked.connect(self.start_count) 31 | self.pushButton_pause.clicked.connect(self.pause) 32 | self.label_image.mouseDoubleClickEvent = self.get_points 33 | 34 | 35 | self.pushButton_selectArea.setEnabled(False) 36 | self.pushButton_start.setEnabled(False) 37 | self.pushButton_pause.setEnabled(False) 38 | 39 | #some flags 40 | self.running_flag = 0 41 | self.pause_flag = 0 42 | self.counter_thread_start_flag = 0 43 | 44 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 45 | 46 | 47 | data_config = "config/coco.data" 48 | weights_path = "weights/yolov3.weights" 49 | model_def = "config/yolov3.cfg" 50 | data_config = parse_data_config(data_config) 51 | self.yolo_class_names = load_classes(data_config["names"]) 52 | 53 | # Initiate model 54 | print("Loading model ...") 55 | self.yolo_model = Darknet(model_def).to(self.device) 56 | if weights_path.endswith(".weights"): 57 | # Load darknet weights 58 | self.yolo_model.load_darknet_weights(weights_path) 59 | else: 60 | # Load checkpoint weights 61 | self.yolo_model.load_state_dict(torch.load(weights_path)) 62 | 63 | 64 | # counter Thread 65 | self.counterThread = CounterThread(self.yolo_model,self.yolo_class_names,self.device) 66 | self.counterThread.sin_counterResult.connect(self.show_image_label) 67 | self.counterThread.sin_done.connect(self.done) 68 | self.counterThread.sin_counter_results.connect(self.update_counter_results) 69 | 70 | 71 | 72 | def open_video(self): 73 | openfile_name = QFileDialog.getOpenFileName(self,'Open video','','Video files(*.avi , *.mp4)') 74 | self.videoList = [openfile_name[0]] 75 | 76 | # opendir_name = QFileDialog.getExistingDirectory(self, "Open dir", "./") 77 | # self.videoList = [os.path.join(opendir_name,item) for item in os.listdir(opendir_name)] 78 | # self.videoList = list(filter(lambda x: not os.path.isdir(x) , self.videoList)) 79 | # self.videoList.sort() 80 | 81 | vid = cv2.VideoCapture(self.videoList[0]) 82 | 83 | # self.videoWriter = cv2.VideoWriter(openfile_name[0].split("/")[-1], 
cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), 10, (1920, 1080)) 84 | 85 | while vid.isOpened(): 86 | ret, frame = vid.read() 87 | if ret: 88 | self.exampleImage = frame 89 | self.show_image_label(frame) 90 | self.imgScale = np.array(frame.shape[:2]) / [self.label_image_size[1], self.label_image_size[0]] 91 | vid.release() 92 | break 93 | 94 | self.pushButton_selectArea.setEnabled(True) 95 | self.pushButton_start.setText("Start") 96 | self.pushButton_start.setEnabled(False) 97 | self.pushButton_pause.setText("Pause") 98 | self.pushButton_pause.setEnabled(False) 99 | 100 | #clear counting results 101 | KalmanBoxTracker.count = 0 102 | self.label_sum.setText("0") 103 | self.label_sum.repaint() 104 | 105 | 106 | def get_points(self, event): 107 | if self.get_points_flag: 108 | x = event.x() 109 | y = event.y() 110 | self.countArea.append([int(x*self.imgScale[1]),int(y*self.imgScale[0])]) 111 | exampleImageWithArea = copy.deepcopy(self.exampleImage) 112 | for point in self.countArea: 113 | exampleImageWithArea[point[1]-10:point[1]+10,point[0]-10:point[0]+10] = (0,255,255) 114 | cv2.fillConvexPoly(exampleImageWithArea, np.array(self.countArea), (0,0,255)) 115 | self.show_image_label(exampleImageWithArea) 116 | print(self.countArea) 117 | 118 | 119 | def select_area(self): 120 | 121 | #change Area needs update exampleImage 122 | if self.counter_thread_start_flag: 123 | ret, frame = self.videoCapture.read() 124 | if ret: 125 | self.exampleImage = frame 126 | self.show_image_label(frame) 127 | 128 | if not self.get_points_flag: 129 | self.pushButton_selectArea.setText("Submit Area") 130 | self.get_points_flag = 1 131 | self.countArea = [] 132 | self.pushButton_openVideo.setEnabled(False) 133 | self.pushButton_start.setEnabled(False) 134 | 135 | else: 136 | self.pushButton_selectArea.setText("Select Area") 137 | self.get_points_flag = 0 138 | exampleImage = copy.deepcopy(self.exampleImage) 139 | # painting area 140 | for i in range(len(self.countArea)): 141 | cv2.line(exampleImage, tuple(self.countArea[i]), tuple(self.countArea[(i + 1) % (len(self.countArea))]), (0, 0, 255), 2) 142 | self.show_image_label(exampleImage) 143 | 144 | #enable start button 145 | self.pushButton_openVideo.setEnabled(True) 146 | self.pushButton_start.setEnabled(True) 147 | 148 | 149 | def show_image_label(self, img_np): 150 | img_np = cv2.cvtColor(img_np,cv2.COLOR_BGR2RGB) 151 | img_np = cv2.resize(img_np, self.label_image_size) 152 | frame = QImage(img_np, self.label_image_size[0], self.label_image_size[1], QImage.Format_RGB888) 153 | pix = QPixmap.fromImage(frame) 154 | self.label_image.setPixmap(pix) 155 | self.label_image.repaint() 156 | 157 | def start_count(self): 158 | if self.running_flag == 0: 159 | #clear count and display 160 | KalmanBoxTracker.count = 0 161 | for item in self.show_label: 162 | vars(self)[f"label_{item}"].setText('0') 163 | # clear result file 164 | with open("results/results.txt", "w") as f: 165 | pass 166 | 167 | #start 168 | self.running_flag = 1 169 | self.pause_flag = 0 170 | self.pushButton_start.setText("Stop") 171 | self.pushButton_openVideo.setEnabled(False) 172 | self.pushButton_selectArea.setEnabled(False) 173 | #emit new parameter to counter thread 174 | self.counterThread.sin_runningFlag.emit(self.running_flag) 175 | self.counterThread.sin_countArea.emit(self.countArea) 176 | self.counterThread.sin_videoList.emit(self.videoList) 177 | #start counter thread 178 | self.counterThread.start() 179 | 180 | self.pushButton_pause.setEnabled(True) 181 | 182 | 183 | elif self.running_flag == 1: 
#push pause button 184 | #stop system 185 | self.running_flag = 0 186 | self.counterThread.sin_runningFlag.emit(self.running_flag) 187 | self.pushButton_openVideo.setEnabled(True) 188 | self.pushButton_selectArea.setEnabled(True) 189 | self.pushButton_start.setText("Start") 190 | 191 | 192 | 193 | def done(self,sin): 194 | if sin == 1: 195 | self.pushButton_openVideo.setEnabled(True) 196 | self.pushButton_start.setEnabled(False) 197 | self.pushButton_start.setText("Start") 198 | 199 | 200 | def update_counter_results(self,counter_results): 201 | with open("results/results.txt", "a") as f: 202 | for i, result in enumerate(counter_results): 203 | label_var = vars(self)[f"label_{result[2]}"] 204 | label_var.setText(str(int(label_var.text())+1)) 205 | label_var.repaint() 206 | label_sum_var = vars(self)[f"label_sum"] 207 | label_sum_var.setText(str(int(label_sum_var.text()) + 1)) 208 | label_sum_var.repaint() 209 | f.writelines(' '.join(map(lambda x: str(x),result))) 210 | f.write(("\n")) 211 | # print("************************************************",len(counter_results)) 212 | 213 | 214 | def pause(self): 215 | if self.pause_flag == 0: 216 | self.pause_flag = 1 217 | self.pushButton_pause.setText("Continue") 218 | self.pushButton_start.setEnabled(False) 219 | else: 220 | self.pause_flag = 0 221 | self.pushButton_pause.setText("Pause") 222 | self.pushButton_start.setEnabled(True) 223 | 224 | self.counterThread.sin_pauseFlag.emit(self.pause_flag) 225 | 226 | 227 | if __name__ == '__main__': 228 | app = QApplication(sys.argv) 229 | myWin = App() 230 | myWin.show() 231 | sys.exit(app.exec_()) 232 | -------------------------------------------------------------------------------- /asserts/demo1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo1.gif -------------------------------------------------------------------------------- /asserts/demo1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo1.mp4 -------------------------------------------------------------------------------- /asserts/demo2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo2.gif -------------------------------------------------------------------------------- /asserts/demo2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo2.mp4 -------------------------------------------------------------------------------- /asserts/demo3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo3.gif -------------------------------------------------------------------------------- /asserts/demo3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo3.mp4 
-------------------------------------------------------------------------------- /asserts/test.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/test.mp4 -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | names = ["bicycle","bus","car","motorbike","truck"] 2 | color_dict = {"bicycle": (179, 52, 255), 3 | "bus": (255, 191, 0), 4 | "car": (127, 255, 0), 5 | "motorbike": (0, 140, 255), 6 | "truck": (0, 215, 255)} -------------------------------------------------------------------------------- /config/coco.data: -------------------------------------------------------------------------------- 1 | classes= 80 2 | train=data/train.txt 3 | valid=data/valid.txt 4 | names=config/coco.names 5 | backup=backup/ 6 | -------------------------------------------------------------------------------- /config/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /config/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | # 0 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | # 1 35 | [maxpool] 36 | size=2 37 | stride=2 38 | 39 | # 2 40 | [convolutional] 41 | batch_normalize=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | # 3 49 | [maxpool] 50 | size=2 51 | stride=2 52 | 53 | # 4 54 | [convolutional] 55 | batch_normalize=1 56 | filters=64 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | # 5 63 | [maxpool] 64 | size=2 65 | stride=2 66 | 67 | # 6 68 | [convolutional] 69 | batch_normalize=1 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | # 7 77 | [maxpool] 78 | size=2 79 | stride=2 80 | 81 | # 8 82 | 
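# (This 256-filter convolution is block 8; the second detection head later concatenates
#  its output with the upsampled features via "[route] layers = -1, 8" further down.)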
[convolutional] 83 | batch_normalize=1 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | # 9 91 | [maxpool] 92 | size=2 93 | stride=2 94 | 95 | # 10 96 | [convolutional] 97 | batch_normalize=1 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | # 11 105 | [maxpool] 106 | size=2 107 | stride=1 108 | 109 | # 12 110 | [convolutional] 111 | batch_normalize=1 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | ########### 119 | 120 | # 13 121 | [convolutional] 122 | batch_normalize=1 123 | filters=256 124 | size=1 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | # 14 130 | [convolutional] 131 | batch_normalize=1 132 | filters=512 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | # 15 139 | [convolutional] 140 | size=1 141 | stride=1 142 | pad=1 143 | filters=255 144 | activation=linear 145 | 146 | 147 | 148 | # 16 149 | [yolo] 150 | mask = 3,4,5 151 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 152 | classes=80 153 | num=6 154 | jitter=.3 155 | ignore_thresh = .7 156 | truth_thresh = 1 157 | random=1 158 | 159 | # 17 160 | [route] 161 | layers = -4 162 | 163 | # 18 164 | [convolutional] 165 | batch_normalize=1 166 | filters=128 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | # 19 173 | [upsample] 174 | stride=2 175 | 176 | # 20 177 | [route] 178 | layers = -1, 8 179 | 180 | # 21 181 | [convolutional] 182 | batch_normalize=1 183 | filters=256 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | # 22 190 | [convolutional] 191 | size=1 192 | stride=1 193 | pad=1 194 | filters=255 195 | activation=linear 196 | 197 | # 23 198 | [yolo] 199 | mask = 1,2,3 200 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 201 | classes=80 202 | num=6 203 | jitter=.3 204 | ignore_thresh = .7 205 | truth_thresh = 1 206 | random=1 207 | -------------------------------------------------------------------------------- /config/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | 
activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | 
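# (Each "[shortcut] from=-3" block in this backbone adds the output from three layers back
#  to the current feature map, i.e. the residual connections of Darknet-53 around each
#  1x1 / 3x3 convolution pair.)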
[shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | 
batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | 
activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /counter.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import cv2 3 | from utils.sort import * 4 | from PyQt5.QtCore import QThread, pyqtSignal 5 | import predict 6 | from config import * 7 | 8 | class CounterThread(QThread): 9 | sin_counterResult = pyqtSignal(np.ndarray) 10 | sin_runningFlag = pyqtSignal(int) 11 | sin_videoList = pyqtSignal(list) 12 | sin_countArea = pyqtSignal(list) 13 | sin_done = pyqtSignal(int) 14 | sin_counter_results = pyqtSignal(list) 15 | sin_pauseFlag = pyqtSignal(int) 16 | 17 | def __init__(self,model,class_names,device): 18 | super(CounterThread,self).__init__() 19 | 20 | self.model = model 21 | self.class_names = class_names 22 | self.device = device 23 | 24 | self.permission = names 25 | 26 | self.colorDict = color_dict 27 | 28 | # create instance of SORT 29 | self.mot_tracker = Sort(max_age=10, min_hits=2) 30 | self.countArea = None 31 | self.running_flag = 0 32 | self.pause_flag = 0 33 | self.videoList = [] 34 | self.last_max_id = 0 35 | self.history = {} #save history 36 | #history = {id:{"no_update_count": int, "his": list}} 37 | 38 | self.sin_runningFlag.connect(self.update_flag) 39 | self.sin_videoList.connect(self.update_videoList) 40 | self.sin_countArea.connect(self.update_countArea) 41 | self.sin_pauseFlag.connect(self.update_pauseFlag) 42 | 43 | self.save_dir = "results" 44 | if not os.path.exists(self.save_dir): os.makedirs(self.save_dir) 45 | 46 | def run(self): 47 | for video in self.videoList: 48 | self.last_max_id = 0 49 | cap = cv2.VideoCapture(video) 50 | out = cv2.VideoWriter(os.path.join(self.save_dir,video.split("/")[-1]), cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), 10, (1920, 1080)) 51 | frame_count = 0 52 | while cap.isOpened(): 53 | # print(frame_count) 54 | if self.running_flag: 55 | if not self.pause_flag: 56 | ret, frame = cap.read() 57 | if ret: 58 | if frame_count % 3 == 0: 59 | a1 = time.time() 60 | frame = self.counter(self.permission, self.colorDict, frame,np.array(self.countArea), self.mot_tracker, video) 61 | self.sin_counterResult.emit(frame) 62 | 63 | out.write(frame) 64 | a2 = time.time() 65 | print(f"fps: {1 / (a2 - a1):.2f}") 66 | frame_count += 1 67 | else: 68 | break 69 | else: 70 | time.sleep(0.1) 71 | else: 72 | break 73 | 74 | #restart count for each video 75 | KalmanBoxTracker.count = 0 76 | cap.release() 77 | out.release() 78 | 79 | if not self.running_flag: 80 | break 81 | 82 | if self.running_flag: 83 | self.sin_done.emit(1) 84 | 85 | def update_pauseFlag(self,flag): 86 | self.pause_flag = flag 87 | 88 | def update_flag(self,flag): 89 | self.running_flag = flag 90 | 91 | def update_videoList(self, videoList): 92 | 
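        # (Slot for sin_videoList: stores the list of video paths that run() will
        #  iterate over once counting starts.)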
print("Update videoList!") 93 | self.videoList = videoList 94 | 95 | def update_countArea(self,Area): 96 | print("Update countArea!") 97 | self.countArea = Area 98 | 99 | def counter(self, permission, colorDict, frame, CountArea, mot_tracker, videoName): 100 | 101 | # painting area 102 | AreaBound = [min(CountArea[:, 0]), min(CountArea[:, 1]), max(CountArea[:, 0]), max(CountArea[:, 1])] 103 | painting = np.zeros((AreaBound[3] - AreaBound[1], AreaBound[2] - AreaBound[0]), dtype=np.uint8) 104 | CountArea_mini = CountArea - AreaBound[0:2] 105 | cv2.fillConvexPoly(painting, CountArea_mini, (1,)) 106 | 107 | objects = predict.yolo_prediction(self.model,self.device,frame,self.class_names) 108 | objects = filter(lambda x: x[0] in permission, objects) 109 | objects = filter(lambda x: x[1] > 0.5,objects) 110 | objects = list(filter(lambda x: pointInCountArea(painting, AreaBound, [int(x[2][0]), int(x[2][1] + x[2][3] / 2)]),objects)) 111 | 112 | #filter out repeat bbox 113 | objects = filiter_out_repeat(objects) 114 | 115 | detections = [] 116 | for item in objects: 117 | detections.append([int(item[2][0] - item[2][2] / 2), 118 | int(item[2][1] - item[2][3] / 2), 119 | int(item[2][0] + item[2][2] / 2), 120 | int(item[2][1] + item[2][3] / 2), 121 | item[1]]) 122 | track_bbs_ids = mot_tracker.update(np.array(detections)) 123 | 124 | # for i, item in enumerate(objects): 125 | # # x1,y1,x2,y2,id = list(map(lambda x :int(x),item)) 126 | # # id_log.add(id) 127 | # # objectName = get_objName(item, objects) 128 | # 129 | # objectName, province, objectBox = item 130 | # x, y, w, h = objectBox 131 | # x1, y1, x2, y2 = int(x - w / 2), int(y - h / 2), int(x + w / 2), int(y + h / 2) 132 | # 133 | # boxColor = colorDict[objectName] 134 | # cv2.rectangle(frame, (x1, y1), (x2, y2), boxColor, thickness=2) 135 | # cv2.putText(frame, objectName, (x1 - 1, y1 - 3), cv2.FONT_HERSHEY_COMPLEX, 0.7, boxColor, 136 | # thickness=2) 137 | 138 | # painting area 139 | for i in range(len(CountArea)): 140 | cv2.line(frame, tuple(CountArea[i]), tuple(CountArea[(i + 1) % (len(CountArea))]), (0, 0, 255), 2) 141 | 142 | if len(track_bbs_ids) > 0: 143 | for bb in track_bbs_ids: #add all bbox to history 144 | id = int(bb[-1]) 145 | objectName = get_objName(bb, objects) 146 | if id not in self.history.keys(): #add new id 147 | self.history[id] = {} 148 | self.history[id]["no_update_count"] = 0 149 | self.history[id]["his"] = [] 150 | self.history[id]["his"].append(objectName) 151 | else: 152 | self.history[id]["no_update_count"] = 0 153 | self.history[id]["his"].append(objectName) 154 | 155 | for i, item in enumerate(track_bbs_ids): 156 | bb = list(map(lambda x: int(x), item)) 157 | id = bb[-1] 158 | x1, y1, x2, y2 = bb[:4] 159 | 160 | his = self.history[id]["his"] 161 | result = {} 162 | for i in set(his): 163 | result[i] = his.count(i) 164 | res = sorted(result.items(), key=lambda d: d[1], reverse=True) 165 | objectName = res[0][0] 166 | 167 | boxColor = colorDict[objectName] 168 | cv2.rectangle(frame, (x1, y1), (x2, y2), boxColor, thickness=2) 169 | cv2.putText(frame, str(id) + "_" + objectName, (x1 - 1, y1 - 3), cv2.FONT_HERSHEY_COMPLEX, 0.7, 170 | boxColor, 171 | thickness=2) 172 | 173 | 174 | counter_results = [] 175 | videoName = videoName.split('/')[-1] 176 | removed_id_list = [] 177 | for id in self.history.keys(): #extract id after tracking 178 | self.history[id]["no_update_count"] += 1 179 | if self.history[id]["no_update_count"] > 5: 180 | his = self.history[id]["his"] 181 | result = {} 182 | for i in set(his): 183 | result[i] 
= his.count(i) 184 | res = sorted(result.items(), key=lambda d: d[1], reverse=True) 185 | objectName = res[0][0] 186 | counter_results.append([videoName,id,objectName]) 187 | #del id 188 | removed_id_list.append(id) 189 | 190 | for id in removed_id_list: 191 | _ = self.history.pop(id) 192 | 193 | if len(counter_results): 194 | self.sin_counter_results.emit(counter_results) 195 | 196 | # print(self.history) 197 | 198 | 199 | return frame 200 | 201 | def emit_timeCode(self,time_code): 202 | self.sin_timeCode.emit(time_code) 203 | 204 | def getTwoDimensionListIndex(L,value,pos): 205 | for i in range(len(L)): 206 | if L[i][pos] == value: 207 | return i 208 | return -1 209 | 210 | def filiter_out_repeat(objects): 211 | objects = sorted(objects,key=lambda x: x[1]) 212 | l = len(objects) 213 | new_objects = [] 214 | if l > 1: 215 | for i in range(l-1): 216 | flag = 0 217 | for j in range(i+1,l): 218 | x_i, y_i, w_i, h_i = objects[i][2] 219 | x_j, y_j, w_j, h_j = objects[j][2] 220 | box1 = [int(x_i - w_i / 2), int(y_i - h_i / 2), int(x_i + w_i / 2), int(y_i + h_i / 2)] 221 | box2 = [int(x_j - w_j / 2), int(y_j - h_j / 2), int(x_j + w_j / 2), int(y_j + h_j / 2)] 222 | if cal_iou(box1,box2) >= 0.7: 223 | flag = 1 224 | break 225 | #if no repeat 226 | if not flag: 227 | new_objects.append(objects[i]) 228 | #add the last one 229 | new_objects.append(objects[-1]) 230 | else: 231 | return objects 232 | 233 | return list(tuple(new_objects)) 234 | 235 | 236 | def cal_iou(box1,box2): 237 | x1 = max(box1[0],box2[0]) 238 | y1 = max(box1[1],box2[1]) 239 | x2 = min(box1[2],box2[2]) 240 | y2 = min(box1[3],box2[3]) 241 | i = max(0,(x2-x1))*max(0,(y2-y1)) 242 | u = (box1[2]-box1[0])*(box1[3]-box1[1]) + (box2[2]-box2[0])*(box2[3]-box2[1]) - i 243 | iou = float(i)/float(u) 244 | return iou 245 | 246 | def get_objName(item,objects): 247 | iou_list = [] 248 | for i,object in enumerate(objects): 249 | x, y, w, h = object[2] 250 | x1, y1, x2, y2 = int(x - w / 2), int(y - h / 2), int(x + w / 2), int(y + h / 2) 251 | iou_list.append(cal_iou(item[:4],[x1,y1,x2,y2])) 252 | max_index = iou_list.index(max(iou_list)) 253 | return objects[max_index][0] 254 | 255 | def pointInCountArea(painting, AreaBound, point): 256 | h,w = painting.shape[:2] 257 | point = np.array(point) 258 | point = point - AreaBound[:2] 259 | if point[0] < 0 or point[1] < 0 or point[0] >= w or point[1] >= h: 260 | return 0 261 | else: 262 | return painting[point[1],point[0]] 263 | 264 | 265 | 266 | 267 | 268 | -------------------------------------------------------------------------------- /gui.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Form implementation generated from reading ui file 'gui.ui' 4 | # 5 | # Created by: PyQt5 UI code generator 5.13.1 6 | # 7 | # WARNING! All changes made in this file will be lost! 
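# (To change the layout, edit gui.ui in Qt Designer and regenerate this module,
#  e.g. with `pyuic5 gui.ui -o gui.py`, rather than editing the generated code by hand.)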
8 | 9 | 10 | from PyQt5 import QtCore, QtGui, QtWidgets 11 | 12 | 13 | class Ui_mainWindow(object): 14 | def setupUi(self, mainWindow): 15 | mainWindow.setObjectName("mainWindow") 16 | mainWindow.resize(1203, 554) 17 | self.centralwidget = QtWidgets.QWidget(mainWindow) 18 | self.centralwidget.setObjectName("centralwidget") 19 | self.groupBox_count = QtWidgets.QGroupBox(self.centralwidget) 20 | self.groupBox_count.setGeometry(QtCore.QRect(990, 10, 211, 341)) 21 | self.groupBox_count.setObjectName("groupBox_count") 22 | self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.groupBox_count) 23 | self.verticalLayout_2.setObjectName("verticalLayout_2") 24 | self.gridLayout_count = QtWidgets.QGridLayout() 25 | self.gridLayout_count.setContentsMargins(2, 2, 2, 2) 26 | self.gridLayout_count.setSpacing(6) 27 | self.gridLayout_count.setObjectName("gridLayout_count") 28 | self.label_truck = QtWidgets.QLabel(self.groupBox_count) 29 | self.label_truck.setObjectName("label_truck") 30 | self.gridLayout_count.addWidget(self.label_truck, 2, 1, 1, 1, QtCore.Qt.AlignHCenter) 31 | self.label_7 = QtWidgets.QLabel(self.groupBox_count) 32 | self.label_7.setObjectName("label_7") 33 | self.gridLayout_count.addWidget(self.label_7, 4, 0, 1, 1, QtCore.Qt.AlignHCenter) 34 | self.label_5 = QtWidgets.QLabel(self.groupBox_count) 35 | self.label_5.setObjectName("label_5") 36 | self.gridLayout_count.addWidget(self.label_5, 2, 0, 1, 1, QtCore.Qt.AlignHCenter) 37 | self.label_6 = QtWidgets.QLabel(self.groupBox_count) 38 | self.label_6.setObjectName("label_6") 39 | self.gridLayout_count.addWidget(self.label_6, 3, 0, 1, 1, QtCore.Qt.AlignHCenter) 40 | self.label_motorbike = QtWidgets.QLabel(self.groupBox_count) 41 | self.label_motorbike.setObjectName("label_motorbike") 42 | self.gridLayout_count.addWidget(self.label_motorbike, 3, 1, 1, 1, QtCore.Qt.AlignHCenter) 43 | self.label_bus = QtWidgets.QLabel(self.groupBox_count) 44 | self.label_bus.setObjectName("label_bus") 45 | self.gridLayout_count.addWidget(self.label_bus, 1, 1, 1, 1, QtCore.Qt.AlignHCenter) 46 | self.label_bicycle = QtWidgets.QLabel(self.groupBox_count) 47 | self.label_bicycle.setObjectName("label_bicycle") 48 | self.gridLayout_count.addWidget(self.label_bicycle, 4, 1, 1, 1, QtCore.Qt.AlignHCenter) 49 | self.label_12 = QtWidgets.QLabel(self.groupBox_count) 50 | self.label_12.setObjectName("label_12") 51 | self.gridLayout_count.addWidget(self.label_12, 5, 0, 1, 1, QtCore.Qt.AlignHCenter) 52 | self.label_3 = QtWidgets.QLabel(self.groupBox_count) 53 | self.label_3.setObjectName("label_3") 54 | self.gridLayout_count.addWidget(self.label_3, 0, 0, 1, 1, QtCore.Qt.AlignHCenter) 55 | self.label_sum = QtWidgets.QLabel(self.groupBox_count) 56 | self.label_sum.setObjectName("label_sum") 57 | self.gridLayout_count.addWidget(self.label_sum, 5, 1, 1, 1, QtCore.Qt.AlignHCenter) 58 | self.label_car = QtWidgets.QLabel(self.groupBox_count) 59 | self.label_car.setObjectName("label_car") 60 | self.gridLayout_count.addWidget(self.label_car, 0, 1, 1, 1, QtCore.Qt.AlignHCenter) 61 | self.label_4 = QtWidgets.QLabel(self.groupBox_count) 62 | self.label_4.setObjectName("label_4") 63 | self.gridLayout_count.addWidget(self.label_4, 1, 0, 1, 1, QtCore.Qt.AlignHCenter) 64 | self.verticalLayout_2.addLayout(self.gridLayout_count) 65 | self.label_image = QtWidgets.QLabel(self.centralwidget) 66 | self.label_image.setGeometry(QtCore.QRect(10, 10, 960, 540)) 67 | self.label_image.setStyleSheet("background-color: rgb(233, 185, 110);") 68 | self.label_image.setText("") 69 | 
self.label_image.setAlignment(QtCore.Qt.AlignCenter) 70 | self.label_image.setObjectName("label_image") 71 | self.widget = QtWidgets.QWidget(self.centralwidget) 72 | self.widget.setGeometry(QtCore.QRect(1020, 360, 151, 181)) 73 | self.widget.setObjectName("widget") 74 | self.verticalLayout = QtWidgets.QVBoxLayout(self.widget) 75 | self.verticalLayout.setContentsMargins(0, 0, 0, 0) 76 | self.verticalLayout.setObjectName("verticalLayout") 77 | self.pushButton_openVideo = QtWidgets.QPushButton(self.widget) 78 | self.pushButton_openVideo.setObjectName("pushButton_openVideo") 79 | self.verticalLayout.addWidget(self.pushButton_openVideo) 80 | self.pushButton_selectArea = QtWidgets.QPushButton(self.widget) 81 | self.pushButton_selectArea.setObjectName("pushButton_selectArea") 82 | self.verticalLayout.addWidget(self.pushButton_selectArea) 83 | self.pushButton_start = QtWidgets.QPushButton(self.widget) 84 | self.pushButton_start.setObjectName("pushButton_start") 85 | self.verticalLayout.addWidget(self.pushButton_start) 86 | self.pushButton_pause = QtWidgets.QPushButton(self.widget) 87 | self.pushButton_pause.setObjectName("pushButton_pause") 88 | self.verticalLayout.addWidget(self.pushButton_pause) 89 | mainWindow.setCentralWidget(self.centralwidget) 90 | 91 | self.retranslateUi(mainWindow) 92 | QtCore.QMetaObject.connectSlotsByName(mainWindow) 93 | 94 | def retranslateUi(self, mainWindow): 95 | _translate = QtCore.QCoreApplication.translate 96 | mainWindow.setWindowTitle(_translate("mainWindow", "Car Counter")) 97 | self.groupBox_count.setTitle(_translate("mainWindow", "Counting Results")) 98 | self.label_truck.setText(_translate("mainWindow", "0")) 99 | self.label_7.setText(_translate("mainWindow", "bicycle")) 100 | self.label_5.setText(_translate("mainWindow", "truck")) 101 | self.label_6.setText(_translate("mainWindow", "motorbike")) 102 | self.label_motorbike.setText(_translate("mainWindow", "0")) 103 | self.label_bus.setText(_translate("mainWindow", "0")) 104 | self.label_bicycle.setText(_translate("mainWindow", "0")) 105 | self.label_12.setText(_translate("mainWindow", "sum")) 106 | self.label_3.setText(_translate("mainWindow", "car")) 107 | self.label_sum.setText(_translate("mainWindow", "0")) 108 | self.label_car.setText(_translate("mainWindow", "0")) 109 | self.label_4.setText(_translate("mainWindow", "bus")) 110 | self.pushButton_openVideo.setText(_translate("mainWindow", "Open Video")) 111 | self.pushButton_selectArea.setText(_translate("mainWindow", "Select Area")) 112 | self.pushButton_start.setText(_translate("mainWindow", "Start")) 113 | self.pushButton_pause.setText(_translate("mainWindow", "Pause")) 114 | -------------------------------------------------------------------------------- /gui.ui: -------------------------------------------------------------------------------- [gui.ui is the Qt Designer XML source from which gui.py was generated; its markup is not preserved in this dump. It defines the 1203x554 "Car Counter" main window: the "Counting Results" group box with the car, bus, truck, motorbike, bicycle and sum counter labels, the 960x540 image display label, and the Open Video / Select Area / Start / Pause buttons, matching the widget tree built in gui.py above.]
-------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import numpy as np 8 | 9 | from utils.parse_config import * 10 | from utils.utils import build_targets, to_cpu, non_max_suppression 11 | 12 | import matplotlib.pyplot as plt 13 | import matplotlib.patches as patches 14 | 15 | 16 | def create_modules(module_defs): 17 | """ 18 | Constructs module list of layer blocks from module configuration in module_defs 19 | """ 20 | hyperparams = module_defs.pop(0) 21 | output_filters = [int(hyperparams["channels"])] 22 | module_list = nn.ModuleList() 23 | for module_i, module_def in enumerate(module_defs): 24 | modules = nn.Sequential() 25 | 26 | if module_def["type"] == "convolutional": 27 | bn = int(module_def["batch_normalize"]) 28 | filters = int(module_def["filters"]) 29 | kernel_size = int(module_def["size"]) 30 | pad = (kernel_size - 1) // 2 31 | modules.add_module( 32 | f"conv_{module_i}", 33 | nn.Conv2d( 34 | in_channels=output_filters[-1], 35 | out_channels=filters, 36 | kernel_size=kernel_size, 37 | stride=int(module_def["stride"]), 38 | padding=pad, 39 | bias=not bn, 40 | ), 41 | ) 42 | if bn: 43 | modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5)) 44 | if module_def["activation"] == "leaky": 45 | modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1)) 46 | 47 | elif module_def["type"] == "maxpool": 48 | kernel_size = int(module_def["size"]) 49 | stride = int(module_def["stride"]) 50 | if kernel_size == 2 and stride == 1: 51 | modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1))) 52 | maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2)) 53 | modules.add_module(f"maxpool_{module_i}", maxpool) 54 | 55 | elif module_def["type"] == "upsample": 56 | upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest") 57 | modules.add_module(f"upsample_{module_i}", upsample) 58 | 59 | elif module_def["type"] == "route": 60 | layers = [int(x) for x in module_def["layers"].split(",")] 61 | filters = sum([output_filters[1:][i] for i in layers]) 62 | modules.add_module(f"route_{module_i}", EmptyLayer()) 63 | 64 | elif module_def["type"] == "shortcut": 65 | filters = output_filters[1:][int(module_def["from"])] 66 | modules.add_module(f"shortcut_{module_i}", EmptyLayer()) 67 | 68 | elif module_def["type"] == "yolo": 69 | anchor_idxs = [int(x) for x in module_def["mask"].split(",")] 70 | # Extract anchors 71 | anchors = [int(x) for x in module_def["anchors"].split(",")] 72 | anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)] 73 | anchors = [anchors[i] for i in anchor_idxs] 74 |
num_classes = int(module_def["classes"]) 75 | img_size = int(hyperparams["height"]) 76 | # Define detection layer 77 | yolo_layer = YOLOLayer(anchors, num_classes, img_size) 78 | modules.add_module(f"yolo_{module_i}", yolo_layer) 79 | # Register module list and number of output filters 80 | module_list.append(modules) 81 | output_filters.append(filters) 82 | 83 | return hyperparams, module_list 84 | 85 | 86 | class Upsample(nn.Module): 87 | """ nn.Upsample is deprecated """ 88 | 89 | def __init__(self, scale_factor, mode="nearest"): 90 | super(Upsample, self).__init__() 91 | self.scale_factor = scale_factor 92 | self.mode = mode 93 | 94 | def forward(self, x): 95 | x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) 96 | return x 97 | 98 | 99 | class EmptyLayer(nn.Module): 100 | """Placeholder for 'route' and 'shortcut' layers""" 101 | 102 | def __init__(self): 103 | super(EmptyLayer, self).__init__() 104 | 105 | 106 | class YOLOLayer(nn.Module): 107 | """Detection layer""" 108 | 109 | def __init__(self, anchors, num_classes, img_dim=416): 110 | super(YOLOLayer, self).__init__() 111 | self.anchors = anchors 112 | self.num_anchors = len(anchors) 113 | self.num_classes = num_classes 114 | self.ignore_thres = 0.5 115 | self.mse_loss = nn.MSELoss() 116 | self.bce_loss = nn.BCELoss() 117 | self.obj_scale = 1 118 | self.noobj_scale = 100 119 | self.metrics = {} 120 | self.img_dim = img_dim 121 | self.grid_size = 0 # grid size 122 | 123 | def compute_grid_offsets(self, grid_size, cuda=True): 124 | self.grid_size = grid_size 125 | g = self.grid_size 126 | FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor 127 | self.stride = self.img_dim / self.grid_size 128 | # Calculate offsets for each grid 129 | self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor) 130 | self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor) 131 | self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors]) 132 | self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1)) 133 | self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1)) 134 | 135 | def forward(self, x, targets=None, img_dim=None): 136 | 137 | # Tensors for cuda support 138 | FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor 139 | LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor 140 | ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor 141 | 142 | self.img_dim = img_dim 143 | num_samples = x.size(0) 144 | grid_size = x.size(2) 145 | 146 | prediction = ( 147 | x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size) 148 | .permute(0, 1, 3, 4, 2) 149 | .contiguous() 150 | ) 151 | 152 | # Get outputs 153 | x = torch.sigmoid(prediction[..., 0]) # Center x 154 | y = torch.sigmoid(prediction[..., 1]) # Center y 155 | w = prediction[..., 2] # Width 156 | h = prediction[..., 3] # Height 157 | pred_conf = torch.sigmoid(prediction[..., 4]) # Conf 158 | pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. 
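        # (prediction is laid out as (batch, num_anchors, grid, grid, 5 + num_classes);
        #  x, y, objectness and class scores are squashed with sigmoids, while w and h
        #  stay raw and are decoded below as exp(value) * anchor size, the usual YOLOv3 box decode.)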
159 | 160 | # If grid size does not match current we compute new offsets 161 | if grid_size != self.grid_size: 162 | self.compute_grid_offsets(grid_size, cuda=x.is_cuda) 163 | 164 | # Add offset and scale with anchors 165 | pred_boxes = FloatTensor(prediction[..., :4].shape) 166 | pred_boxes[..., 0] = x.data + self.grid_x 167 | pred_boxes[..., 1] = y.data + self.grid_y 168 | pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w 169 | pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h 170 | 171 | output = torch.cat( 172 | ( 173 | pred_boxes.view(num_samples, -1, 4) * self.stride, 174 | pred_conf.view(num_samples, -1, 1), 175 | pred_cls.view(num_samples, -1, self.num_classes), 176 | ), 177 | -1, 178 | ) 179 | 180 | if targets is None: 181 | return output, 0 182 | else: 183 | iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets( 184 | pred_boxes=pred_boxes, 185 | pred_cls=pred_cls, 186 | target=targets, 187 | anchors=self.scaled_anchors, 188 | ignore_thres=self.ignore_thres, 189 | ) 190 | 191 | # Loss : Mask outputs to ignore non-existing objects (except with conf. loss) 192 | loss_x = self.mse_loss(x[obj_mask], tx[obj_mask]) 193 | loss_y = self.mse_loss(y[obj_mask], ty[obj_mask]) 194 | loss_w = self.mse_loss(w[obj_mask], tw[obj_mask]) 195 | loss_h = self.mse_loss(h[obj_mask], th[obj_mask]) 196 | loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask]) 197 | loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask]) 198 | loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj 199 | loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask]) 200 | total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls 201 | 202 | # Metrics 203 | cls_acc = 100 * class_mask[obj_mask].mean() 204 | conf_obj = pred_conf[obj_mask].mean() 205 | conf_noobj = pred_conf[noobj_mask].mean() 206 | conf50 = (pred_conf > 0.5).float() 207 | iou50 = (iou_scores > 0.5).float() 208 | iou75 = (iou_scores > 0.75).float() 209 | detected_mask = conf50 * class_mask * tconf 210 | precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16) 211 | recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16) 212 | recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16) 213 | 214 | self.metrics = { 215 | "loss": to_cpu(total_loss).item(), 216 | "x": to_cpu(loss_x).item(), 217 | "y": to_cpu(loss_y).item(), 218 | "w": to_cpu(loss_w).item(), 219 | "h": to_cpu(loss_h).item(), 220 | "conf": to_cpu(loss_conf).item(), 221 | "cls": to_cpu(loss_cls).item(), 222 | "cls_acc": to_cpu(cls_acc).item(), 223 | "recall50": to_cpu(recall50).item(), 224 | "recall75": to_cpu(recall75).item(), 225 | "precision": to_cpu(precision).item(), 226 | "conf_obj": to_cpu(conf_obj).item(), 227 | "conf_noobj": to_cpu(conf_noobj).item(), 228 | "grid_size": grid_size, 229 | } 230 | 231 | return output, total_loss 232 | 233 | 234 | class Darknet(nn.Module): 235 | """YOLOv3 object detection model""" 236 | 237 | def __init__(self, config_path, img_size=416): 238 | super(Darknet, self).__init__() 239 | self.module_defs = parse_model_config(config_path) 240 | self.hyperparams, self.module_list = create_modules(self.module_defs) 241 | self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")] 242 | self.img_size = img_size 243 | self.seen = 0 244 | self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32) 245 | 246 | def forward(self, x, targets=None): 247 | img_dim = x.shape[2] 248 | loss = 0 
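        # (layer_outputs caches every module's output so "route" and "shortcut" blocks
        #  can index back into earlier feature maps; yolo_outputs collects the
        #  detections produced by each YOLO head.)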
249 | layer_outputs, yolo_outputs = [], [] 250 | for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): 251 | if module_def["type"] in ["convolutional", "upsample", "maxpool"]: 252 | x = module(x) 253 | elif module_def["type"] == "route": 254 | x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1) 255 | elif module_def["type"] == "shortcut": 256 | layer_i = int(module_def["from"]) 257 | x = layer_outputs[-1] + layer_outputs[layer_i] 258 | elif module_def["type"] == "yolo": 259 | x, layer_loss = module[0](x, targets, img_dim) 260 | loss += layer_loss 261 | yolo_outputs.append(x) 262 | layer_outputs.append(x) 263 | yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1)) 264 | return yolo_outputs if targets is None else (loss, yolo_outputs) 265 | 266 | def load_darknet_weights(self, weights_path): 267 | """Parses and loads the weights stored in 'weights_path'""" 268 | 269 | # Open the weights file 270 | with open(weights_path, "rb") as f: 271 | header = np.fromfile(f, dtype=np.int32, count=5) # First five are header values 272 | self.header_info = header # Needed to write header when saving weights 273 | self.seen = header[3] # number of images seen during training 274 | weights = np.fromfile(f, dtype=np.float32) # The rest are weights 275 | 276 | # Establish cutoff for loading backbone weights 277 | cutoff = None 278 | if "darknet53.conv.74" in weights_path: 279 | cutoff = 75 280 | 281 | ptr = 0 282 | for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): 283 | if i == cutoff: 284 | break 285 | if module_def["type"] == "convolutional": 286 | conv_layer = module[0] 287 | if module_def["batch_normalize"]: 288 | # Load BN bias, weights, running mean and running variance 289 | bn_layer = module[1] 290 | num_b = bn_layer.bias.numel() # Number of biases 291 | # Bias 292 | bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias) 293 | bn_layer.bias.data.copy_(bn_b) 294 | ptr += num_b 295 | # Weight 296 | bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight) 297 | bn_layer.weight.data.copy_(bn_w) 298 | ptr += num_b 299 | # Running Mean 300 | bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean) 301 | bn_layer.running_mean.data.copy_(bn_rm) 302 | ptr += num_b 303 | # Running Var 304 | bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var) 305 | bn_layer.running_var.data.copy_(bn_rv) 306 | ptr += num_b 307 | else: 308 | # Load conv. bias 309 | num_b = conv_layer.bias.numel() 310 | conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias) 311 | conv_layer.bias.data.copy_(conv_b) 312 | ptr += num_b 313 | # Load conv. 
weights 314 | num_w = conv_layer.weight.numel() 315 | conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight) 316 | conv_layer.weight.data.copy_(conv_w) 317 | ptr += num_w 318 | 319 | def save_darknet_weights(self, path, cutoff=-1): 320 | """ 321 | @:param path - path of the new weights file 322 | @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved) 323 | """ 324 | fp = open(path, "wb") 325 | self.header_info[3] = self.seen 326 | self.header_info.tofile(fp) 327 | 328 | # Iterate through layers 329 | for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 330 | if module_def["type"] == "convolutional": 331 | conv_layer = module[0] 332 | # If batch norm, load bn first 333 | if module_def["batch_normalize"]: 334 | bn_layer = module[1] 335 | bn_layer.bias.data.cpu().numpy().tofile(fp) 336 | bn_layer.weight.data.cpu().numpy().tofile(fp) 337 | bn_layer.running_mean.data.cpu().numpy().tofile(fp) 338 | bn_layer.running_var.data.cpu().numpy().tofile(fp) 339 | # Load conv bias 340 | else: 341 | conv_layer.bias.data.cpu().numpy().tofile(fp) 342 | # Load conv weights 343 | conv_layer.weight.data.cpu().numpy().tofile(fp) 344 | 345 | fp.close() 346 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from utils.utils import * 4 | from utils.datasets import * 5 | import cv2 6 | from PIL import Image 7 | import torch 8 | from torchvision import transforms 9 | 10 | 11 | def resize(image, size): 12 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) 13 | return image 14 | 15 | 16 | def yolo_prediction(model, device, image,class_names): 17 | image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB) 18 | imgs = transforms.ToTensor()(Image.fromarray(image)) 19 | c, h, w = imgs.shape 20 | img_sacle = [w / 416, h / 416, w / 416, h / 416] 21 | imgs = resize(imgs, 416) 22 | imgs = imgs.unsqueeze(0).to(device) 23 | 24 | model.eval() 25 | with torch.no_grad(): 26 | outputs = model(imgs) 27 | outputs = non_max_suppression(outputs, conf_thres=0.5, nms_thres=0.45) 28 | 29 | # print(outputs) 30 | objects = [] 31 | try: 32 | outputs = outputs[0].cpu().data 33 | for i, output in enumerate(outputs): 34 | item = [] 35 | item.append(class_names[int(output[-1])]) 36 | item.append(float(output[4])) 37 | box = [int(value * img_sacle[i]) for i, value in enumerate(output[:4])] 38 | x1,y1,x2,y2 = box 39 | x = int((x2+x1)/2) 40 | y = int((y1+y2)/2) 41 | w = x2-x1 42 | h = y2-y1 43 | item.append([x,y,w,h]) 44 | objects.append(item) 45 | except: 46 | pass 47 | return objects 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit_learn==0.21.3 2 | numpy 3 | matplotlib 4 | torch 5 | torchvision 6 | terminaltables 7 | pillow 8 | tqdm 9 | numba 10 | fire 11 | filterpy 12 | opencv-python 13 | scikit-image 14 | pytesseract 15 | PyQt5 16 | pandas -------------------------------------------------------------------------------- /utils/.config: -------------------------------------------------------------------------------- 1 | 
3577582247857115766766724119402109753611015352440240992284880932986646512687624561324362551252066885950009004775081309546579712498721403446373694387157090085286231224505645450053725901631232615038604189510648071831312164643845750188640913440678425980773097082810461453835363142185772806767051281389323550048308896544778657224119429085618530915004838231420068047280427430441205992226661770454814476085720924852343305997639832355663375736276454752176015641250406861398123958872477894486967939187447133486257111277501919826706888469965968376719017903135941319967538514819613139151903788015692897242959052053924650580372 2 | 5605377600988577560550065397254225420957436224498576618768516259123867506653695772342324276743084984269870011017428054256364217027220194986462263726663651389623329466400510449729248114987953444512598188059336858299682302760808537370136773326006433438381734159585558284654736919864102681493160466327333511033028058967854838844380496016808629272782632933313912375317044566209141444311502150231852276410702944747944105256818585060877714842901476314844375419924879849300110150679899364755163492641805650724663047662328174625989492058437973343511835728532409474547860996128819129370209775297109033581936985340670034201253 3 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/utils/__init__.py -------------------------------------------------------------------------------- /utils/augmentations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import numpy as np 4 | 5 | 6 | def horisontal_flip(images, targets): 7 | images = torch.flip(images, [-1]) 8 | targets[:, 2] = 1 - targets[:, 2] 9 | return images, targets 10 | -------------------------------------------------------------------------------- /utils/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import sys 5 | import numpy as np 6 | from PIL import Image 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | from utils.augmentations import horisontal_flip 11 | from torch.utils.data import Dataset 12 | import torchvision.transforms as transforms 13 | 14 | 15 | def pad_to_square(img, pad_value): 16 | c, h, w = img.shape 17 | dim_diff = np.abs(h - w) 18 | # (upper / left) padding and (lower / right) padding 19 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 20 | # Determine padding 21 | pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0) 22 | # Add padding 23 | img = F.pad(img, pad, "constant", value=pad_value) 24 | 25 | return img, pad 26 | 27 | 28 | def resize(image, size): 29 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) 30 | return image 31 | 32 | 33 | def random_resize(images, min_size=288, max_size=448): 34 | new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0] 35 | images = F.interpolate(images, size=new_size, mode="nearest") 36 | return images 37 | 38 | 39 | class ImageFolder(Dataset): 40 | def __init__(self, folder_path, img_size=416): 41 | self.files = sorted(glob.glob("%s/*.*" % folder_path)) 42 | self.img_size = img_size 43 | 44 | def __getitem__(self, index): 45 | img_path = self.files[index % len(self.files)] 46 | # 
Extract image as PyTorch tensor 47 | img = transforms.ToTensor()(Image.open(img_path)) 48 | # Pad to square resolution 49 | img, _ = pad_to_square(img, 0) 50 | # Resize 51 | img = resize(img, self.img_size) 52 | 53 | return img_path, img 54 | 55 | def __len__(self): 56 | return len(self.files) 57 | 58 | 59 | class ListDataset(Dataset): 60 | def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True): 61 | with open(list_path, "r") as file: 62 | self.img_files = file.readlines() 63 | 64 | self.label_files = [ 65 | path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt") 66 | for path in self.img_files 67 | ] 68 | self.img_size = img_size 69 | self.max_objects = 100 70 | self.augment = augment 71 | self.multiscale = multiscale 72 | self.normalized_labels = normalized_labels 73 | self.min_size = self.img_size - 3 * 32 74 | self.max_size = self.img_size + 3 * 32 75 | self.batch_count = 0 76 | 77 | def __getitem__(self, index): 78 | 79 | # --------- 80 | # Image 81 | # --------- 82 | 83 | img_path = self.img_files[index % len(self.img_files)].rstrip() 84 | 85 | # Extract image as PyTorch tensor 86 | img = transforms.ToTensor()(Image.open(img_path).convert('RGB')) 87 | 88 | # Handle images with less than three channels 89 | if len(img.shape) != 3: 90 | img = img.unsqueeze(0) 91 | img = img.expand((3, img.shape[1:])) 92 | 93 | _, h, w = img.shape 94 | h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1) 95 | # Pad to square resolution 96 | img, pad = pad_to_square(img, 0) 97 | _, padded_h, padded_w = img.shape 98 | 99 | # --------- 100 | # Label 101 | # --------- 102 | 103 | label_path = self.label_files[index % len(self.img_files)].rstrip() 104 | 105 | targets = None 106 | if os.path.exists(label_path): 107 | boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5)) 108 | # Extract coordinates for unpadded + unscaled image 109 | x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2) 110 | y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2) 111 | x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2) 112 | y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2) 113 | # Adjust for added padding 114 | x1 += pad[0] 115 | y1 += pad[2] 116 | x2 += pad[1] 117 | y2 += pad[3] 118 | # Returns (x, y, w, h) 119 | boxes[:, 1] = ((x1 + x2) / 2) / padded_w 120 | boxes[:, 2] = ((y1 + y2) / 2) / padded_h 121 | boxes[:, 3] *= w_factor / padded_w 122 | boxes[:, 4] *= h_factor / padded_h 123 | 124 | targets = torch.zeros((len(boxes), 6)) 125 | targets[:, 1:] = boxes 126 | 127 | # Apply augmentations 128 | if self.augment: 129 | if np.random.random() < 0.5: 130 | img, targets = horisontal_flip(img, targets) 131 | 132 | return img_path, img, targets 133 | 134 | 135 | def collate_fn(self, batch): 136 | paths, imgs, targets = list(zip(*batch)) 137 | # Remove empty placeholder targets 138 | targets = [boxes for boxes in targets if boxes is not None] 139 | # Add sample index to targets 140 | for i, boxes in enumerate(targets): 141 | boxes[:, 0] = i 142 | targets = torch.cat(targets, 0) 143 | # Selects new image size every tenth batch 144 | if self.multiscale and self.batch_count % 10 == 0: 145 | self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32)) 146 | # Resize images to input shape 147 | imgs = torch.stack([resize(img, self.img_size) for img in imgs]) 148 | self.batch_count += 1 149 | return paths, imgs, targets 150 | 151 | def __len__(self): 152 | return len(self.img_files) 153 | 
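A minimal sketch of how ListDataset is typically consumed for training (illustrative only: the list-file path, batch size, and loop body are assumptions; the counting application itself only runs inference):

    from torch.utils.data import DataLoader
    from utils.datasets import ListDataset

    # "data/train.txt" is a hypothetical file listing one image path per line,
    # with matching label files under a sibling "labels" directory.
    dataset = ListDataset("data/train.txt", img_size=416, augment=True, multiscale=True)
    loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=dataset.collate_fn)

    for paths, imgs, targets in loader:
        # imgs: (B, 3, S, S); S is re-sampled every 10 batches by collate_fn when multiscale=True
        # targets: (N, 6) rows of [sample_index, class, cx, cy, w, h], normalized to [0, 1]
        break  # one batch is enough for the sketch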
-------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class Logger(object): 5 | def __init__(self, log_dir): 6 | """Create a summary writer logging to log_dir.""" 7 | self.writer = tf.summary.FileWriter(log_dir) 8 | 9 | def scalar_summary(self, tag, value, step): 10 | """Log a scalar variable.""" 11 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 12 | self.writer.add_summary(summary, step) 13 | 14 | def list_of_scalars_summary(self, tag_value_pairs, step): 15 | """Log scalar variables.""" 16 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value) for tag, value in tag_value_pairs]) 17 | self.writer.add_summary(summary, step) 18 | -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def parse_model_config(path): 4 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 5 | file = open(path, 'r') 6 | lines = file.read().split('\n') 7 | lines = [x for x in lines if x and not x.startswith('#')] 8 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 9 | module_defs = [] 10 | for line in lines: 11 | if line.startswith('['): # This marks the start of a new block 12 | module_defs.append({}) 13 | module_defs[-1]['type'] = line[1:-1].rstrip() 14 | if module_defs[-1]['type'] == 'convolutional': 15 | module_defs[-1]['batch_normalize'] = 0 16 | else: 17 | key, value = line.split("=") 18 | value = value.strip() 19 | module_defs[-1][key.rstrip()] = value.strip() 20 | 21 | return module_defs 22 | 23 | def parse_data_config(path): 24 | """Parses the data configuration file""" 25 | options = dict() 26 | options['gpus'] = '0,1,2,3' 27 | options['num_workers'] = '10' 28 | with open(path, 'r') as fp: 29 | lines = fp.readlines() 30 | for line in lines: 31 | line = line.strip() 32 | if line == '' or line.startswith('#'): 33 | continue 34 | key, value = line.split('=') 35 | options[key.strip()] = value.strip() 36 | return options 37 | -------------------------------------------------------------------------------- /utils/sort.py: -------------------------------------------------------------------------------- 1 | """ 2 | SORT: A Simple, Online and Realtime Tracker 3 | Copyright (C) 2016 Alex Bewley alex@dynamicdetection.com 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | You should have received a copy of the GNU General Public License 13 | along with this program. If not, see <http://www.gnu.org/licenses/>. 
14 | """ 15 | from __future__ import print_function 16 | 17 | from numba import jit 18 | import os.path 19 | import numpy as np 20 | import matplotlib.pyplot as plt 21 | import matplotlib.patches as patches 22 | from skimage import io 23 | import glob 24 | from sklearn.utils.linear_assignment_ import linear_assignment 25 | # from scipy.optimize import linear_sum_assignment 26 | import time 27 | import argparse 28 | from filterpy.kalman import KalmanFilter 29 | 30 | @jit 31 | def iou(bb_test,bb_gt): 32 | """ 33 | Computes IUO between two bboxes in the form [x1,y1,x2,y2] 34 | """ 35 | xx1 = np.maximum(bb_test[0], bb_gt[0]) 36 | yy1 = np.maximum(bb_test[1], bb_gt[1]) 37 | xx2 = np.minimum(bb_test[2], bb_gt[2]) 38 | yy2 = np.minimum(bb_test[3], bb_gt[3]) 39 | w = np.maximum(0., xx2 - xx1) 40 | h = np.maximum(0., yy2 - yy1) 41 | wh = w * h 42 | o = wh / ((bb_test[2]-bb_test[0])*(bb_test[3]-bb_test[1]) 43 | + (bb_gt[2]-bb_gt[0])*(bb_gt[3]-bb_gt[1]) - wh) 44 | return(o) 45 | 46 | def convert_bbox_to_z(bbox): 47 | """ 48 | Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form 49 | [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is 50 | the aspect ratio 51 | """ 52 | w = bbox[2]-bbox[0] 53 | h = bbox[3]-bbox[1] 54 | x = bbox[0]+w/2. 55 | y = bbox[1]+h/2. 56 | s = w*h #scale is just area 57 | r = w/float(h) 58 | return np.array([x,y,s,r]).reshape((4,1)) 59 | 60 | def convert_x_to_bbox(x,score=None): 61 | """ 62 | Takes a bounding box in the centre form [x,y,s,r] and returns it in the form 63 | [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right 64 | """ 65 | w = np.sqrt(x[2]*x[3]) 66 | h = x[2]/w 67 | if(score==None): 68 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4)) 69 | else: 70 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5)) 71 | 72 | 73 | class KalmanBoxTracker(object): 74 | """ 75 | This class represents the internel state of individual tracked objects observed as bbox. 76 | """ 77 | count = 0 78 | def __init__(self,bbox): 79 | """ 80 | Initialises a tracker using initial bounding box. 81 | """ 82 | #define constant velocity model 83 | self.kf = KalmanFilter(dim_x=7, dim_z=4) 84 | self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]]) 85 | self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]]) 86 | 87 | self.kf.R[2:,2:] *= 10. 88 | self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities 89 | self.kf.P *= 10. 90 | self.kf.Q[-1,-1] *= 0.01 91 | self.kf.Q[4:,4:] *= 0.01 92 | 93 | self.kf.x[:4] = convert_bbox_to_z(bbox) 94 | self.time_since_update = 0 95 | self.id = KalmanBoxTracker.count 96 | KalmanBoxTracker.count += 1 97 | self.history = [] 98 | self.hits = 0 99 | self.hit_streak = 0 100 | self.age = 0 101 | 102 | def update(self,bbox): 103 | """ 104 | Updates the state vector with observed bbox. 105 | """ 106 | self.time_since_update = 0 107 | self.history = [] 108 | self.hits += 1 109 | self.hit_streak += 1 110 | self.kf.update(convert_bbox_to_z(bbox)) 111 | 112 | def predict(self): 113 | """ 114 | Advances the state vector and returns the predicted bounding box estimate. 
115 | """ 116 | if((self.kf.x[6]+self.kf.x[2])<=0): 117 | self.kf.x[6] *= 0.0 118 | self.kf.predict() 119 | self.age += 1 120 | if(self.time_since_update>0): 121 | self.hit_streak = 0 122 | self.time_since_update += 1 123 | self.history.append(convert_x_to_bbox(self.kf.x)) 124 | return self.history[-1] 125 | 126 | def get_state(self): 127 | """ 128 | Returns the current bounding box estimate. 129 | """ 130 | return convert_x_to_bbox(self.kf.x) 131 | 132 | def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3): 133 | """ 134 | Assigns detections to tracked object (both represented as bounding boxes) 135 | Returns 3 lists of matches, unmatched_detections and unmatched_trackers 136 | """ 137 | if(len(trackers)==0): 138 | return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int) 139 | iou_matrix = np.zeros((len(detections),len(trackers)),dtype=np.float32) 140 | 141 | for d,det in enumerate(detections): 142 | for t,trk in enumerate(trackers): 143 | iou_matrix[d,t] = iou(det,trk) 144 | matched_indices = linear_assignment(-iou_matrix) 145 | 146 | # matched_indices = np.array(matched_indices).reshape((-1,2)) 147 | # print(iou_matrix.shape,matched_indices.shape) 148 | unmatched_detections = [] 149 | for d,det in enumerate(detections): 150 | if(d not in matched_indices[:,0]): 151 | unmatched_detections.append(d) 152 | unmatched_trackers = [] 153 | for t,trk in enumerate(trackers): 154 | if(t not in matched_indices[:,1]): 155 | unmatched_trackers.append(t) 156 | 157 | #filter out matched with low IOU 158 | matches = [] 159 | for m in matched_indices: 160 | if(iou_matrix[m[0],m[1]]= self.min_hits or self.frame_count <= self.min_hits)): 223 | ret.append(np.concatenate((d,[trk.id+1])).reshape(1,-1)) # +1 as MOT benchmark requires positive 224 | 225 | i -= 1 226 | #remove dead tracklet 227 | if(trk.time_since_update > self.max_age): 228 | self.trackers.pop(i) 229 | self.counts = KalmanBoxTracker.count 230 | if(len(ret)>0): 231 | return np.concatenate(ret) 232 | 233 | return np.empty((0,5)) 234 | 235 | def parse_args(): 236 | """Parse input arguments.""" 237 | parser = argparse.ArgumentParser(description='SORT demo') 238 | parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true') 239 | args = parser.parse_args() 240 | return args 241 | 242 | if __name__ == '__main__': 243 | # all train 244 | sequences = ['PETS09-S2L1','TUD-Campus','TUD-Stadtmitte','ETH-Bahnhof','ETH-Sunnyday','ETH-Pedcross2','KITTI-13','KITTI-17','ADL-Rundle-6','ADL-Rundle-8','Venice-2'] 245 | args = parse_args() 246 | display = args.display 247 | phase = 'train' 248 | total_time = 0.0 249 | total_frames = 0 250 | colours = np.random.rand(32,3) #used only for display 251 | if(display): 252 | if not os.path.exists('mot_benchmark'): 253 | print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). 
E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n') 254 | exit() 255 | plt.ion() 256 | fig = plt.figure() 257 | 258 | if not os.path.exists('output'): 259 | os.makedirs('output') 260 | 261 | for seq in sequences: 262 | mot_tracker = Sort() #create instance of the SORT tracker 263 | seq_dets = np.loadtxt('data/%s/det.txt'%(seq),delimiter=',') #load detections 264 | with open('output/%s.txt'%(seq),'w') as out_file: 265 | print("Processing %s."%(seq)) 266 | for frame in range(int(seq_dets[:,0].max())): 267 | frame += 1 #detection and frame numbers begin at 1 268 | dets = seq_dets[seq_dets[:,0]==frame,2:7] 269 | dets[:,2:4] += dets[:,0:2] #convert to [x1,y1,w,h] to [x1,y1,x2,y2] 270 | total_frames += 1 271 | 272 | if(display): 273 | ax1 = fig.add_subplot(111, aspect='equal') 274 | fn = 'mot_benchmark/%s/%s/img1/%06d.jpg'%(phase,seq,frame) 275 | im =io.imread(fn) 276 | ax1.imshow(im) 277 | plt.title(seq+' Tracked Targets') 278 | 279 | start_time = time.time() 280 | trackers = mot_tracker.update(dets) 281 | cycle_time = time.time() - start_time 282 | total_time += cycle_time 283 | 284 | for d in trackers: 285 | print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file) 286 | if(display): 287 | d = d.astype(np.int32) 288 | ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:])) 289 | ax1.set_adjustable('box-forced') 290 | 291 | if(display): 292 | fig.canvas.flush_events() 293 | plt.draw() 294 | ax1.cla() 295 | 296 | print("Total Tracking took: %.3f for %d frames or %.1f FPS"%(total_time,total_frames,total_frames/total_time)) 297 | if(display): 298 | print("Note: to get real runtime results run without the option: --display") 299 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import math 3 | import time 4 | import tqdm 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.autograd import Variable 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | import matplotlib.patches as patches 12 | 13 | 14 | def to_cpu(tensor): 15 | return tensor.detach().cpu() 16 | 17 | 18 | def load_classes(path): 19 | """ 20 | Loads class labels at 'path' 21 | """ 22 | fp = open(path, "r") 23 | names = fp.read().split("\n")[:-1] 24 | return names 25 | 26 | 27 | def weights_init_normal(m): 28 | classname = m.__class__.__name__ 29 | if classname.find("Conv") != -1: 30 | torch.nn.init.normal_(m.weight.data, 0.0, 0.02) 31 | elif classname.find("BatchNorm2d") != -1: 32 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02) 33 | torch.nn.init.constant_(m.bias.data, 0.0) 34 | 35 | 36 | def rescale_boxes(boxes, current_dim, original_shape): 37 | """ Rescales bounding boxes to the original shape """ 38 | orig_h, orig_w = original_shape 39 | # The amount of padding that was added 40 | pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) 41 | pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) 42 | # Image height and width after padding is removed 43 | unpad_h = current_dim - pad_y 44 | unpad_w = current_dim - pad_x 45 | # Rescale bounding boxes to dimension of original image 46 | boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w 47 | boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h 48 | boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * 
orig_w 49 | boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h 50 | return boxes 51 | 52 | 53 | def xywh2xyxy(x): 54 | y = x.new(x.shape) 55 | y[..., 0] = x[..., 0] - x[..., 2] / 2 56 | y[..., 1] = x[..., 1] - x[..., 3] / 2 57 | y[..., 2] = x[..., 0] + x[..., 2] / 2 58 | y[..., 3] = x[..., 1] + x[..., 3] / 2 59 | return y 60 | 61 | 62 | def ap_per_class(tp, conf, pred_cls, target_cls): 63 | """ Compute the average precision, given the recall and precision curves. 64 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 65 | # Arguments 66 | tp: True positives (list). 67 | conf: Objectness value from 0-1 (list). 68 | pred_cls: Predicted object classes (list). 69 | target_cls: True object classes (list). 70 | # Returns 71 | The average precision as computed in py-faster-rcnn. 72 | """ 73 | 74 | # Sort by objectness 75 | i = np.argsort(-conf) 76 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 77 | 78 | # Find unique classes 79 | unique_classes = np.unique(target_cls) 80 | 81 | # Create Precision-Recall curve and compute AP for each class 82 | ap, p, r = [], [], [] 83 | for c in tqdm.tqdm(unique_classes, desc="Computing AP"): 84 | i = pred_cls == c 85 | n_gt = (target_cls == c).sum() # Number of ground truth objects 86 | n_p = i.sum() # Number of predicted objects 87 | 88 | if n_p == 0 and n_gt == 0: 89 | continue 90 | elif n_p == 0 or n_gt == 0: 91 | ap.append(0) 92 | r.append(0) 93 | p.append(0) 94 | else: 95 | # Accumulate FPs and TPs 96 | fpc = (1 - tp[i]).cumsum() 97 | tpc = (tp[i]).cumsum() 98 | 99 | # Recall 100 | recall_curve = tpc / (n_gt + 1e-16) 101 | r.append(recall_curve[-1]) 102 | 103 | # Precision 104 | precision_curve = tpc / (tpc + fpc) 105 | p.append(precision_curve[-1]) 106 | 107 | # AP from recall-precision curve 108 | ap.append(compute_ap(recall_curve, precision_curve)) 109 | 110 | # Compute F1 score (harmonic mean of precision and recall) 111 | p, r, ap = np.array(p), np.array(r), np.array(ap) 112 | f1 = 2 * p * r / (p + r + 1e-16) 113 | 114 | return p, r, ap, f1, unique_classes.astype("int32") 115 | 116 | 117 | def compute_ap(recall, precision): 118 | """ Compute the average precision, given the recall and precision curves. 119 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 120 | 121 | # Arguments 122 | recall: The recall curve (list). 123 | precision: The precision curve (list). 124 | # Returns 125 | The average precision as computed in py-faster-rcnn. 
126 | """ 127 | # correct AP calculation 128 | # first append sentinel values at the end 129 | mrec = np.concatenate(([0.0], recall, [1.0])) 130 | mpre = np.concatenate(([0.0], precision, [0.0])) 131 | 132 | # compute the precision envelope 133 | for i in range(mpre.size - 1, 0, -1): 134 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 135 | 136 | # to calculate area under PR curve, look for points 137 | # where X axis (recall) changes value 138 | i = np.where(mrec[1:] != mrec[:-1])[0] 139 | 140 | # and sum (\Delta recall) * prec 141 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 142 | return ap 143 | 144 | 145 | def get_batch_statistics(outputs, targets, iou_threshold): 146 | """ Compute true positives, predicted scores and predicted labels per sample """ 147 | batch_metrics = [] 148 | for sample_i in range(len(outputs)): 149 | 150 | if outputs[sample_i] is None: 151 | continue 152 | 153 | output = outputs[sample_i] 154 | pred_boxes = output[:, :4] 155 | pred_scores = output[:, 4] 156 | pred_labels = output[:, -1] 157 | 158 | true_positives = np.zeros(pred_boxes.shape[0]) 159 | 160 | annotations = targets[targets[:, 0] == sample_i][:, 1:] 161 | target_labels = annotations[:, 0] if len(annotations) else [] 162 | if len(annotations): 163 | detected_boxes = [] 164 | target_boxes = annotations[:, 1:] 165 | 166 | for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): 167 | 168 | # If targets are found break 169 | if len(detected_boxes) == len(annotations): 170 | break 171 | 172 | # Ignore if label is not one of the target labels 173 | if pred_label not in target_labels: 174 | continue 175 | 176 | iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) 177 | if iou >= iou_threshold and box_index not in detected_boxes: 178 | true_positives[pred_i] = 1 179 | detected_boxes += [box_index] 180 | batch_metrics.append([true_positives, pred_scores, pred_labels]) 181 | return batch_metrics 182 | 183 | 184 | def bbox_wh_iou(wh1, wh2): 185 | wh2 = wh2.t() 186 | w1, h1 = wh1[0], wh1[1] 187 | w2, h2 = wh2[0], wh2[1] 188 | inter_area = torch.min(w1, w2) * torch.min(h1, h2) 189 | union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area 190 | return inter_area / union_area 191 | 192 | 193 | def bbox_iou(box1, box2, x1y1x2y2=True): 194 | """ 195 | Returns the IoU of two bounding boxes 196 | """ 197 | if not x1y1x2y2: 198 | # Transform from center and width to exact coordinates 199 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 200 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 201 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 202 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 203 | else: 204 | # Get the coordinates of bounding boxes 205 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] 206 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] 207 | 208 | # get the corrdinates of the intersection rectangle 209 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 210 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 211 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 212 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 213 | # Intersection area 214 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( 215 | inter_rect_y2 - inter_rect_y1 + 1, min=0 216 | ) 217 | # Union Area 218 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) 219 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) 220 | 221 | iou = inter_area 
/ (b1_area + b2_area - inter_area + 1e-16) 222 | 223 | return iou 224 | 225 | 226 | def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4): 227 | """ 228 | Removes detections with lower object confidence score than 'conf_thres' and performs 229 | Non-Maximum Suppression to further filter detections. 230 | Returns detections with shape: 231 | (x1, y1, x2, y2, object_conf, class_score, class_pred) 232 | """ 233 | 234 | # From (center x, center y, width, height) to (x1, y1, x2, y2) 235 | prediction[..., :4] = xywh2xyxy(prediction[..., :4]) 236 | output = [None for _ in range(len(prediction))] 237 | for image_i, image_pred in enumerate(prediction): 238 | # Filter out confidence scores below threshold 239 | image_pred = image_pred[image_pred[:, 4] >= conf_thres] 240 | # If none are remaining => process next image 241 | if not image_pred.size(0): 242 | continue 243 | # Object confidence times class confidence 244 | score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0] 245 | # Sort by it 246 | image_pred = image_pred[(-score).argsort()] 247 | class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True) 248 | detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1) 249 | # Perform non-maximum suppression 250 | keep_boxes = [] 251 | while detections.size(0): 252 | large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres 253 | label_match = detections[0, -1] == detections[:, -1] 254 | # Indices of boxes with lower confidence scores, large IOUs and matching labels 255 | invalid = large_overlap & label_match 256 | weights = detections[invalid, 4:5] 257 | # Merge overlapping bboxes by order of confidence 258 | detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum() 259 | keep_boxes += [detections[0]] 260 | detections = detections[~invalid] 261 | if keep_boxes: 262 | output[image_i] = torch.stack(keep_boxes) 263 | 264 | return output 265 | 266 | 267 | def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres): 268 | 269 | ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor 270 | FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor 271 | 272 | nB = pred_boxes.size(0) 273 | nA = pred_boxes.size(1) 274 | nC = pred_cls.size(-1) 275 | nG = pred_boxes.size(2) 276 | 277 | # Output tensors 278 | obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0) 279 | noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1) 280 | class_mask = FloatTensor(nB, nA, nG, nG).fill_(0) 281 | iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0) 282 | tx = FloatTensor(nB, nA, nG, nG).fill_(0) 283 | ty = FloatTensor(nB, nA, nG, nG).fill_(0) 284 | tw = FloatTensor(nB, nA, nG, nG).fill_(0) 285 | th = FloatTensor(nB, nA, nG, nG).fill_(0) 286 | tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0) 287 | 288 | # Convert to position relative to box 289 | target_boxes = target[:, 2:6] * nG 290 | gxy = target_boxes[:, :2] 291 | gwh = target_boxes[:, 2:] 292 | # Get anchors with best iou 293 | ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors]) 294 | best_ious, best_n = ious.max(0) 295 | # Separate target values 296 | b, target_labels = target[:, :2].long().t() 297 | gx, gy = gxy.t() 298 | gw, gh = gwh.t() 299 | gi, gj = gxy.long().t() 300 | # Set masks 301 | obj_mask[b, best_n, gj, gi] = 1 302 | noobj_mask[b, best_n, gj, gi] = 0 303 | 304 | # Set noobj mask to zero where iou exceeds ignore threshold 305 | for i, anchor_ious in enumerate(ious.t()): 306 | noobj_mask[b[i], 
anchor_ious > ignore_thres, gj[i], gi[i]] = 0 307 | 308 | # Coordinates 309 | tx[b, best_n, gj, gi] = gx - gx.floor() 310 | ty[b, best_n, gj, gi] = gy - gy.floor() 311 | # Width and height 312 | tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16) 313 | th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16) 314 | # One-hot encoding of label 315 | tcls[b, best_n, gj, gi, target_labels] = 1 316 | # Compute label correctness and iou at best anchor 317 | class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float() 318 | iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False) 319 | 320 | tconf = obj_mask.float() 321 | return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf 322 | --------------------------------------------------------------------------------
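A minimal end-to-end sketch of how the modules above fit together outside the GUI: Darknet (models.py) detects, yolo_prediction (predict.py) decodes and applies NMS, and Sort (utils/sort.py) assigns track ids. The video path and drawing details are illustrative assumptions; in the repository the Qt application (app.py with counter.py) drives this pipeline and adds the counting-area logic:

    import cv2
    import numpy as np
    import torch

    from models import Darknet
    from predict import yolo_prediction
    from utils.sort import Sort
    from utils.utils import load_classes

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet("config/yolov3.cfg").to(device)
    model.load_darknet_weights("weights/yolov3.weights")
    class_names = load_classes("config/coco.names")

    tracker = Sort()
    cap = cv2.VideoCapture("asserts/test.mp4")
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        # yolo_prediction returns [label, confidence, [cx, cy, w, h]] per detection
        detections = yolo_prediction(model, device, frame, class_names)
        dets = np.array(
            [[cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2, conf]
             for _, conf, (cx, cy, w, h) in detections]
        ).reshape(-1, 5)
        # Sort.update takes [[x1, y1, x2, y2, score], ...] and appends a track id column
        for x1, y1, x2, y2, track_id in tracker.update(dets):
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
            cv2.putText(frame, str(int(track_id)), (int(x1), int(y1) - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
        cv2.imshow("tracking", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()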