├── .gitignore
├── LICENSE
├── README.md
├── app.py
├── asserts
│   ├── demo1.gif
│   ├── demo1.mp4
│   ├── demo2.gif
│   ├── demo2.mp4
│   ├── demo3.gif
│   ├── demo3.mp4
│   └── test.mp4
├── config.py
├── config
│   ├── coco.data
│   ├── coco.names
│   ├── yolov3-tiny.cfg
│   └── yolov3.cfg
├── counter.py
├── gui.py
├── gui.ui
├── models.py
├── predict.py
├── requirements.txt
└── utils
    ├── .config
    ├── __init__.py
    ├── augmentations.py
    ├── datasets.py
    ├── logger.py
    ├── parse_config.py
    ├── sort.py
    └── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | __pycache__/
3 | weights/*
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 wsh122333
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Multi-type_vehicles_flow_statistics
2 | Counting multiple types of vehicles using the YOLOv3 and SORT algorithms, implemented in PyTorch.
3 | Detects and tracks vehicles belonging to the classes \["bicycle","bus","car","motorbike","truck"].
4 |
5 | ## Reference
6 | - yolov3-darknet https://github.com/pjreddie/darknet
7 | - yolov3-pytorch https://github.com/eriklindernoren/PyTorch-YOLOv3
8 | - sort https://github.com/abewley/sort
9 |
10 | ## Dependencies
11 | - Ubuntu / Windows
12 | - CUDA >= 10.0 (a quick GPU check is sketched below this list)
13 | - Python >= 3.6
14 | - `pip3 install -r requirements.txt`
15 |
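The GUI falls back to CPU automatically when CUDA is unavailable (see the `torch.device` line in `app.py`), but detection is far slower without a GPU. A quick, optional sanity check (not part of the repository) could look like this:

```python
import torch

# Confirm that PyTorch sees a CUDA device before launching the app.
print("PyTorch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device:", torch.cuda.get_device_name(0))
```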
16 | ## Usage
17 | 1. Download the pre-trained yolov3 weight file [here](https://pjreddie.com/media/files/yolov3.weights) and put it into the `weights` directory;
18 | 2. Run `python3 app.py`;
19 | 3. Open a video, double-click the image to select the counting area, then start;
20 | 4. After detection and tracking finish, the result video and `results.txt` are saved under the `results` directory; each line of `results.txt` has the format \[videoName,id,objectName], one line per counted vehicle (see the parsing sketch below).
21 |
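Each line of `results.txt` is a space-separated `videoName id objectName` record, as written by `update_counter_results` in `app.py`. A minimal post-processing sketch (not part of the repository, assuming the default `results/results.txt` path) to tally totals per class:

```python
from collections import Counter

# Tally per-class totals from results/results.txt.
# app.py writes one line per counted vehicle: "videoName id objectName".
counts = Counter()
with open("results/results.txt") as f:
    for line in f:
        parts = line.split()
        if parts:                   # skip blank lines
            counts[parts[-1]] += 1  # last field is the object name
print(dict(counts))                 # e.g. {'car': 42, 'truck': 7}
```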
22 | ## Demo
23 | ![demo1](asserts/demo1.gif)
24 |
25 | ![demo2](asserts/demo2.gif)
26 |
27 | ![demo3](asserts/demo3.gif)
28 |
29 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog
3 | from PyQt5.QtGui import QImage, QPixmap
4 | from gui import *
5 | import copy
6 | from counter import CounterThread
7 | from utils.sort import *
8 | from models import *
9 | from utils.utils import *
10 | from utils.datasets import *
11 | from config import *
12 |
13 | class App(QMainWindow,Ui_mainWindow):
14 | def __init__(self):
15 | super(App,self).__init__()
16 | self.setupUi(self)
17 | self.label_image_size = (self.label_image.geometry().width(),self.label_image.geometry().height())
18 | self.video = None
19 | self.exampleImage = None
20 | self.imgScale = None
21 | self.get_points_flag = 0
22 | self.countArea = []
23 | self.road_code = None
24 | self.time_code = None
25 | self.show_label = names
26 |
27 | #button function
28 | self.pushButton_selectArea.clicked.connect(self.select_area)
29 | self.pushButton_openVideo.clicked.connect(self.open_video)
30 | self.pushButton_start.clicked.connect(self.start_count)
31 | self.pushButton_pause.clicked.connect(self.pause)
32 | self.label_image.mouseDoubleClickEvent = self.get_points
33 |
34 |
35 | self.pushButton_selectArea.setEnabled(False)
36 | self.pushButton_start.setEnabled(False)
37 | self.pushButton_pause.setEnabled(False)
38 |
39 | #some flags
40 | self.running_flag = 0
41 | self.pause_flag = 0
42 | self.counter_thread_start_flag = 0
43 |
44 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
45 |
46 |
47 | data_config = "config/coco.data"
48 | weights_path = "weights/yolov3.weights"
49 | model_def = "config/yolov3.cfg"
50 | data_config = parse_data_config(data_config)
51 | self.yolo_class_names = load_classes(data_config["names"])
52 |
53 | # Initiate model
54 | print("Loading model ...")
55 | self.yolo_model = Darknet(model_def).to(self.device)
56 | if weights_path.endswith(".weights"):
57 | # Load darknet weights
58 | self.yolo_model.load_darknet_weights(weights_path)
59 | else:
60 | # Load checkpoint weights
61 | self.yolo_model.load_state_dict(torch.load(weights_path))
62 |
63 |
64 | # counter Thread
65 | self.counterThread = CounterThread(self.yolo_model,self.yolo_class_names,self.device)
66 | self.counterThread.sin_counterResult.connect(self.show_image_label)
67 | self.counterThread.sin_done.connect(self.done)
68 | self.counterThread.sin_counter_results.connect(self.update_counter_results)
69 |
70 |
71 |
72 | def open_video(self):
73 | openfile_name = QFileDialog.getOpenFileName(self, 'Open video', '', 'Video files (*.avi *.mp4)')
74 | self.videoList = [openfile_name[0]]
75 |
76 | # opendir_name = QFileDialog.getExistingDirectory(self, "Open dir", "./")
77 | # self.videoList = [os.path.join(opendir_name,item) for item in os.listdir(opendir_name)]
78 | # self.videoList = list(filter(lambda x: not os.path.isdir(x) , self.videoList))
79 | # self.videoList.sort()
80 |
81 | vid = cv2.VideoCapture(self.videoList[0])
82 |
83 | # self.videoWriter = cv2.VideoWriter(openfile_name[0].split("/")[-1], cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), 10, (1920, 1080))
84 |
85 | while vid.isOpened():
86 | ret, frame = vid.read()
87 | if ret:
88 | self.exampleImage = frame
89 | self.show_image_label(frame)
90 | self.imgScale = np.array(frame.shape[:2]) / [self.label_image_size[1], self.label_image_size[0]]
91 | vid.release()
92 | break
93 |
94 | self.pushButton_selectArea.setEnabled(True)
95 | self.pushButton_start.setText("Start")
96 | self.pushButton_start.setEnabled(False)
97 | self.pushButton_pause.setText("Pause")
98 | self.pushButton_pause.setEnabled(False)
99 |
100 | #clear counting results
101 | KalmanBoxTracker.count = 0
102 | self.label_sum.setText("0")
103 | self.label_sum.repaint()
104 |
105 |
106 | def get_points(self, event):
107 | if self.get_points_flag:
108 | x = event.x()
109 | y = event.y()
110 | self.countArea.append([int(x*self.imgScale[1]),int(y*self.imgScale[0])])
111 | exampleImageWithArea = copy.deepcopy(self.exampleImage)
112 | for point in self.countArea:
113 | exampleImageWithArea[point[1]-10:point[1]+10,point[0]-10:point[0]+10] = (0,255,255)
114 | cv2.fillConvexPoly(exampleImageWithArea, np.array(self.countArea), (0,0,255))
115 | self.show_image_label(exampleImageWithArea)
116 | print(self.countArea)
117 |
118 |
119 | def select_area(self):
120 |
121 | #change Area needs update exampleImage
122 | if self.counter_thread_start_flag:
123 | ret, frame = self.videoCapture.read()
124 | if ret:
125 | self.exampleImage = frame
126 | self.show_image_label(frame)
127 |
128 | if not self.get_points_flag:
129 | self.pushButton_selectArea.setText("Submit Area")
130 | self.get_points_flag = 1
131 | self.countArea = []
132 | self.pushButton_openVideo.setEnabled(False)
133 | self.pushButton_start.setEnabled(False)
134 |
135 | else:
136 | self.pushButton_selectArea.setText("Select Area")
137 | self.get_points_flag = 0
138 | exampleImage = copy.deepcopy(self.exampleImage)
139 | # painting area
140 | for i in range(len(self.countArea)):
141 | cv2.line(exampleImage, tuple(self.countArea[i]), tuple(self.countArea[(i + 1) % (len(self.countArea))]), (0, 0, 255), 2)
142 | self.show_image_label(exampleImage)
143 |
144 | #enable start button
145 | self.pushButton_openVideo.setEnabled(True)
146 | self.pushButton_start.setEnabled(True)
147 |
148 |
149 | def show_image_label(self, img_np):
150 | img_np = cv2.cvtColor(img_np,cv2.COLOR_BGR2RGB)
151 | img_np = cv2.resize(img_np, self.label_image_size)
152 | frame = QImage(img_np, self.label_image_size[0], self.label_image_size[1], QImage.Format_RGB888)
153 | pix = QPixmap.fromImage(frame)
154 | self.label_image.setPixmap(pix)
155 | self.label_image.repaint()
156 |
157 | def start_count(self):
158 | if self.running_flag == 0:
159 | #clear count and display
160 | KalmanBoxTracker.count = 0
161 | for item in self.show_label:
162 | vars(self)[f"label_{item}"].setText('0')
163 | # clear result file
164 | with open("results/results.txt", "w") as f:
165 | pass
166 |
167 | #start
168 | self.running_flag = 1
169 | self.pause_flag = 0
170 | self.pushButton_start.setText("Stop")
171 | self.pushButton_openVideo.setEnabled(False)
172 | self.pushButton_selectArea.setEnabled(False)
173 | #emit new parameter to counter thread
174 | self.counterThread.sin_runningFlag.emit(self.running_flag)
175 | self.counterThread.sin_countArea.emit(self.countArea)
176 | self.counterThread.sin_videoList.emit(self.videoList)
177 | #start counter thread
178 | self.counterThread.start()
179 |
180 | self.pushButton_pause.setEnabled(True)
181 |
182 |
183 | elif self.running_flag == 1: # "Stop" was pressed while running
184 | #stop system
185 | self.running_flag = 0
186 | self.counterThread.sin_runningFlag.emit(self.running_flag)
187 | self.pushButton_openVideo.setEnabled(True)
188 | self.pushButton_selectArea.setEnabled(True)
189 | self.pushButton_start.setText("Start")
190 |
191 |
192 |
193 | def done(self,sin):
194 | if sin == 1:
195 | self.pushButton_openVideo.setEnabled(True)
196 | self.pushButton_start.setEnabled(False)
197 | self.pushButton_start.setText("Start")
198 |
199 |
200 | def update_counter_results(self,counter_results):
201 | with open("results/results.txt", "a") as f:
202 | for i, result in enumerate(counter_results):
203 | label_var = vars(self)[f"label_{result[2]}"]
204 | label_var.setText(str(int(label_var.text())+1))
205 | label_var.repaint()
206 | label_sum_var = vars(self)[f"label_sum"]
207 | label_sum_var.setText(str(int(label_sum_var.text()) + 1))
208 | label_sum_var.repaint()
209 | f.write(' '.join(map(str, result)))
210 | f.write("\n")
211 | # print("************************************************",len(counter_results))
212 |
213 |
214 | def pause(self):
215 | if self.pause_flag == 0:
216 | self.pause_flag = 1
217 | self.pushButton_pause.setText("Continue")
218 | self.pushButton_start.setEnabled(False)
219 | else:
220 | self.pause_flag = 0
221 | self.pushButton_pause.setText("Pause")
222 | self.pushButton_start.setEnabled(True)
223 |
224 | self.counterThread.sin_pauseFlag.emit(self.pause_flag)
225 |
226 |
227 | if __name__ == '__main__':
228 | app = QApplication(sys.argv)
229 | myWin = App()
230 | myWin.show()
231 | sys.exit(app.exec_())
232 |
--------------------------------------------------------------------------------
/asserts/demo1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo1.gif
--------------------------------------------------------------------------------
/asserts/demo1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo1.mp4
--------------------------------------------------------------------------------
/asserts/demo2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo2.gif
--------------------------------------------------------------------------------
/asserts/demo2.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo2.mp4
--------------------------------------------------------------------------------
/asserts/demo3.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo3.gif
--------------------------------------------------------------------------------
/asserts/demo3.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo3.mp4
--------------------------------------------------------------------------------
/asserts/test.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/test.mp4
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | names = ["bicycle","bus","car","motorbike","truck"]
2 | color_dict = {"bicycle": (179, 52, 255),
3 | "bus": (255, 191, 0),
4 | "car": (127, 255, 0),
5 | "motorbike": (0, 140, 255),
6 | "truck": (0, 215, 255)}
--------------------------------------------------------------------------------
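One note on `config.py` above: the tuples in `color_dict` are BGR values, since `counter.py` hands them straight to OpenCV drawing calls (`cv2.rectangle`, `cv2.putText`). A throwaway snippet, purely illustrative and not part of the repository, to preview one of the colors:

    import cv2
    import numpy as np
    from config import color_dict

    # Render the "car" color as a 100x100 swatch; OpenCV expects BGR tuples.
    swatch = np.full((100, 100, 3), color_dict["car"], dtype=np.uint8)
    cv2.imwrite("car_color.png", swatch)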
/config/coco.data:
--------------------------------------------------------------------------------
1 | classes= 80
2 | train=data/train.txt
3 | valid=data/valid.txt
4 | names=config/coco.names
5 | backup=backup/
6 |
--------------------------------------------------------------------------------
/config/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/config/yolov3-tiny.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | batch=1
4 | subdivisions=1
5 | # Training
6 | # batch=64
7 | # subdivisions=2
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | # 0
26 | [convolutional]
27 | batch_normalize=1
28 | filters=16
29 | size=3
30 | stride=1
31 | pad=1
32 | activation=leaky
33 |
34 | # 1
35 | [maxpool]
36 | size=2
37 | stride=2
38 |
39 | # 2
40 | [convolutional]
41 | batch_normalize=1
42 | filters=32
43 | size=3
44 | stride=1
45 | pad=1
46 | activation=leaky
47 |
48 | # 3
49 | [maxpool]
50 | size=2
51 | stride=2
52 |
53 | # 4
54 | [convolutional]
55 | batch_normalize=1
56 | filters=64
57 | size=3
58 | stride=1
59 | pad=1
60 | activation=leaky
61 |
62 | # 5
63 | [maxpool]
64 | size=2
65 | stride=2
66 |
67 | # 6
68 | [convolutional]
69 | batch_normalize=1
70 | filters=128
71 | size=3
72 | stride=1
73 | pad=1
74 | activation=leaky
75 |
76 | # 7
77 | [maxpool]
78 | size=2
79 | stride=2
80 |
81 | # 8
82 | [convolutional]
83 | batch_normalize=1
84 | filters=256
85 | size=3
86 | stride=1
87 | pad=1
88 | activation=leaky
89 |
90 | # 9
91 | [maxpool]
92 | size=2
93 | stride=2
94 |
95 | # 10
96 | [convolutional]
97 | batch_normalize=1
98 | filters=512
99 | size=3
100 | stride=1
101 | pad=1
102 | activation=leaky
103 |
104 | # 11
105 | [maxpool]
106 | size=2
107 | stride=1
108 |
109 | # 12
110 | [convolutional]
111 | batch_normalize=1
112 | filters=1024
113 | size=3
114 | stride=1
115 | pad=1
116 | activation=leaky
117 |
118 | ###########
119 |
120 | # 13
121 | [convolutional]
122 | batch_normalize=1
123 | filters=256
124 | size=1
125 | stride=1
126 | pad=1
127 | activation=leaky
128 |
129 | # 14
130 | [convolutional]
131 | batch_normalize=1
132 | filters=512
133 | size=3
134 | stride=1
135 | pad=1
136 | activation=leaky
137 |
138 | # 15
139 | [convolutional]
140 | size=1
141 | stride=1
142 | pad=1
143 | filters=255
144 | activation=linear
145 |
146 |
147 |
148 | # 16
149 | [yolo]
150 | mask = 3,4,5
151 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
152 | classes=80
153 | num=6
154 | jitter=.3
155 | ignore_thresh = .7
156 | truth_thresh = 1
157 | random=1
158 |
159 | # 17
160 | [route]
161 | layers = -4
162 |
163 | # 18
164 | [convolutional]
165 | batch_normalize=1
166 | filters=128
167 | size=1
168 | stride=1
169 | pad=1
170 | activation=leaky
171 |
172 | # 19
173 | [upsample]
174 | stride=2
175 |
176 | # 20
177 | [route]
178 | layers = -1, 8
179 |
180 | # 21
181 | [convolutional]
182 | batch_normalize=1
183 | filters=256
184 | size=3
185 | stride=1
186 | pad=1
187 | activation=leaky
188 |
189 | # 22
190 | [convolutional]
191 | size=1
192 | stride=1
193 | pad=1
194 | filters=255
195 | activation=linear
196 |
197 | # 23
198 | [yolo]
199 | mask = 1,2,3
200 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
201 | classes=80
202 | num=6
203 | jitter=.3
204 | ignore_thresh = .7
205 | truth_thresh = 1
206 | random=1
207 |
--------------------------------------------------------------------------------
/config/yolov3.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | #batch=1
4 | #subdivisions=1
5 | # Training
6 | batch=16
7 | subdivisions=1
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=32
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | # Downsample
34 |
35 | [convolutional]
36 | batch_normalize=1
37 | filters=64
38 | size=3
39 | stride=2
40 | pad=1
41 | activation=leaky
42 |
43 | [convolutional]
44 | batch_normalize=1
45 | filters=32
46 | size=1
47 | stride=1
48 | pad=1
49 | activation=leaky
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [shortcut]
60 | from=-3
61 | activation=linear
62 |
63 | # Downsample
64 |
65 | [convolutional]
66 | batch_normalize=1
67 | filters=128
68 | size=3
69 | stride=2
70 | pad=1
71 | activation=leaky
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=64
76 | size=1
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [convolutional]
82 | batch_normalize=1
83 | filters=128
84 | size=3
85 | stride=1
86 | pad=1
87 | activation=leaky
88 |
89 | [shortcut]
90 | from=-3
91 | activation=linear
92 |
93 | [convolutional]
94 | batch_normalize=1
95 | filters=64
96 | size=1
97 | stride=1
98 | pad=1
99 | activation=leaky
100 |
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 |
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 |
113 | # Downsample
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 |
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 |
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 |
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 |
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 |
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 |
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 |
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 |
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 |
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 |
203 |
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 |
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 |
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 |
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 |
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 |
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 |
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 |
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 |
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 |
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 |
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 |
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 |
284 | # Downsample
285 |
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 |
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 |
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 |
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 |
314 |
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 |
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 |
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 |
335 |
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 |
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 |
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 |
356 |
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 |
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 |
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 |
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 |
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 |
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 |
397 |
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 |
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 |
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 |
418 |
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 |
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 |
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 |
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 |
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 |
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 |
459 | # Downsample
460 |
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 |
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 |
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 |
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 |
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 |
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 |
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 |
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 |
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 |
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 |
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 |
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 |
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 |
549 | ######################
550 |
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 |
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 |
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 |
575 | [convolutional]
576 | batch_normalize=1
577 | size=3
578 | stride=1
579 | pad=1
580 | filters=1024
581 | activation=leaky
582 |
583 | [convolutional]
584 | batch_normalize=1
585 | filters=512
586 | size=1
587 | stride=1
588 | pad=1
589 | activation=leaky
590 |
591 | [convolutional]
592 | batch_normalize=1
593 | size=3
594 | stride=1
595 | pad=1
596 | filters=1024
597 | activation=leaky
598 |
599 | [convolutional]
600 | size=1
601 | stride=1
602 | pad=1
603 | filters=255
604 | activation=linear
605 |
606 |
607 | [yolo]
608 | mask = 6,7,8
609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
610 | classes=80
611 | num=9
612 | jitter=.3
613 | ignore_thresh = .7
614 | truth_thresh = 1
615 | random=1
616 |
617 |
618 | [route]
619 | layers = -4
620 |
621 | [convolutional]
622 | batch_normalize=1
623 | filters=256
624 | size=1
625 | stride=1
626 | pad=1
627 | activation=leaky
628 |
629 | [upsample]
630 | stride=2
631 |
632 | [route]
633 | layers = -1, 61
634 |
635 |
636 |
637 | [convolutional]
638 | batch_normalize=1
639 | filters=256
640 | size=1
641 | stride=1
642 | pad=1
643 | activation=leaky
644 |
645 | [convolutional]
646 | batch_normalize=1
647 | size=3
648 | stride=1
649 | pad=1
650 | filters=512
651 | activation=leaky
652 |
653 | [convolutional]
654 | batch_normalize=1
655 | filters=256
656 | size=1
657 | stride=1
658 | pad=1
659 | activation=leaky
660 |
661 | [convolutional]
662 | batch_normalize=1
663 | size=3
664 | stride=1
665 | pad=1
666 | filters=512
667 | activation=leaky
668 |
669 | [convolutional]
670 | batch_normalize=1
671 | filters=256
672 | size=1
673 | stride=1
674 | pad=1
675 | activation=leaky
676 |
677 | [convolutional]
678 | batch_normalize=1
679 | size=3
680 | stride=1
681 | pad=1
682 | filters=512
683 | activation=leaky
684 |
685 | [convolutional]
686 | size=1
687 | stride=1
688 | pad=1
689 | filters=255
690 | activation=linear
691 |
692 |
693 | [yolo]
694 | mask = 3,4,5
695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
696 | classes=80
697 | num=9
698 | jitter=.3
699 | ignore_thresh = .7
700 | truth_thresh = 1
701 | random=1
702 |
703 |
704 |
705 | [route]
706 | layers = -4
707 |
708 | [convolutional]
709 | batch_normalize=1
710 | filters=128
711 | size=1
712 | stride=1
713 | pad=1
714 | activation=leaky
715 |
716 | [upsample]
717 | stride=2
718 |
719 | [route]
720 | layers = -1, 36
721 |
722 |
723 |
724 | [convolutional]
725 | batch_normalize=1
726 | filters=128
727 | size=1
728 | stride=1
729 | pad=1
730 | activation=leaky
731 |
732 | [convolutional]
733 | batch_normalize=1
734 | size=3
735 | stride=1
736 | pad=1
737 | filters=256
738 | activation=leaky
739 |
740 | [convolutional]
741 | batch_normalize=1
742 | filters=128
743 | size=1
744 | stride=1
745 | pad=1
746 | activation=leaky
747 |
748 | [convolutional]
749 | batch_normalize=1
750 | size=3
751 | stride=1
752 | pad=1
753 | filters=256
754 | activation=leaky
755 |
756 | [convolutional]
757 | batch_normalize=1
758 | filters=128
759 | size=1
760 | stride=1
761 | pad=1
762 | activation=leaky
763 |
764 | [convolutional]
765 | batch_normalize=1
766 | size=3
767 | stride=1
768 | pad=1
769 | filters=256
770 | activation=leaky
771 |
772 | [convolutional]
773 | size=1
774 | stride=1
775 | pad=1
776 | filters=255
777 | activation=linear
778 |
779 |
780 | [yolo]
781 | mask = 0,1,2
782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
783 | classes=80
784 | num=9
785 | jitter=.3
786 | ignore_thresh = .7
787 | truth_thresh = 1
788 | random=1
789 |
--------------------------------------------------------------------------------
/counter.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | import cv2
3 | from utils.sort import *
4 | from PyQt5.QtCore import QThread, pyqtSignal
5 | import predict
6 | from config import *
7 |
8 | class CounterThread(QThread):
9 | sin_counterResult = pyqtSignal(np.ndarray)
10 | sin_runningFlag = pyqtSignal(int)
11 | sin_videoList = pyqtSignal(list)
12 | sin_countArea = pyqtSignal(list)
13 | sin_done = pyqtSignal(int)
14 | sin_counter_results = pyqtSignal(list)
15 | sin_pauseFlag = pyqtSignal(int)
16 |
17 | def __init__(self,model,class_names,device):
18 | super(CounterThread,self).__init__()
19 |
20 | self.model = model
21 | self.class_names = class_names
22 | self.device = device
23 |
24 | self.permission = names
25 |
26 | self.colorDict = color_dict
27 |
28 | # create instance of SORT
29 | self.mot_tracker = Sort(max_age=10, min_hits=2)
30 | self.countArea = None
31 | self.running_flag = 0
32 | self.pause_flag = 0
33 | self.videoList = []
34 | self.last_max_id = 0
35 | self.history = {} #save history
36 | #history = {id:{"no_update_count": int, "his": list}}
37 |
38 | self.sin_runningFlag.connect(self.update_flag)
39 | self.sin_videoList.connect(self.update_videoList)
40 | self.sin_countArea.connect(self.update_countArea)
41 | self.sin_pauseFlag.connect(self.update_pauseFlag)
42 |
43 | self.save_dir = "results"
44 | if not os.path.exists(self.save_dir): os.makedirs(self.save_dir)
45 |
46 | def run(self):
47 | for video in self.videoList:
48 | self.last_max_id = 0
49 | cap = cv2.VideoCapture(video)
50 | out = cv2.VideoWriter(os.path.join(self.save_dir, video.split("/")[-1]), cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), 10, (1920, 1080))  # XVID, 10 fps, fixed 1920x1080 output size
51 | frame_count = 0
52 | while cap.isOpened():
53 | # print(frame_count)
54 | if self.running_flag:
55 | if not self.pause_flag:
56 | ret, frame = cap.read()
57 | if ret:
58 | if frame_count % 3 == 0:  # only every third frame is detected and tracked
59 | a1 = time.time()
60 | frame = self.counter(self.permission, self.colorDict, frame,np.array(self.countArea), self.mot_tracker, video)
61 | self.sin_counterResult.emit(frame)
62 |
63 | out.write(frame)
64 | a2 = time.time()
65 | print(f"fps: {1 / (a2 - a1):.2f}")
66 | frame_count += 1
67 | else:
68 | break
69 | else:
70 | time.sleep(0.1)
71 | else:
72 | break
73 |
74 | #restart count for each video
75 | KalmanBoxTracker.count = 0
76 | cap.release()
77 | out.release()
78 |
79 | if not self.running_flag:
80 | break
81 |
82 | if self.running_flag:
83 | self.sin_done.emit(1)
84 |
85 | def update_pauseFlag(self,flag):
86 | self.pause_flag = flag
87 |
88 | def update_flag(self,flag):
89 | self.running_flag = flag
90 |
91 | def update_videoList(self, videoList):
92 | print("Update videoList!")
93 | self.videoList = videoList
94 |
95 | def update_countArea(self,Area):
96 | print("Update countArea!")
97 | self.countArea = Area
98 |
99 | def counter(self, permission, colorDict, frame, CountArea, mot_tracker, videoName):
100 |
101 | # painting area
102 | AreaBound = [min(CountArea[:, 0]), min(CountArea[:, 1]), max(CountArea[:, 0]), max(CountArea[:, 1])]
103 | painting = np.zeros((AreaBound[3] - AreaBound[1], AreaBound[2] - AreaBound[0]), dtype=np.uint8)
104 | CountArea_mini = CountArea - AreaBound[0:2]
105 | cv2.fillConvexPoly(painting, CountArea_mini, (1,))
106 |
107 | objects = predict.yolo_prediction(self.model,self.device,frame,self.class_names)
108 | objects = filter(lambda x: x[0] in permission, objects)
109 | objects = filter(lambda x: x[1] > 0.5,objects)
110 | objects = list(filter(lambda x: pointInCountArea(painting, AreaBound, [int(x[2][0]), int(x[2][1] + x[2][3] / 2)]),objects))
111 |
112 | #filter out repeat bbox
113 | objects = filiter_out_repeat(objects)
114 |
115 | detections = []
116 | for item in objects:
117 | detections.append([int(item[2][0] - item[2][2] / 2),
118 | int(item[2][1] - item[2][3] / 2),
119 | int(item[2][0] + item[2][2] / 2),
120 | int(item[2][1] + item[2][3] / 2),
121 | item[1]])
122 | track_bbs_ids = mot_tracker.update(np.array(detections))
123 |
124 | # for i, item in enumerate(objects):
125 | # # x1,y1,x2,y2,id = list(map(lambda x :int(x),item))
126 | # # id_log.add(id)
127 | # # objectName = get_objName(item, objects)
128 | #
129 | # objectName, province, objectBox = item
130 | # x, y, w, h = objectBox
131 | # x1, y1, x2, y2 = int(x - w / 2), int(y - h / 2), int(x + w / 2), int(y + h / 2)
132 | #
133 | # boxColor = colorDict[objectName]
134 | # cv2.rectangle(frame, (x1, y1), (x2, y2), boxColor, thickness=2)
135 | # cv2.putText(frame, objectName, (x1 - 1, y1 - 3), cv2.FONT_HERSHEY_COMPLEX, 0.7, boxColor,
136 | # thickness=2)
137 |
138 | # painting area
139 | for i in range(len(CountArea)):
140 | cv2.line(frame, tuple(CountArea[i]), tuple(CountArea[(i + 1) % (len(CountArea))]), (0, 0, 255), 2)
141 |
142 | if len(track_bbs_ids) > 0:
143 | for bb in track_bbs_ids: #add all bbox to history
144 | id = int(bb[-1])
145 | objectName = get_objName(bb, objects)
146 | if id not in self.history.keys(): #add new id
147 | self.history[id] = {}
148 | self.history[id]["no_update_count"] = 0
149 | self.history[id]["his"] = []
150 | self.history[id]["his"].append(objectName)
151 | else:
152 | self.history[id]["no_update_count"] = 0
153 | self.history[id]["his"].append(objectName)
154 |
155 | for i, item in enumerate(track_bbs_ids):
156 | bb = list(map(lambda x: int(x), item))
157 | id = bb[-1]
158 | x1, y1, x2, y2 = bb[:4]
159 |
160 | his = self.history[id]["his"]
161 | result = {}
162 | for i in set(his):
163 | result[i] = his.count(i)
164 | res = sorted(result.items(), key=lambda d: d[1], reverse=True)
165 | objectName = res[0][0]
166 |
167 | boxColor = colorDict[objectName]
168 | cv2.rectangle(frame, (x1, y1), (x2, y2), boxColor, thickness=2)
169 | cv2.putText(frame, str(id) + "_" + objectName, (x1 - 1, y1 - 3), cv2.FONT_HERSHEY_COMPLEX, 0.7,
170 | boxColor,
171 | thickness=2)
172 |
173 |
174 | counter_results = []
175 | videoName = videoName.split('/')[-1]
176 | removed_id_list = []
177 | for id in self.history.keys(): #extract id after tracking
178 | self.history[id]["no_update_count"] += 1
179 | if self.history[id]["no_update_count"] > 5:
180 | his = self.history[id]["his"]
181 | result = {}
182 | for i in set(his):
183 | result[i] = his.count(i)
184 | res = sorted(result.items(), key=lambda d: d[1], reverse=True)
185 | objectName = res[0][0]
186 | counter_results.append([videoName,id,objectName])
187 | #del id
188 | removed_id_list.append(id)
189 |
190 | for id in removed_id_list:
191 | _ = self.history.pop(id)
192 |
193 | if len(counter_results):
194 | self.sin_counter_results.emit(counter_results)
195 |
196 | # print(self.history)
197 |
198 |
199 | return frame
200 |
201 | def emit_timeCode(self, time_code):  # unused: sin_timeCode is not defined on this thread
202 | self.sin_timeCode.emit(time_code)
203 |
204 | def getTwoDimensionListIndex(L,value,pos):
205 | for i in range(len(L)):
206 | if L[i][pos] == value:
207 | return i
208 | return -1
209 |
210 | def filiter_out_repeat(objects):
211 | objects = sorted(objects,key=lambda x: x[1])
212 | l = len(objects)
213 | new_objects = []
214 | if l > 1:
215 | for i in range(l-1):
216 | flag = 0
217 | for j in range(i+1,l):
218 | x_i, y_i, w_i, h_i = objects[i][2]
219 | x_j, y_j, w_j, h_j = objects[j][2]
220 | box1 = [int(x_i - w_i / 2), int(y_i - h_i / 2), int(x_i + w_i / 2), int(y_i + h_i / 2)]
221 | box2 = [int(x_j - w_j / 2), int(y_j - h_j / 2), int(x_j + w_j / 2), int(y_j + h_j / 2)]
222 | if cal_iou(box1,box2) >= 0.7:
223 | flag = 1
224 | break
225 | #if no repeat
226 | if not flag:
227 | new_objects.append(objects[i])
228 | #add the last one
229 | new_objects.append(objects[-1])
230 | else:
231 | return objects
232 |
233 | return new_objects
234 |
235 |
236 | def cal_iou(box1,box2):
237 | x1 = max(box1[0],box2[0])
238 | y1 = max(box1[1],box2[1])
239 | x2 = min(box1[2],box2[2])
240 | y2 = min(box1[3],box2[3])
241 | i = max(0,(x2-x1))*max(0,(y2-y1))
242 | u = (box1[2]-box1[0])*(box1[3]-box1[1]) + (box2[2]-box2[0])*(box2[3]-box2[1]) - i
243 | iou = float(i) / float(u) if u > 0 else 0.0  # avoid division by zero for degenerate boxes
244 | return iou
245 |
246 | def get_objName(item,objects):
247 | iou_list = []
248 | for i,object in enumerate(objects):
249 | x, y, w, h = object[2]
250 | x1, y1, x2, y2 = int(x - w / 2), int(y - h / 2), int(x + w / 2), int(y + h / 2)
251 | iou_list.append(cal_iou(item[:4],[x1,y1,x2,y2]))
252 | max_index = iou_list.index(max(iou_list))
253 | return objects[max_index][0]
254 |
255 | def pointInCountArea(painting, AreaBound, point):
256 | h,w = painting.shape[:2]
257 | point = np.array(point)
258 | point = point - AreaBound[:2]
259 | if point[0] < 0 or point[1] < 0 or point[0] >= w or point[1] >= h:
260 | return 0
261 | else:
262 | return painting[point[1],point[0]]
263 |
264 |
265 |
266 |
267 |
268 |
--------------------------------------------------------------------------------
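Two pieces of the counting logic in `counter.py` above are easy to sanity-check in isolation: detections whose boxes overlap with IoU >= 0.7 are treated as duplicates by `filiter_out_repeat`, and each track is finally labelled by a majority vote over the class names recorded in its history. The toy example below (illustrative values, not repository code) re-implements the same arithmetic as `cal_iou` and the same vote:

    from collections import Counter

    def iou(box1, box2):
        # Same arithmetic as cal_iou() in counter.py; boxes are [x1, y1, x2, y2].
        x1, y1 = max(box1[0], box2[0]), max(box1[1], box2[1])
        x2, y2 = min(box1[2], box2[2]), min(box1[3], box2[3])
        inter = max(0, x2 - x1) * max(0, y2 - y1)
        union = ((box1[2] - box1[0]) * (box1[3] - box1[1])
                 + (box2[2] - box2[0]) * (box2[3] - box2[1]) - inter)
        return inter / union

    print(iou([0, 0, 10, 10], [5, 5, 15, 15]))    # 25 / 175 ≈ 0.143 -> kept as two objects

    # Majority vote over a track's history: the most frequent class name wins,
    # matching the sorted-count logic inside counter().
    history = ["car", "truck", "car", "car"]
    print(Counter(history).most_common(1)[0][0])  # "car"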
/gui.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Form implementation generated from reading ui file 'gui.ui'
4 | #
5 | # Created by: PyQt5 UI code generator 5.13.1
6 | #
7 | # WARNING! All changes made in this file will be lost!
8 |
9 |
10 | from PyQt5 import QtCore, QtGui, QtWidgets
11 |
12 |
13 | class Ui_mainWindow(object):
14 | def setupUi(self, mainWindow):
15 | mainWindow.setObjectName("mainWindow")
16 | mainWindow.resize(1203, 554)
17 | self.centralwidget = QtWidgets.QWidget(mainWindow)
18 | self.centralwidget.setObjectName("centralwidget")
19 | self.groupBox_count = QtWidgets.QGroupBox(self.centralwidget)
20 | self.groupBox_count.setGeometry(QtCore.QRect(990, 10, 211, 341))
21 | self.groupBox_count.setObjectName("groupBox_count")
22 | self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.groupBox_count)
23 | self.verticalLayout_2.setObjectName("verticalLayout_2")
24 | self.gridLayout_count = QtWidgets.QGridLayout()
25 | self.gridLayout_count.setContentsMargins(2, 2, 2, 2)
26 | self.gridLayout_count.setSpacing(6)
27 | self.gridLayout_count.setObjectName("gridLayout_count")
28 | self.label_truck = QtWidgets.QLabel(self.groupBox_count)
29 | self.label_truck.setObjectName("label_truck")
30 | self.gridLayout_count.addWidget(self.label_truck, 2, 1, 1, 1, QtCore.Qt.AlignHCenter)
31 | self.label_7 = QtWidgets.QLabel(self.groupBox_count)
32 | self.label_7.setObjectName("label_7")
33 | self.gridLayout_count.addWidget(self.label_7, 4, 0, 1, 1, QtCore.Qt.AlignHCenter)
34 | self.label_5 = QtWidgets.QLabel(self.groupBox_count)
35 | self.label_5.setObjectName("label_5")
36 | self.gridLayout_count.addWidget(self.label_5, 2, 0, 1, 1, QtCore.Qt.AlignHCenter)
37 | self.label_6 = QtWidgets.QLabel(self.groupBox_count)
38 | self.label_6.setObjectName("label_6")
39 | self.gridLayout_count.addWidget(self.label_6, 3, 0, 1, 1, QtCore.Qt.AlignHCenter)
40 | self.label_motorbike = QtWidgets.QLabel(self.groupBox_count)
41 | self.label_motorbike.setObjectName("label_motorbike")
42 | self.gridLayout_count.addWidget(self.label_motorbike, 3, 1, 1, 1, QtCore.Qt.AlignHCenter)
43 | self.label_bus = QtWidgets.QLabel(self.groupBox_count)
44 | self.label_bus.setObjectName("label_bus")
45 | self.gridLayout_count.addWidget(self.label_bus, 1, 1, 1, 1, QtCore.Qt.AlignHCenter)
46 | self.label_bicycle = QtWidgets.QLabel(self.groupBox_count)
47 | self.label_bicycle.setObjectName("label_bicycle")
48 | self.gridLayout_count.addWidget(self.label_bicycle, 4, 1, 1, 1, QtCore.Qt.AlignHCenter)
49 | self.label_12 = QtWidgets.QLabel(self.groupBox_count)
50 | self.label_12.setObjectName("label_12")
51 | self.gridLayout_count.addWidget(self.label_12, 5, 0, 1, 1, QtCore.Qt.AlignHCenter)
52 | self.label_3 = QtWidgets.QLabel(self.groupBox_count)
53 | self.label_3.setObjectName("label_3")
54 | self.gridLayout_count.addWidget(self.label_3, 0, 0, 1, 1, QtCore.Qt.AlignHCenter)
55 | self.label_sum = QtWidgets.QLabel(self.groupBox_count)
56 | self.label_sum.setObjectName("label_sum")
57 | self.gridLayout_count.addWidget(self.label_sum, 5, 1, 1, 1, QtCore.Qt.AlignHCenter)
58 | self.label_car = QtWidgets.QLabel(self.groupBox_count)
59 | self.label_car.setObjectName("label_car")
60 | self.gridLayout_count.addWidget(self.label_car, 0, 1, 1, 1, QtCore.Qt.AlignHCenter)
61 | self.label_4 = QtWidgets.QLabel(self.groupBox_count)
62 | self.label_4.setObjectName("label_4")
63 | self.gridLayout_count.addWidget(self.label_4, 1, 0, 1, 1, QtCore.Qt.AlignHCenter)
64 | self.verticalLayout_2.addLayout(self.gridLayout_count)
65 | self.label_image = QtWidgets.QLabel(self.centralwidget)
66 | self.label_image.setGeometry(QtCore.QRect(10, 10, 960, 540))
67 | self.label_image.setStyleSheet("background-color: rgb(233, 185, 110);")
68 | self.label_image.setText("")
69 | self.label_image.setAlignment(QtCore.Qt.AlignCenter)
70 | self.label_image.setObjectName("label_image")
71 | self.widget = QtWidgets.QWidget(self.centralwidget)
72 | self.widget.setGeometry(QtCore.QRect(1020, 360, 151, 181))
73 | self.widget.setObjectName("widget")
74 | self.verticalLayout = QtWidgets.QVBoxLayout(self.widget)
75 | self.verticalLayout.setContentsMargins(0, 0, 0, 0)
76 | self.verticalLayout.setObjectName("verticalLayout")
77 | self.pushButton_openVideo = QtWidgets.QPushButton(self.widget)
78 | self.pushButton_openVideo.setObjectName("pushButton_openVideo")
79 | self.verticalLayout.addWidget(self.pushButton_openVideo)
80 | self.pushButton_selectArea = QtWidgets.QPushButton(self.widget)
81 | self.pushButton_selectArea.setObjectName("pushButton_selectArea")
82 | self.verticalLayout.addWidget(self.pushButton_selectArea)
83 | self.pushButton_start = QtWidgets.QPushButton(self.widget)
84 | self.pushButton_start.setObjectName("pushButton_start")
85 | self.verticalLayout.addWidget(self.pushButton_start)
86 | self.pushButton_pause = QtWidgets.QPushButton(self.widget)
87 | self.pushButton_pause.setObjectName("pushButton_pause")
88 | self.verticalLayout.addWidget(self.pushButton_pause)
89 | mainWindow.setCentralWidget(self.centralwidget)
90 |
91 | self.retranslateUi(mainWindow)
92 | QtCore.QMetaObject.connectSlotsByName(mainWindow)
93 |
94 | def retranslateUi(self, mainWindow):
95 | _translate = QtCore.QCoreApplication.translate
96 | mainWindow.setWindowTitle(_translate("mainWindow", "Car Counter"))
97 | self.groupBox_count.setTitle(_translate("mainWindow", "Counting Results"))
98 | self.label_truck.setText(_translate("mainWindow", "0"))
99 | self.label_7.setText(_translate("mainWindow", "bicycle"))
100 | self.label_5.setText(_translate("mainWindow", "truck"))
101 | self.label_6.setText(_translate("mainWindow", "motorbike"))
102 | self.label_motorbike.setText(_translate("mainWindow", "0"))
103 | self.label_bus.setText(_translate("mainWindow", "0"))
104 | self.label_bicycle.setText(_translate("mainWindow", "0"))
105 | self.label_12.setText(_translate("mainWindow", "sum"))
106 | self.label_3.setText(_translate("mainWindow", "car"))
107 | self.label_sum.setText(_translate("mainWindow", "0"))
108 | self.label_car.setText(_translate("mainWindow", "0"))
109 | self.label_4.setText(_translate("mainWindow", "bus"))
110 | self.pushButton_openVideo.setText(_translate("mainWindow", "Open Video"))
111 | self.pushButton_selectArea.setText(_translate("mainWindow", "Select Area"))
112 | self.pushButton_start.setText(_translate("mainWindow", "Start"))
113 | self.pushButton_pause.setText(_translate("mainWindow", "Pause"))
114 |
--------------------------------------------------------------------------------
/gui.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | mainWindow
4 |
5 |
6 |
7 | 0
8 | 0
9 | 1203
10 | 554
11 |
12 |
13 |
14 | Car Counter
15 |
16 |
17 |
18 |
19 |
20 | 990
21 | 10
22 | 211
23 | 341
24 |
25 |
26 |
27 | Counting Results
28 |
29 |
30 | -
31 |
32 |
33 | 2
34 |
35 |
36 | 2
37 |
38 |
39 | 2
40 |
41 |
42 | 2
43 |
44 |
45 | 6
46 |
47 |
-
48 |
49 |
50 | 0
51 |
52 |
53 |
54 | -
55 |
56 |
57 | bicycle
58 |
59 |
60 |
61 | -
62 |
63 |
64 | truck
65 |
66 |
67 |
68 | -
69 |
70 |
71 | motorbike
72 |
73 |
74 |
75 | -
76 |
77 |
78 | 0
79 |
80 |
81 |
82 | -
83 |
84 |
85 | 0
86 |
87 |
88 |
89 | -
90 |
91 |
92 | 0
93 |
94 |
95 |
96 | -
97 |
98 |
99 | sum
100 |
101 |
102 |
103 | -
104 |
105 |
106 | car
107 |
108 |
109 |
110 | -
111 |
112 |
113 | 0
114 |
115 |
116 |
117 | -
118 |
119 |
120 | 0
121 |
122 |
123 |
124 | -
125 |
126 |
127 | bus
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 | 10
139 | 10
140 | 960
141 | 540
142 |
143 |
144 |
145 | background-color: rgb(233, 185, 110);
146 |
147 |
148 |
149 |
150 |
151 | Qt::AlignCenter
152 |
153 |
154 |
155 |
156 |
157 | 1020
158 | 360
159 | 151
160 | 181
161 |
162 |
163 |
164 | -
165 |
166 |
167 | Open Video
168 |
169 |
170 |
171 | -
172 |
173 |
174 | Select Area
175 |
176 |
177 |
178 | -
179 |
180 |
181 | Start
182 |
183 |
184 |
185 | -
186 |
187 |
188 | Pause
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
--------------------------------------------------------------------------------
/models.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | from torch.autograd import Variable
7 | import numpy as np
8 |
9 | from utils.parse_config import *
10 | from utils.utils import build_targets, to_cpu, non_max_suppression
11 |
12 | import matplotlib.pyplot as plt
13 | import matplotlib.patches as patches
14 |
15 |
16 | def create_modules(module_defs):
17 | """
18 | Constructs module list of layer blocks from module configuration in module_defs
19 | """
20 | hyperparams = module_defs.pop(0)
21 | output_filters = [int(hyperparams["channels"])]
22 | module_list = nn.ModuleList()
23 | for module_i, module_def in enumerate(module_defs):
24 | modules = nn.Sequential()
25 |
26 | if module_def["type"] == "convolutional":
27 | bn = int(module_def["batch_normalize"])
28 | filters = int(module_def["filters"])
29 | kernel_size = int(module_def["size"])
30 | pad = (kernel_size - 1) // 2
31 | modules.add_module(
32 | f"conv_{module_i}",
33 | nn.Conv2d(
34 | in_channels=output_filters[-1],
35 | out_channels=filters,
36 | kernel_size=kernel_size,
37 | stride=int(module_def["stride"]),
38 | padding=pad,
39 | bias=not bn,
40 | ),
41 | )
42 | if bn:
43 | modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
44 | if module_def["activation"] == "leaky":
45 | modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))
46 |
47 | elif module_def["type"] == "maxpool":
48 | kernel_size = int(module_def["size"])
49 | stride = int(module_def["stride"])
50 | if kernel_size == 2 and stride == 1:
51 | modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1)))
52 | maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2))
53 | modules.add_module(f"maxpool_{module_i}", maxpool)
54 |
55 | elif module_def["type"] == "upsample":
56 | upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
57 | modules.add_module(f"upsample_{module_i}", upsample)
58 |
59 | elif module_def["type"] == "route":
60 | layers = [int(x) for x in module_def["layers"].split(",")]
61 | filters = sum([output_filters[1:][i] for i in layers])
62 | modules.add_module(f"route_{module_i}", EmptyLayer())
63 |
64 | elif module_def["type"] == "shortcut":
65 | filters = output_filters[1:][int(module_def["from"])]
66 | modules.add_module(f"shortcut_{module_i}", EmptyLayer())
67 |
68 | elif module_def["type"] == "yolo":
69 | anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
70 | # Extract anchors
71 | anchors = [int(x) for x in module_def["anchors"].split(",")]
72 | anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
73 | anchors = [anchors[i] for i in anchor_idxs]
74 | num_classes = int(module_def["classes"])
75 | img_size = int(hyperparams["height"])
76 | # Define detection layer
77 | yolo_layer = YOLOLayer(anchors, num_classes, img_size)
78 | modules.add_module(f"yolo_{module_i}", yolo_layer)
79 | # Register module list and number of output filters
80 | module_list.append(modules)
81 | output_filters.append(filters)
82 |
83 | return hyperparams, module_list
84 |
85 |
86 | class Upsample(nn.Module):
87 | """ nn.Upsample is deprecated """
88 |
89 | def __init__(self, scale_factor, mode="nearest"):
90 | super(Upsample, self).__init__()
91 | self.scale_factor = scale_factor
92 | self.mode = mode
93 |
94 | def forward(self, x):
95 | x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
96 | return x
97 |
98 |
99 | class EmptyLayer(nn.Module):
100 | """Placeholder for 'route' and 'shortcut' layers"""
101 |
102 | def __init__(self):
103 | super(EmptyLayer, self).__init__()
104 |
105 |
106 | class YOLOLayer(nn.Module):
107 | """Detection layer"""
108 |
109 | def __init__(self, anchors, num_classes, img_dim=416):
110 | super(YOLOLayer, self).__init__()
111 | self.anchors = anchors
112 | self.num_anchors = len(anchors)
113 | self.num_classes = num_classes
114 | self.ignore_thres = 0.5
115 | self.mse_loss = nn.MSELoss()
116 | self.bce_loss = nn.BCELoss()
117 | self.obj_scale = 1
118 | self.noobj_scale = 100
119 | self.metrics = {}
120 | self.img_dim = img_dim
121 | self.grid_size = 0 # grid size
122 |
123 | def compute_grid_offsets(self, grid_size, cuda=True):
124 | self.grid_size = grid_size
125 | g = self.grid_size
126 | FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
127 | self.stride = self.img_dim / self.grid_size
128 | # Calculate offsets for each grid
129 | self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor)
130 | self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor)
131 | self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors])
132 | self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
133 | self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))
134 |
135 | def forward(self, x, targets=None, img_dim=None):
136 |
137 | # Tensors for cuda support
138 | FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
139 | LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
140 | ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor
141 |
142 | self.img_dim = img_dim
143 | num_samples = x.size(0)
144 | grid_size = x.size(2)
145 |
146 | prediction = (
147 | x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
148 | .permute(0, 1, 3, 4, 2)
149 | .contiguous()
150 | )
151 |
152 | # Get outputs
153 | x = torch.sigmoid(prediction[..., 0]) # Center x
154 | y = torch.sigmoid(prediction[..., 1]) # Center y
155 | w = prediction[..., 2] # Width
156 | h = prediction[..., 3] # Height
157 | pred_conf = torch.sigmoid(prediction[..., 4]) # Conf
158 | pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.
159 |
160 | # If grid size does not match current we compute new offsets
161 | if grid_size != self.grid_size:
162 | self.compute_grid_offsets(grid_size, cuda=x.is_cuda)
163 |
164 | # Add offset and scale with anchors
165 | pred_boxes = FloatTensor(prediction[..., :4].shape)
166 | pred_boxes[..., 0] = x.data + self.grid_x
167 | pred_boxes[..., 1] = y.data + self.grid_y
168 | pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
169 | pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
170 |
171 | output = torch.cat(
172 | (
173 | pred_boxes.view(num_samples, -1, 4) * self.stride,
174 | pred_conf.view(num_samples, -1, 1),
175 | pred_cls.view(num_samples, -1, self.num_classes),
176 | ),
177 | -1,
178 | )
179 |
180 | if targets is None:
181 | return output, 0
182 | else:
183 | iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
184 | pred_boxes=pred_boxes,
185 | pred_cls=pred_cls,
186 | target=targets,
187 | anchors=self.scaled_anchors,
188 | ignore_thres=self.ignore_thres,
189 | )
190 |
191 | # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
192 | loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
193 | loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
194 | loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
195 | loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
196 | loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
197 | loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
198 | loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
199 | loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
200 | total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
201 |
202 | # Metrics
203 | cls_acc = 100 * class_mask[obj_mask].mean()
204 | conf_obj = pred_conf[obj_mask].mean()
205 | conf_noobj = pred_conf[noobj_mask].mean()
206 | conf50 = (pred_conf > 0.5).float()
207 | iou50 = (iou_scores > 0.5).float()
208 | iou75 = (iou_scores > 0.75).float()
209 | detected_mask = conf50 * class_mask * tconf
210 | precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
211 | recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
212 | recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)
213 |
214 | self.metrics = {
215 | "loss": to_cpu(total_loss).item(),
216 | "x": to_cpu(loss_x).item(),
217 | "y": to_cpu(loss_y).item(),
218 | "w": to_cpu(loss_w).item(),
219 | "h": to_cpu(loss_h).item(),
220 | "conf": to_cpu(loss_conf).item(),
221 | "cls": to_cpu(loss_cls).item(),
222 | "cls_acc": to_cpu(cls_acc).item(),
223 | "recall50": to_cpu(recall50).item(),
224 | "recall75": to_cpu(recall75).item(),
225 | "precision": to_cpu(precision).item(),
226 | "conf_obj": to_cpu(conf_obj).item(),
227 | "conf_noobj": to_cpu(conf_noobj).item(),
228 | "grid_size": grid_size,
229 | }
230 |
231 | return output, total_loss
232 |
233 |
234 | class Darknet(nn.Module):
235 | """YOLOv3 object detection model"""
236 |
237 | def __init__(self, config_path, img_size=416):
238 | super(Darknet, self).__init__()
239 | self.module_defs = parse_model_config(config_path)
240 | self.hyperparams, self.module_list = create_modules(self.module_defs)
241 | self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")]
242 | self.img_size = img_size
243 | self.seen = 0
244 | self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)
245 |
246 | def forward(self, x, targets=None):
247 | img_dim = x.shape[2]
248 | loss = 0
249 | layer_outputs, yolo_outputs = [], []
250 | for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
251 | if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
252 | x = module(x)
253 | elif module_def["type"] == "route":
254 | x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1)
255 | elif module_def["type"] == "shortcut":
256 | layer_i = int(module_def["from"])
257 | x = layer_outputs[-1] + layer_outputs[layer_i]
258 | elif module_def["type"] == "yolo":
259 | x, layer_loss = module[0](x, targets, img_dim)
260 | loss += layer_loss
261 | yolo_outputs.append(x)
262 | layer_outputs.append(x)
263 | yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1))
264 | return yolo_outputs if targets is None else (loss, yolo_outputs)
265 |
266 | def load_darknet_weights(self, weights_path):
267 | """Parses and loads the weights stored in 'weights_path'"""
268 |
269 | # Open the weights file
270 | with open(weights_path, "rb") as f:
271 | header = np.fromfile(f, dtype=np.int32, count=5) # First five are header values
272 | self.header_info = header # Needed to write header when saving weights
273 | self.seen = header[3] # number of images seen during training
274 | weights = np.fromfile(f, dtype=np.float32) # The rest are weights
275 |
276 | # Establish cutoff for loading backbone weights
277 | cutoff = None
278 | if "darknet53.conv.74" in weights_path:
279 | cutoff = 75
280 |
281 | ptr = 0
282 | for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
283 | if i == cutoff:
284 | break
285 | if module_def["type"] == "convolutional":
286 | conv_layer = module[0]
287 | if module_def["batch_normalize"]:
288 | # Load BN bias, weights, running mean and running variance
289 | bn_layer = module[1]
290 | num_b = bn_layer.bias.numel() # Number of biases
291 | # Bias
292 | bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)
293 | bn_layer.bias.data.copy_(bn_b)
294 | ptr += num_b
295 | # Weight
296 | bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)
297 | bn_layer.weight.data.copy_(bn_w)
298 | ptr += num_b
299 | # Running Mean
300 | bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)
301 | bn_layer.running_mean.data.copy_(bn_rm)
302 | ptr += num_b
303 | # Running Var
304 | bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)
305 | bn_layer.running_var.data.copy_(bn_rv)
306 | ptr += num_b
307 | else:
308 | # Load conv. bias
309 | num_b = conv_layer.bias.numel()
310 | conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)
311 | conv_layer.bias.data.copy_(conv_b)
312 | ptr += num_b
313 | # Load conv. weights
314 | num_w = conv_layer.weight.numel()
315 | conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)
316 | conv_layer.weight.data.copy_(conv_w)
317 | ptr += num_w
318 |
319 | def save_darknet_weights(self, path, cutoff=-1):
320 | """
321 | @:param path - path of the new weights file
322 | @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
323 | """
324 | fp = open(path, "wb")
325 | self.header_info[3] = self.seen
326 | self.header_info.tofile(fp)
327 |
328 | # Iterate through layers
329 | for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
330 | if module_def["type"] == "convolutional":
331 | conv_layer = module[0]
332 | # If batch norm, load bn first
333 | if module_def["batch_normalize"]:
334 | bn_layer = module[1]
335 | bn_layer.bias.data.cpu().numpy().tofile(fp)
336 | bn_layer.weight.data.cpu().numpy().tofile(fp)
337 | bn_layer.running_mean.data.cpu().numpy().tofile(fp)
338 | bn_layer.running_var.data.cpu().numpy().tofile(fp)
339 | # Load conv bias
340 | else:
341 | conv_layer.bias.data.cpu().numpy().tofile(fp)
342 | # Load conv weights
343 | conv_layer.weight.data.cpu().numpy().tofile(fp)
344 |
345 | fp.close()
346 |
--------------------------------------------------------------------------------
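A minimal sketch of how the `Darknet` class above is put to work for inference in this project: build the network from one of the cfg files under `config/`, load the pre-trained weights the README asks you to place under `weights/`, and switch to eval mode. The device handling and variable names here are illustrative, not taken from the repository.

import torch

from models import Darknet
from utils.utils import load_classes

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network from the Darknet cfg and load the original Darknet-format weights.
model = Darknet("config/yolov3.cfg", img_size=416).to(device)
model.load_darknet_weights("weights/yolov3.weights")
model.eval()

# COCO class names (the application keeps only the vehicle classes listed in the README).
class_names = load_classes("config/coco.names")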
/predict.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | from utils.utils import *
4 | from utils.datasets import *
5 | import cv2
6 | from PIL import Image
7 | import torch
8 | from torchvision import transforms
9 |
10 |
11 | def resize(image, size):
12 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
13 | return image
14 |
15 |
16 | def yolo_prediction(model, device, image,class_names):
17 | image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
18 | imgs = transforms.ToTensor()(Image.fromarray(image))
19 | c, h, w = imgs.shape
20 |     img_scale = [w / 416, h / 416, w / 416, h / 416]
21 | imgs = resize(imgs, 416)
22 | imgs = imgs.unsqueeze(0).to(device)
23 |
24 | model.eval()
25 | with torch.no_grad():
26 | outputs = model(imgs)
27 | outputs = non_max_suppression(outputs, conf_thres=0.5, nms_thres=0.45)
28 |
29 | # print(outputs)
30 | objects = []
31 | try:
32 | outputs = outputs[0].cpu().data
33 | for i, output in enumerate(outputs):
34 | item = []
35 | item.append(class_names[int(output[-1])])
36 | item.append(float(output[4]))
37 |             box = [int(value * img_scale[i]) for i, value in enumerate(output[:4])]
38 | x1,y1,x2,y2 = box
39 | x = int((x2+x1)/2)
40 | y = int((y1+y2)/2)
41 | w = x2-x1
42 | h = y2-y1
43 | item.append([x,y,w,h])
44 | objects.append(item)
45 |     except Exception:  # outputs is [None] when nothing passes the confidence threshold
46 | pass
47 | return objects
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
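For reference, a minimal sketch of calling `yolo_prediction` above on a single OpenCV frame. Model loading mirrors the sketch after `models.py`; the image path is a placeholder, and each returned item is `[class_name, confidence, [cx, cy, w, h]]` as built in the loop above.

import cv2
import torch

from models import Darknet
from predict import yolo_prediction
from utils.utils import load_classes

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Darknet("config/yolov3.cfg").to(device)
model.load_darknet_weights("weights/yolov3.weights")
class_names = load_classes("config/coco.names")

frame = cv2.imread("some_frame.jpg")  # placeholder path; any BGR image or video frame works
for name, conf, (cx, cy, w, h) in yolo_prediction(model, device, frame, class_names):
    print(name, round(conf, 2), (cx, cy, w, h))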
/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit_learn==0.21.3
2 | numpy
3 | matplotlib
4 | torch
5 | torchvision
6 | terminaltables
7 | pillow
8 | tqdm
9 | numba
10 | fire
11 | filterpy
12 | opencv-python
13 | scikit-image
14 | pytesseract
15 | PyQt5
16 | pandas
--------------------------------------------------------------------------------
/utils/.config:
--------------------------------------------------------------------------------
1 | 3577582247857115766766724119402109753611015352440240992284880932986646512687624561324362551252066885950009004775081309546579712498721403446373694387157090085286231224505645450053725901631232615038604189510648071831312164643845750188640913440678425980773097082810461453835363142185772806767051281389323550048308896544778657224119429085618530915004838231420068047280427430441205992226661770454814476085720924852343305997639832355663375736276454752176015641250406861398123958872477894486967939187447133486257111277501919826706888469965968376719017903135941319967538514819613139151903788015692897242959052053924650580372
2 | 5605377600988577560550065397254225420957436224498576618768516259123867506653695772342324276743084984269870011017428054256364217027220194986462263726663651389623329466400510449729248114987953444512598188059336858299682302760808537370136773326006433438381734159585558284654736919864102681493160466327333511033028058967854838844380496016808629272782632933313912375317044566209141444311502150231852276410702944747944105256818585060877714842901476314844375419924879849300110150679899364755163492641805650724663047662328174625989492058437973343511835728532409474547860996128819129370209775297109033581936985340670034201253
3 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/utils/__init__.py
--------------------------------------------------------------------------------
/utils/augmentations.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | import numpy as np
4 |
5 |
6 | def horisontal_flip(images, targets):
7 | images = torch.flip(images, [-1])
8 | targets[:, 2] = 1 - targets[:, 2]
9 | return images, targets
10 |
--------------------------------------------------------------------------------
/utils/datasets.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import random
3 | import os
4 | import sys
5 | import numpy as np
6 | from PIL import Image
7 | import torch
8 | import torch.nn.functional as F
9 |
10 | from utils.augmentations import horisontal_flip
11 | from torch.utils.data import Dataset
12 | import torchvision.transforms as transforms
13 |
14 |
15 | def pad_to_square(img, pad_value):
16 | c, h, w = img.shape
17 | dim_diff = np.abs(h - w)
18 | # (upper / left) padding and (lower / right) padding
19 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
20 | # Determine padding
21 | pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
22 | # Add padding
23 | img = F.pad(img, pad, "constant", value=pad_value)
24 |
25 | return img, pad
26 |
27 |
28 | def resize(image, size):
29 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
30 | return image
31 |
32 |
33 | def random_resize(images, min_size=288, max_size=448):
34 | new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0]
35 | images = F.interpolate(images, size=new_size, mode="nearest")
36 | return images
37 |
38 |
39 | class ImageFolder(Dataset):
40 | def __init__(self, folder_path, img_size=416):
41 | self.files = sorted(glob.glob("%s/*.*" % folder_path))
42 | self.img_size = img_size
43 |
44 | def __getitem__(self, index):
45 | img_path = self.files[index % len(self.files)]
46 | # Extract image as PyTorch tensor
47 | img = transforms.ToTensor()(Image.open(img_path))
48 | # Pad to square resolution
49 | img, _ = pad_to_square(img, 0)
50 | # Resize
51 | img = resize(img, self.img_size)
52 |
53 | return img_path, img
54 |
55 | def __len__(self):
56 | return len(self.files)
57 |
58 |
59 | class ListDataset(Dataset):
60 | def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True):
61 | with open(list_path, "r") as file:
62 | self.img_files = file.readlines()
63 |
64 | self.label_files = [
65 | path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt")
66 | for path in self.img_files
67 | ]
68 | self.img_size = img_size
69 | self.max_objects = 100
70 | self.augment = augment
71 | self.multiscale = multiscale
72 | self.normalized_labels = normalized_labels
73 | self.min_size = self.img_size - 3 * 32
74 | self.max_size = self.img_size + 3 * 32
75 | self.batch_count = 0
76 |
77 | def __getitem__(self, index):
78 |
79 | # ---------
80 | # Image
81 | # ---------
82 |
83 | img_path = self.img_files[index % len(self.img_files)].rstrip()
84 |
85 | # Extract image as PyTorch tensor
86 | img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))
87 |
88 | # Handle images with less than three channels
89 | if len(img.shape) != 3:
90 | img = img.unsqueeze(0)
91 |             img = img.expand((3, *img.shape[1:]))
92 |
93 | _, h, w = img.shape
94 | h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
95 | # Pad to square resolution
96 | img, pad = pad_to_square(img, 0)
97 | _, padded_h, padded_w = img.shape
98 |
99 | # ---------
100 | # Label
101 | # ---------
102 |
103 | label_path = self.label_files[index % len(self.img_files)].rstrip()
104 |
105 | targets = None
106 | if os.path.exists(label_path):
107 | boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
108 | # Extract coordinates for unpadded + unscaled image
109 | x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
110 | y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
111 | x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
112 | y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
113 | # Adjust for added padding
114 | x1 += pad[0]
115 | y1 += pad[2]
116 | x2 += pad[1]
117 | y2 += pad[3]
118 | # Returns (x, y, w, h)
119 | boxes[:, 1] = ((x1 + x2) / 2) / padded_w
120 | boxes[:, 2] = ((y1 + y2) / 2) / padded_h
121 | boxes[:, 3] *= w_factor / padded_w
122 | boxes[:, 4] *= h_factor / padded_h
123 |
124 | targets = torch.zeros((len(boxes), 6))
125 | targets[:, 1:] = boxes
126 |
127 | # Apply augmentations
128 | if self.augment:
129 | if np.random.random() < 0.5:
130 | img, targets = horisontal_flip(img, targets)
131 |
132 | return img_path, img, targets
133 |
134 |
135 | def collate_fn(self, batch):
136 | paths, imgs, targets = list(zip(*batch))
137 | # Remove empty placeholder targets
138 | targets = [boxes for boxes in targets if boxes is not None]
139 | # Add sample index to targets
140 | for i, boxes in enumerate(targets):
141 | boxes[:, 0] = i
142 | targets = torch.cat(targets, 0)
143 | # Selects new image size every tenth batch
144 | if self.multiscale and self.batch_count % 10 == 0:
145 | self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32))
146 | # Resize images to input shape
147 | imgs = torch.stack([resize(img, self.img_size) for img in imgs])
148 | self.batch_count += 1
149 | return paths, imgs, targets
150 |
151 | def __len__(self):
152 | return len(self.img_files)
153 |
--------------------------------------------------------------------------------
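The `ListDataset` above targets training-style pipelines rather than the GUI app, but a short sketch shows how its custom `collate_fn` is meant to be wired into a `DataLoader`. The list file path is a placeholder; each line is expected to be an image path with a matching label file under a sibling `labels/` directory, as the class assumes.

from torch.utils.data import DataLoader

from utils.datasets import ListDataset

dataset = ListDataset("data/train.txt", img_size=416, augment=True, multiscale=True)
loader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=dataset.collate_fn,  # adds the sample index to each target row and handles multiscale resizing
)

for paths, imgs, targets in loader:
    # imgs: (B, 3, S, S); targets: (N, 6) rows of [sample_idx, class, cx, cy, w, h] in normalized coordinates
    break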
/utils/logger.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | class Logger(object):
5 | def __init__(self, log_dir):
6 | """Create a summary writer logging to log_dir."""
7 | self.writer = tf.summary.FileWriter(log_dir)
8 |
9 | def scalar_summary(self, tag, value, step):
10 | """Log a scalar variable."""
11 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
12 | self.writer.add_summary(summary, step)
13 |
14 | def list_of_scalars_summary(self, tag_value_pairs, step):
15 | """Log scalar variables."""
16 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value) for tag, value in tag_value_pairs])
17 | self.writer.add_summary(summary, step)
18 |
--------------------------------------------------------------------------------
/utils/parse_config.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | def parse_model_config(path):
4 | """Parses the yolo-v3 layer configuration file and returns module definitions"""
5 | file = open(path, 'r')
6 | lines = file.read().split('\n')
7 | lines = [x for x in lines if x and not x.startswith('#')]
8 |     lines = [x.rstrip().lstrip() for x in lines]  # strip leading/trailing whitespace
9 | module_defs = []
10 | for line in lines:
11 | if line.startswith('['): # This marks the start of a new block
12 | module_defs.append({})
13 | module_defs[-1]['type'] = line[1:-1].rstrip()
14 | if module_defs[-1]['type'] == 'convolutional':
15 | module_defs[-1]['batch_normalize'] = 0
16 | else:
17 | key, value = line.split("=")
18 | value = value.strip()
19 | module_defs[-1][key.rstrip()] = value.strip()
20 |
21 | return module_defs
22 |
23 | def parse_data_config(path):
24 | """Parses the data configuration file"""
25 | options = dict()
26 | options['gpus'] = '0,1,2,3'
27 | options['num_workers'] = '10'
28 | with open(path, 'r') as fp:
29 | lines = fp.readlines()
30 | for line in lines:
31 | line = line.strip()
32 | if line == '' or line.startswith('#'):
33 | continue
34 | key, value = line.split('=')
35 | options[key.strip()] = value.strip()
36 | return options
37 |
--------------------------------------------------------------------------------
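A small illustration of what `parse_model_config` above returns. The cfg fragment in the comment is abbreviated; the point is that every `[block]` becomes a dict of string values, with `batch_normalize` defaulting to 0 for convolutional blocks.

from utils.parse_config import parse_model_config

# For a cfg containing, e.g.
#   [convolutional]
#   batch_normalize=1
#   filters=16
#   size=3
#   activation=leaky
# the corresponding entry looks like
#   {'type': 'convolutional', 'batch_normalize': '1', 'filters': '16', 'size': '3', 'activation': 'leaky'}
module_defs = parse_model_config("config/yolov3-tiny.cfg")
print(module_defs[0]["type"])   # 'net' (hyperparameter block consumed by create_modules)
print(len(module_defs))         # number of blocks in the cfg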
/utils/sort.py:
--------------------------------------------------------------------------------
1 | """
2 | SORT: A Simple, Online and Realtime Tracker
3 | Copyright (C) 2016 Alex Bewley alex@dynamicdetection.com
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, either version 3 of the License, or
7 | (at your option) any later version.
8 | This program is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU General Public License for more details.
12 | You should have received a copy of the GNU General Public License
13 | along with this program. If not, see <http://www.gnu.org/licenses/>.
14 | """
15 | from __future__ import print_function
16 |
17 | from numba import jit
18 | import os.path
19 | import numpy as np
20 | import matplotlib.pyplot as plt
21 | import matplotlib.patches as patches
22 | from skimage import io
23 | import glob
24 | from sklearn.utils.linear_assignment_ import linear_assignment
25 | # from scipy.optimize import linear_sum_assignment
26 | import time
27 | import argparse
28 | from filterpy.kalman import KalmanFilter
29 |
30 | @jit
31 | def iou(bb_test,bb_gt):
32 | """
33 |   Computes IOU between two bboxes in the form [x1,y1,x2,y2]
34 | """
35 | xx1 = np.maximum(bb_test[0], bb_gt[0])
36 | yy1 = np.maximum(bb_test[1], bb_gt[1])
37 | xx2 = np.minimum(bb_test[2], bb_gt[2])
38 | yy2 = np.minimum(bb_test[3], bb_gt[3])
39 | w = np.maximum(0., xx2 - xx1)
40 | h = np.maximum(0., yy2 - yy1)
41 | wh = w * h
42 | o = wh / ((bb_test[2]-bb_test[0])*(bb_test[3]-bb_test[1])
43 | + (bb_gt[2]-bb_gt[0])*(bb_gt[3]-bb_gt[1]) - wh)
44 | return(o)
45 |
46 | def convert_bbox_to_z(bbox):
47 | """
48 | Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
49 | [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
50 | the aspect ratio
51 | """
52 | w = bbox[2]-bbox[0]
53 | h = bbox[3]-bbox[1]
54 | x = bbox[0]+w/2.
55 | y = bbox[1]+h/2.
56 | s = w*h #scale is just area
57 | r = w/float(h)
58 | return np.array([x,y,s,r]).reshape((4,1))
59 |
60 | def convert_x_to_bbox(x,score=None):
61 | """
62 | Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
63 | [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
64 | """
65 | w = np.sqrt(x[2]*x[3])
66 | h = x[2]/w
67 | if(score==None):
68 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4))
69 | else:
70 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5))
71 |
72 |
73 | class KalmanBoxTracker(object):
74 | """
75 |   This class represents the internal state of individual tracked objects observed as bbox.
76 | """
77 | count = 0
78 | def __init__(self,bbox):
79 | """
80 | Initialises a tracker using initial bounding box.
81 | """
82 | #define constant velocity model
83 | self.kf = KalmanFilter(dim_x=7, dim_z=4)
84 | self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
85 | self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])
86 |
87 | self.kf.R[2:,2:] *= 10.
88 | self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
89 | self.kf.P *= 10.
90 | self.kf.Q[-1,-1] *= 0.01
91 | self.kf.Q[4:,4:] *= 0.01
92 |
93 | self.kf.x[:4] = convert_bbox_to_z(bbox)
94 | self.time_since_update = 0
95 | self.id = KalmanBoxTracker.count
96 | KalmanBoxTracker.count += 1
97 | self.history = []
98 | self.hits = 0
99 | self.hit_streak = 0
100 | self.age = 0
101 |
102 | def update(self,bbox):
103 | """
104 | Updates the state vector with observed bbox.
105 | """
106 | self.time_since_update = 0
107 | self.history = []
108 | self.hits += 1
109 | self.hit_streak += 1
110 | self.kf.update(convert_bbox_to_z(bbox))
111 |
112 | def predict(self):
113 | """
114 | Advances the state vector and returns the predicted bounding box estimate.
115 | """
116 | if((self.kf.x[6]+self.kf.x[2])<=0):
117 | self.kf.x[6] *= 0.0
118 | self.kf.predict()
119 | self.age += 1
120 | if(self.time_since_update>0):
121 | self.hit_streak = 0
122 | self.time_since_update += 1
123 | self.history.append(convert_x_to_bbox(self.kf.x))
124 | return self.history[-1]
125 |
126 | def get_state(self):
127 | """
128 | Returns the current bounding box estimate.
129 | """
130 | return convert_x_to_bbox(self.kf.x)
131 |
132 | def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
133 | """
134 | Assigns detections to tracked object (both represented as bounding boxes)
135 | Returns 3 lists of matches, unmatched_detections and unmatched_trackers
136 | """
137 | if(len(trackers)==0):
138 | return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)
139 | iou_matrix = np.zeros((len(detections),len(trackers)),dtype=np.float32)
140 |
141 | for d,det in enumerate(detections):
142 | for t,trk in enumerate(trackers):
143 | iou_matrix[d,t] = iou(det,trk)
144 | matched_indices = linear_assignment(-iou_matrix)
145 |
146 | # matched_indices = np.array(matched_indices).reshape((-1,2))
147 | # print(iou_matrix.shape,matched_indices.shape)
148 | unmatched_detections = []
149 | for d,det in enumerate(detections):
150 | if(d not in matched_indices[:,0]):
151 | unmatched_detections.append(d)
152 | unmatched_trackers = []
153 | for t,trk in enumerate(trackers):
154 | if(t not in matched_indices[:,1]):
155 | unmatched_trackers.append(t)
156 |
157 | #filter out matched with low IOU
158 | matches = []
159 | for m in matched_indices:
160 |     if(iou_matrix[m[0],m[1]]<iou_threshold):
161 |       unmatched_detections.append(m[0])
162 |       unmatched_trackers.append(m[1])
163 |     else:
164 |       matches.append(m.reshape(1,2))
165 |   if(len(matches)==0):
166 |     matches = np.empty((0,2),dtype=int)
167 |   else:
168 |     matches = np.concatenate(matches,axis=0)
169 |
170 |   return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
171 |
172 |
173 |
174 | class Sort(object):
175 |   def __init__(self,max_age=1,min_hits=3):
176 |     """
177 |     Sets key parameters for SORT
178 |     """
179 |     self.max_age = max_age
180 |     self.min_hits = min_hits
181 |     self.trackers = []
182 |     self.frame_count = 0
183 |     self.counts = 0
184 |
185 |   def update(self,dets):
186 |     """
187 |     Params:
188 |       dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
189 |     Requires: this method must be called once for each frame even with empty detections.
190 |     Returns a similar array, where the last column is the object ID.
191 |
192 |     NOTE: The number of objects returned may differ from the number of detections provided.
193 |     """
194 |     self.frame_count += 1
195 |     #get predicted locations from existing trackers.
196 |     trks = np.zeros((len(self.trackers),5))
197 |     to_del = []
198 |     ret = []
199 |     for t,trk in enumerate(trks):
200 |       pos = self.trackers[t].predict()[0]
201 |       trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
202 |       if(np.any(np.isnan(pos))):
203 |         to_del.append(t)
204 |     trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
205 |     for t in reversed(to_del):
206 |       self.trackers.pop(t)
207 |     matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets,trks)
208 |
209 |     #update matched trackers with assigned detections
210 |     for t,trk in enumerate(self.trackers):
211 |       if(t not in unmatched_trks):
212 |         d = matched[np.where(matched[:,1]==t)[0],0]
213 |         trk.update(dets[d,:][0])
214 |
215 |     #create and initialise new trackers for unmatched detections
216 |     for i in unmatched_dets:
217 |       trk = KalmanBoxTracker(dets[i,:])
218 |       self.trackers.append(trk)
219 |     i = len(self.trackers)
220 |     for trk in reversed(self.trackers):
221 |       d = trk.get_state()[0]
222 |       if((trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits)):
223 | ret.append(np.concatenate((d,[trk.id+1])).reshape(1,-1)) # +1 as MOT benchmark requires positive
224 |
225 | i -= 1
226 | #remove dead tracklet
227 | if(trk.time_since_update > self.max_age):
228 | self.trackers.pop(i)
229 | self.counts = KalmanBoxTracker.count
230 | if(len(ret)>0):
231 | return np.concatenate(ret)
232 |
233 | return np.empty((0,5))
234 |
235 | def parse_args():
236 | """Parse input arguments."""
237 | parser = argparse.ArgumentParser(description='SORT demo')
238 | parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true')
239 | args = parser.parse_args()
240 | return args
241 |
242 | if __name__ == '__main__':
243 | # all train
244 | sequences = ['PETS09-S2L1','TUD-Campus','TUD-Stadtmitte','ETH-Bahnhof','ETH-Sunnyday','ETH-Pedcross2','KITTI-13','KITTI-17','ADL-Rundle-6','ADL-Rundle-8','Venice-2']
245 | args = parse_args()
246 | display = args.display
247 | phase = 'train'
248 | total_time = 0.0
249 | total_frames = 0
250 | colours = np.random.rand(32,3) #used only for display
251 | if(display):
252 | if not os.path.exists('mot_benchmark'):
253 | print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
254 | exit()
255 | plt.ion()
256 | fig = plt.figure()
257 |
258 | if not os.path.exists('output'):
259 | os.makedirs('output')
260 |
261 | for seq in sequences:
262 | mot_tracker = Sort() #create instance of the SORT tracker
263 | seq_dets = np.loadtxt('data/%s/det.txt'%(seq),delimiter=',') #load detections
264 | with open('output/%s.txt'%(seq),'w') as out_file:
265 | print("Processing %s."%(seq))
266 | for frame in range(int(seq_dets[:,0].max())):
267 | frame += 1 #detection and frame numbers begin at 1
268 | dets = seq_dets[seq_dets[:,0]==frame,2:7]
269 |       dets[:,2:4] += dets[:,0:2] #convert [x1,y1,w,h] to [x1,y1,x2,y2]
270 | total_frames += 1
271 |
272 | if(display):
273 | ax1 = fig.add_subplot(111, aspect='equal')
274 | fn = 'mot_benchmark/%s/%s/img1/%06d.jpg'%(phase,seq,frame)
275 | im =io.imread(fn)
276 | ax1.imshow(im)
277 | plt.title(seq+' Tracked Targets')
278 |
279 | start_time = time.time()
280 | trackers = mot_tracker.update(dets)
281 | cycle_time = time.time() - start_time
282 | total_time += cycle_time
283 |
284 | for d in trackers:
285 | print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file)
286 | if(display):
287 | d = d.astype(np.int32)
288 | ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:]))
289 | ax1.set_adjustable('box-forced')
290 |
291 | if(display):
292 | fig.canvas.flush_events()
293 | plt.draw()
294 | ax1.cla()
295 |
296 | print("Total Tracking took: %.3f for %d frames or %.1f FPS"%(total_time,total_frames,total_frames/total_time))
297 | if(display):
298 | print("Note: to get real runtime results run without the option: --display")
299 |
--------------------------------------------------------------------------------
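A minimal sketch of driving the `Sort` tracker above frame by frame, outside the MOT benchmark demo in `__main__`. The detection values are made up for illustration; each detection row is `[x1, y1, x2, y2, score]`, and `update` returns rows of `[x1, y1, x2, y2, track_id]` and must be called once per frame.

import numpy as np

from utils.sort import Sort

tracker = Sort(max_age=1, min_hits=3)

# Three consecutive frames of a single (illustrative) detection.
frames = [
    np.array([[100., 120., 180., 200., 0.90]]),
    np.array([[104., 122., 184., 203., 0.80]]),
    np.array([[108., 125., 188., 206., 0.85]]),
]
for dets in frames:
    tracks = tracker.update(dets)
    for x1, y1, x2, y2, track_id in tracks:
        print(int(track_id), int(x1), int(y1), int(x2), int(y2))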
/utils/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import math
3 | import time
4 | import tqdm
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | from torch.autograd import Variable
9 | import numpy as np
10 | import matplotlib.pyplot as plt
11 | import matplotlib.patches as patches
12 |
13 |
14 | def to_cpu(tensor):
15 | return tensor.detach().cpu()
16 |
17 |
18 | def load_classes(path):
19 | """
20 | Loads class labels at 'path'
21 | """
22 | fp = open(path, "r")
23 | names = fp.read().split("\n")[:-1]
24 | return names
25 |
26 |
27 | def weights_init_normal(m):
28 | classname = m.__class__.__name__
29 | if classname.find("Conv") != -1:
30 | torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
31 | elif classname.find("BatchNorm2d") != -1:
32 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
33 | torch.nn.init.constant_(m.bias.data, 0.0)
34 |
35 |
36 | def rescale_boxes(boxes, current_dim, original_shape):
37 | """ Rescales bounding boxes to the original shape """
38 | orig_h, orig_w = original_shape
39 | # The amount of padding that was added
40 | pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
41 | pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
42 | # Image height and width after padding is removed
43 | unpad_h = current_dim - pad_y
44 | unpad_w = current_dim - pad_x
45 | # Rescale bounding boxes to dimension of original image
46 | boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
47 | boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
48 | boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
49 | boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
50 | return boxes
51 |
52 |
53 | def xywh2xyxy(x):
54 | y = x.new(x.shape)
55 | y[..., 0] = x[..., 0] - x[..., 2] / 2
56 | y[..., 1] = x[..., 1] - x[..., 3] / 2
57 | y[..., 2] = x[..., 0] + x[..., 2] / 2
58 | y[..., 3] = x[..., 1] + x[..., 3] / 2
59 | return y
60 |
61 |
62 | def ap_per_class(tp, conf, pred_cls, target_cls):
63 | """ Compute the average precision, given the recall and precision curves.
64 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
65 | # Arguments
66 | tp: True positives (list).
67 | conf: Objectness value from 0-1 (list).
68 | pred_cls: Predicted object classes (list).
69 | target_cls: True object classes (list).
70 | # Returns
71 | The average precision as computed in py-faster-rcnn.
72 | """
73 |
74 | # Sort by objectness
75 | i = np.argsort(-conf)
76 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
77 |
78 | # Find unique classes
79 | unique_classes = np.unique(target_cls)
80 |
81 | # Create Precision-Recall curve and compute AP for each class
82 | ap, p, r = [], [], []
83 | for c in tqdm.tqdm(unique_classes, desc="Computing AP"):
84 | i = pred_cls == c
85 | n_gt = (target_cls == c).sum() # Number of ground truth objects
86 | n_p = i.sum() # Number of predicted objects
87 |
88 | if n_p == 0 and n_gt == 0:
89 | continue
90 | elif n_p == 0 or n_gt == 0:
91 | ap.append(0)
92 | r.append(0)
93 | p.append(0)
94 | else:
95 | # Accumulate FPs and TPs
96 | fpc = (1 - tp[i]).cumsum()
97 | tpc = (tp[i]).cumsum()
98 |
99 | # Recall
100 | recall_curve = tpc / (n_gt + 1e-16)
101 | r.append(recall_curve[-1])
102 |
103 | # Precision
104 | precision_curve = tpc / (tpc + fpc)
105 | p.append(precision_curve[-1])
106 |
107 | # AP from recall-precision curve
108 | ap.append(compute_ap(recall_curve, precision_curve))
109 |
110 | # Compute F1 score (harmonic mean of precision and recall)
111 | p, r, ap = np.array(p), np.array(r), np.array(ap)
112 | f1 = 2 * p * r / (p + r + 1e-16)
113 |
114 | return p, r, ap, f1, unique_classes.astype("int32")
115 |
116 |
117 | def compute_ap(recall, precision):
118 | """ Compute the average precision, given the recall and precision curves.
119 | Code originally from https://github.com/rbgirshick/py-faster-rcnn.
120 |
121 | # Arguments
122 | recall: The recall curve (list).
123 | precision: The precision curve (list).
124 | # Returns
125 | The average precision as computed in py-faster-rcnn.
126 | """
127 | # correct AP calculation
128 | # first append sentinel values at the end
129 | mrec = np.concatenate(([0.0], recall, [1.0]))
130 | mpre = np.concatenate(([0.0], precision, [0.0]))
131 |
132 | # compute the precision envelope
133 | for i in range(mpre.size - 1, 0, -1):
134 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
135 |
136 | # to calculate area under PR curve, look for points
137 | # where X axis (recall) changes value
138 | i = np.where(mrec[1:] != mrec[:-1])[0]
139 |
140 | # and sum (\Delta recall) * prec
141 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
142 | return ap
143 |
144 |
145 | def get_batch_statistics(outputs, targets, iou_threshold):
146 | """ Compute true positives, predicted scores and predicted labels per sample """
147 | batch_metrics = []
148 | for sample_i in range(len(outputs)):
149 |
150 | if outputs[sample_i] is None:
151 | continue
152 |
153 | output = outputs[sample_i]
154 | pred_boxes = output[:, :4]
155 | pred_scores = output[:, 4]
156 | pred_labels = output[:, -1]
157 |
158 | true_positives = np.zeros(pred_boxes.shape[0])
159 |
160 | annotations = targets[targets[:, 0] == sample_i][:, 1:]
161 | target_labels = annotations[:, 0] if len(annotations) else []
162 | if len(annotations):
163 | detected_boxes = []
164 | target_boxes = annotations[:, 1:]
165 |
166 | for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
167 |
168 | # If targets are found break
169 | if len(detected_boxes) == len(annotations):
170 | break
171 |
172 | # Ignore if label is not one of the target labels
173 | if pred_label not in target_labels:
174 | continue
175 |
176 | iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0)
177 | if iou >= iou_threshold and box_index not in detected_boxes:
178 | true_positives[pred_i] = 1
179 | detected_boxes += [box_index]
180 | batch_metrics.append([true_positives, pred_scores, pred_labels])
181 | return batch_metrics
182 |
183 |
184 | def bbox_wh_iou(wh1, wh2):
185 | wh2 = wh2.t()
186 | w1, h1 = wh1[0], wh1[1]
187 | w2, h2 = wh2[0], wh2[1]
188 | inter_area = torch.min(w1, w2) * torch.min(h1, h2)
189 | union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
190 | return inter_area / union_area
191 |
192 |
193 | def bbox_iou(box1, box2, x1y1x2y2=True):
194 | """
195 | Returns the IoU of two bounding boxes
196 | """
197 | if not x1y1x2y2:
198 | # Transform from center and width to exact coordinates
199 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
200 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
201 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
202 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
203 | else:
204 | # Get the coordinates of bounding boxes
205 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
206 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
207 |
208 |     # get the coordinates of the intersection rectangle
209 | inter_rect_x1 = torch.max(b1_x1, b2_x1)
210 | inter_rect_y1 = torch.max(b1_y1, b2_y1)
211 | inter_rect_x2 = torch.min(b1_x2, b2_x2)
212 | inter_rect_y2 = torch.min(b1_y2, b2_y2)
213 | # Intersection area
214 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
215 | inter_rect_y2 - inter_rect_y1 + 1, min=0
216 | )
217 | # Union Area
218 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
219 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
220 |
221 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
222 |
223 | return iou
224 |
225 |
226 | def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
227 | """
228 | Removes detections with lower object confidence score than 'conf_thres' and performs
229 | Non-Maximum Suppression to further filter detections.
230 | Returns detections with shape:
231 | (x1, y1, x2, y2, object_conf, class_score, class_pred)
232 | """
233 |
234 | # From (center x, center y, width, height) to (x1, y1, x2, y2)
235 | prediction[..., :4] = xywh2xyxy(prediction[..., :4])
236 | output = [None for _ in range(len(prediction))]
237 | for image_i, image_pred in enumerate(prediction):
238 | # Filter out confidence scores below threshold
239 | image_pred = image_pred[image_pred[:, 4] >= conf_thres]
240 | # If none are remaining => process next image
241 | if not image_pred.size(0):
242 | continue
243 | # Object confidence times class confidence
244 | score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
245 | # Sort by it
246 | image_pred = image_pred[(-score).argsort()]
247 | class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
248 | detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
249 | # Perform non-maximum suppression
250 | keep_boxes = []
251 | while detections.size(0):
252 | large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres
253 | label_match = detections[0, -1] == detections[:, -1]
254 | # Indices of boxes with lower confidence scores, large IOUs and matching labels
255 | invalid = large_overlap & label_match
256 | weights = detections[invalid, 4:5]
257 | # Merge overlapping bboxes by order of confidence
258 | detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum()
259 | keep_boxes += [detections[0]]
260 | detections = detections[~invalid]
261 | if keep_boxes:
262 | output[image_i] = torch.stack(keep_boxes)
263 |
264 | return output
265 |
266 |
267 | def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres):
268 |
269 | ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor
270 | FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor
271 |
272 | nB = pred_boxes.size(0)
273 | nA = pred_boxes.size(1)
274 | nC = pred_cls.size(-1)
275 | nG = pred_boxes.size(2)
276 |
277 | # Output tensors
278 | obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0)
279 | noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1)
280 | class_mask = FloatTensor(nB, nA, nG, nG).fill_(0)
281 | iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0)
282 | tx = FloatTensor(nB, nA, nG, nG).fill_(0)
283 | ty = FloatTensor(nB, nA, nG, nG).fill_(0)
284 | tw = FloatTensor(nB, nA, nG, nG).fill_(0)
285 | th = FloatTensor(nB, nA, nG, nG).fill_(0)
286 | tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0)
287 |
288 | # Convert to position relative to box
289 | target_boxes = target[:, 2:6] * nG
290 | gxy = target_boxes[:, :2]
291 | gwh = target_boxes[:, 2:]
292 | # Get anchors with best iou
293 | ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])
294 | best_ious, best_n = ious.max(0)
295 | # Separate target values
296 | b, target_labels = target[:, :2].long().t()
297 | gx, gy = gxy.t()
298 | gw, gh = gwh.t()
299 | gi, gj = gxy.long().t()
300 | # Set masks
301 | obj_mask[b, best_n, gj, gi] = 1
302 | noobj_mask[b, best_n, gj, gi] = 0
303 |
304 | # Set noobj mask to zero where iou exceeds ignore threshold
305 | for i, anchor_ious in enumerate(ious.t()):
306 | noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0
307 |
308 | # Coordinates
309 | tx[b, best_n, gj, gi] = gx - gx.floor()
310 | ty[b, best_n, gj, gi] = gy - gy.floor()
311 | # Width and height
312 | tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
313 | th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
314 | # One-hot encoding of label
315 | tcls[b, best_n, gj, gi, target_labels] = 1
316 | # Compute label correctness and iou at best anchor
317 | class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float()
318 | iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False)
319 |
320 | tconf = obj_mask.float()
321 | return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf
322 |
--------------------------------------------------------------------------------
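To make the IoU convention above concrete, a tiny worked example with hand-made boxes (numbers are illustrative). Note that `bbox_iou` adds `+1` to widths and heights, i.e. it treats the corner coordinates as inclusive pixel indices.

import torch

from utils.utils import bbox_iou, xywh2xyxy

box_a = torch.tensor([[0.0, 0.0, 10.0, 10.0]])   # corner format (x1, y1, x2, y2)
box_b = torch.tensor([[5.0, 5.0, 15.0, 15.0]])

# intersection = 6 * 6 = 36, union = 121 + 121 - 36 = 206  ->  IoU ~= 0.175
print(bbox_iou(box_a, box_b))

# The same first box expressed as (cx, cy, w, h) and converted before comparing:
box_c = xywh2xyxy(torch.tensor([[5.0, 5.0, 10.0, 10.0]]))
print(bbox_iou(box_c, box_b))  # identical result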