├── .gitignore ├── LICENSE ├── README.md ├── app.py ├── asserts ├── demo1.gif ├── demo1.mp4 ├── demo2.gif ├── demo2.mp4 ├── demo3.gif ├── demo3.mp4 └── test.mp4 ├── config.py ├── config ├── coco.data ├── coco.names ├── yolov3-tiny.cfg └── yolov3.cfg ├── counter.py ├── gui.py ├── gui.ui ├── models.py ├── predict.py ├── requirements.txt └── utils ├── .config ├── __init__.py ├── augmentations.py ├── datasets.py ├── logger.py ├── parse_config.py ├── sort.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | __pycache__/ 3 | weights/* 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 wsh122333 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multi-type_vehicles_flow_statistics 2 | Counts vehicles of multiple types using the YOLOv3 and SORT algorithms; implemented in PyTorch. 3 | Detects and tracks vehicles of the classes \["bicycle","bus","car","motorbike","truck"]. 4 | 5 | ## References 6 | - yolov3-darknet https://github.com/pjreddie/darknet 7 | - yolov3-pytorch https://github.com/eriklindernoren/PyTorch-YOLOv3 8 | - sort https://github.com/abewley/sort 9 | 10 | ## Dependencies 11 | - Ubuntu / Windows 12 | - CUDA >= 10.0 13 | - Python >= 3.6 14 | - `pip3 install -r requirements.txt` 15 | 16 | ## Usage 17 | 1. Download the pre-trained YOLOv3 weights file [here](https://pjreddie.com/media/files/yolov3.weights) and put it into the `weights` directory; 18 | 2. Run `python3 app.py`; 19 | 3. Open a video, double-click the image to pick the corners of the counting area, then start; 20 | 4. After detection and tracking finish, the result video and `results.txt` are saved under the `results` directory; each line of `results.txt` has the format \[videoName, id, objectName], one line per counted vehicle.
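   For example, a run on the bundled `asserts/test.mp4` clip would append space-separated lines such as the following to `results/results.txt` (the IDs and classes shown are illustrative):

   ```
   test.mp4 1 car
   test.mp4 2 truck
   test.mp4 3 car
   ```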
21 | 22 | ## Demo 23 | ![avatar](https://github.com/wsh122333/Multi-type_vehicles_flow_statistics/raw/master/asserts/demo1.gif) 24 | 25 | ![avatar](https://github.com/wsh122333/Multi-type_vehicles_flow_statistics/raw/master/asserts/demo2.gif) 26 | 27 | ![avatar](https://github.com/wsh122333/Multi-type_vehicles_flow_statistics/raw/master/asserts/demo3.gif) 28 | 29 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog 3 | from PyQt5.QtGui import QImage, QPixmap 4 | from gui import * 5 | import copy 6 | from counter import CounterThread 7 | from utils.sort import * 8 | from models import * 9 | from utils.utils import * 10 | from utils.datasets import * 11 | from config import * 12 | 13 | class App(QMainWindow,Ui_mainWindow): 14 | def __init__(self): 15 | super(App,self).__init__() 16 | self.setupUi(self) 17 | self.label_image_size = (self.label_image.geometry().width(),self.label_image.geometry().height()) 18 | self.video = None 19 | self.exampleImage = None 20 | self.imgScale = None 21 | self.get_points_flag = 0 22 | self.countArea = [] 23 | self.road_code = None 24 | self.time_code = None 25 | self.show_label = names 26 | 27 | #button function 28 | self.pushButton_selectArea.clicked.connect(self.select_area) 29 | self.pushButton_openVideo.clicked.connect(self.open_video) 30 | self.pushButton_start.clicked.connect(self.start_count) 31 | self.pushButton_pause.clicked.connect(self.pause) 32 | self.label_image.mouseDoubleClickEvent = self.get_points 33 | 34 | 35 | self.pushButton_selectArea.setEnabled(False) 36 | self.pushButton_start.setEnabled(False) 37 | self.pushButton_pause.setEnabled(False) 38 | 39 | #some flags 40 | self.running_flag = 0 41 | self.pause_flag = 0 42 | self.counter_thread_start_flag = 0 43 | 44 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 45 | 46 | 47 | data_config = "config/coco.data" 48 | weights_path = "weights/yolov3.weights" 49 | model_def = "config/yolov3.cfg" 50 | data_config = parse_data_config(data_config) 51 | self.yolo_class_names = load_classes(data_config["names"]) 52 | 53 | # Initiate model 54 | print("Loading model ...") 55 | self.yolo_model = Darknet(model_def).to(self.device) 56 | if weights_path.endswith(".weights"): 57 | # Load darknet weights 58 | self.yolo_model.load_darknet_weights(weights_path) 59 | else: 60 | # Load checkpoint weights 61 | self.yolo_model.load_state_dict(torch.load(weights_path)) 62 | 63 | 64 | # counter Thread 65 | self.counterThread = CounterThread(self.yolo_model,self.yolo_class_names,self.device) 66 | self.counterThread.sin_counterResult.connect(self.show_image_label) 67 | self.counterThread.sin_done.connect(self.done) 68 | self.counterThread.sin_counter_results.connect(self.update_counter_results) 69 | 70 | 71 | 72 | def open_video(self): 73 | openfile_name = QFileDialog.getOpenFileName(self,'Open video','','Video files(*.avi , *.mp4)') 74 | self.videoList = [openfile_name[0]] 75 | 76 | # opendir_name = QFileDialog.getExistingDirectory(self, "Open dir", "./") 77 | # self.videoList = [os.path.join(opendir_name,item) for item in os.listdir(opendir_name)] 78 | # self.videoList = list(filter(lambda x: not os.path.isdir(x) , self.videoList)) 79 | # self.videoList.sort() 80 | 81 | vid = cv2.VideoCapture(self.videoList[0]) 82 | 83 | # self.videoWriter = cv2.VideoWriter(openfile_name[0].split("/")[-1], 
cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), 10, (1920, 1080)) 84 | 85 | while vid.isOpened(): 86 | ret, frame = vid.read() 87 | if ret: 88 | self.exampleImage = frame 89 | self.show_image_label(frame) 90 | self.imgScale = np.array(frame.shape[:2]) / [self.label_image_size[1], self.label_image_size[0]] 91 | vid.release() 92 | break 93 | 94 | self.pushButton_selectArea.setEnabled(True) 95 | self.pushButton_start.setText("Start") 96 | self.pushButton_start.setEnabled(False) 97 | self.pushButton_pause.setText("Pause") 98 | self.pushButton_pause.setEnabled(False) 99 | 100 | #clear counting results 101 | KalmanBoxTracker.count = 0 102 | self.label_sum.setText("0") 103 | self.label_sum.repaint() 104 | 105 | 106 | def get_points(self, event): 107 | if self.get_points_flag: 108 | x = event.x() 109 | y = event.y() 110 | self.countArea.append([int(x*self.imgScale[1]),int(y*self.imgScale[0])]) 111 | exampleImageWithArea = copy.deepcopy(self.exampleImage) 112 | for point in self.countArea: 113 | exampleImageWithArea[point[1]-10:point[1]+10,point[0]-10:point[0]+10] = (0,255,255) 114 | cv2.fillConvexPoly(exampleImageWithArea, np.array(self.countArea), (0,0,255)) 115 | self.show_image_label(exampleImageWithArea) 116 | print(self.countArea) 117 | 118 | 119 | def select_area(self): 120 | 121 | #change Area needs update exampleImage 122 | if self.counter_thread_start_flag: 123 | ret, frame = self.videoCapture.read() 124 | if ret: 125 | self.exampleImage = frame 126 | self.show_image_label(frame) 127 | 128 | if not self.get_points_flag: 129 | self.pushButton_selectArea.setText("Submit Area") 130 | self.get_points_flag = 1 131 | self.countArea = [] 132 | self.pushButton_openVideo.setEnabled(False) 133 | self.pushButton_start.setEnabled(False) 134 | 135 | else: 136 | self.pushButton_selectArea.setText("Select Area") 137 | self.get_points_flag = 0 138 | exampleImage = copy.deepcopy(self.exampleImage) 139 | # painting area 140 | for i in range(len(self.countArea)): 141 | cv2.line(exampleImage, tuple(self.countArea[i]), tuple(self.countArea[(i + 1) % (len(self.countArea))]), (0, 0, 255), 2) 142 | self.show_image_label(exampleImage) 143 | 144 | #enable start button 145 | self.pushButton_openVideo.setEnabled(True) 146 | self.pushButton_start.setEnabled(True) 147 | 148 | 149 | def show_image_label(self, img_np): 150 | img_np = cv2.cvtColor(img_np,cv2.COLOR_BGR2RGB) 151 | img_np = cv2.resize(img_np, self.label_image_size) 152 | frame = QImage(img_np, self.label_image_size[0], self.label_image_size[1], QImage.Format_RGB888) 153 | pix = QPixmap.fromImage(frame) 154 | self.label_image.setPixmap(pix) 155 | self.label_image.repaint() 156 | 157 | def start_count(self): 158 | if self.running_flag == 0: 159 | #clear count and display 160 | KalmanBoxTracker.count = 0 161 | for item in self.show_label: 162 | vars(self)[f"label_{item}"].setText('0') 163 | # clear result file 164 | with open("results/results.txt", "w") as f: 165 | pass 166 | 167 | #start 168 | self.running_flag = 1 169 | self.pause_flag = 0 170 | self.pushButton_start.setText("Stop") 171 | self.pushButton_openVideo.setEnabled(False) 172 | self.pushButton_selectArea.setEnabled(False) 173 | #emit new parameter to counter thread 174 | self.counterThread.sin_runningFlag.emit(self.running_flag) 175 | self.counterThread.sin_countArea.emit(self.countArea) 176 | self.counterThread.sin_videoList.emit(self.videoList) 177 | #start counter thread 178 | self.counterThread.start() 179 | 180 | self.pushButton_pause.setEnabled(True) 181 | 182 | 183 | elif self.running_flag == 1: 
#push pause button 184 | #stop system 185 | self.running_flag = 0 186 | self.counterThread.sin_runningFlag.emit(self.running_flag) 187 | self.pushButton_openVideo.setEnabled(True) 188 | self.pushButton_selectArea.setEnabled(True) 189 | self.pushButton_start.setText("Start") 190 | 191 | 192 | 193 | def done(self,sin): 194 | if sin == 1: 195 | self.pushButton_openVideo.setEnabled(True) 196 | self.pushButton_start.setEnabled(False) 197 | self.pushButton_start.setText("Start") 198 | 199 | 200 | def update_counter_results(self,counter_results): 201 | with open("results/results.txt", "a") as f: 202 | for i, result in enumerate(counter_results): 203 | label_var = vars(self)[f"label_{result[2]}"] 204 | label_var.setText(str(int(label_var.text())+1)) 205 | label_var.repaint() 206 | label_sum_var = vars(self)[f"label_sum"] 207 | label_sum_var.setText(str(int(label_sum_var.text()) + 1)) 208 | label_sum_var.repaint() 209 | f.writelines(' '.join(map(lambda x: str(x),result))) 210 | f.write(("\n")) 211 | # print("************************************************",len(counter_results)) 212 | 213 | 214 | def pause(self): 215 | if self.pause_flag == 0: 216 | self.pause_flag = 1 217 | self.pushButton_pause.setText("Continue") 218 | self.pushButton_start.setEnabled(False) 219 | else: 220 | self.pause_flag = 0 221 | self.pushButton_pause.setText("Pause") 222 | self.pushButton_start.setEnabled(True) 223 | 224 | self.counterThread.sin_pauseFlag.emit(self.pause_flag) 225 | 226 | 227 | if __name__ == '__main__': 228 | app = QApplication(sys.argv) 229 | myWin = App() 230 | myWin.show() 231 | sys.exit(app.exec_()) 232 | -------------------------------------------------------------------------------- /asserts/demo1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo1.gif -------------------------------------------------------------------------------- /asserts/demo1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo1.mp4 -------------------------------------------------------------------------------- /asserts/demo2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo2.gif -------------------------------------------------------------------------------- /asserts/demo2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo2.mp4 -------------------------------------------------------------------------------- /asserts/demo3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo3.gif -------------------------------------------------------------------------------- /asserts/demo3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/demo3.mp4 
-------------------------------------------------------------------------------- /asserts/test.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/asserts/test.mp4 -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | names = ["bicycle","bus","car","motorbike","truck"] 2 | color_dict = {"bicycle": (179, 52, 255), 3 | "bus": (255, 191, 0), 4 | "car": (127, 255, 0), 5 | "motorbike": (0, 140, 255), 6 | "truck": (0, 215, 255)} -------------------------------------------------------------------------------- /config/coco.data: -------------------------------------------------------------------------------- 1 | classes= 80 2 | train=data/train.txt 3 | valid=data/valid.txt 4 | names=config/coco.names 5 | backup=backup/ 6 | -------------------------------------------------------------------------------- /config/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /config/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | # 0 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | # 1 35 | [maxpool] 36 | size=2 37 | stride=2 38 | 39 | # 2 40 | [convolutional] 41 | batch_normalize=1 42 | filters=32 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | # 3 49 | [maxpool] 50 | size=2 51 | stride=2 52 | 53 | # 4 54 | [convolutional] 55 | batch_normalize=1 56 | filters=64 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | # 5 63 | [maxpool] 64 | size=2 65 | stride=2 66 | 67 | # 6 68 | [convolutional] 69 | batch_normalize=1 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | # 7 77 | [maxpool] 78 | size=2 79 | stride=2 80 | 81 | # 8 82 | 
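# (This 256-filter convolution is block 8; the second detection head later concatenates
#  its output with the upsampled features via "[route] layers = -1, 8" further down.)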
[convolutional] 83 | batch_normalize=1 84 | filters=256 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | # 9 91 | [maxpool] 92 | size=2 93 | stride=2 94 | 95 | # 10 96 | [convolutional] 97 | batch_normalize=1 98 | filters=512 99 | size=3 100 | stride=1 101 | pad=1 102 | activation=leaky 103 | 104 | # 11 105 | [maxpool] 106 | size=2 107 | stride=1 108 | 109 | # 12 110 | [convolutional] 111 | batch_normalize=1 112 | filters=1024 113 | size=3 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | ########### 119 | 120 | # 13 121 | [convolutional] 122 | batch_normalize=1 123 | filters=256 124 | size=1 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | # 14 130 | [convolutional] 131 | batch_normalize=1 132 | filters=512 133 | size=3 134 | stride=1 135 | pad=1 136 | activation=leaky 137 | 138 | # 15 139 | [convolutional] 140 | size=1 141 | stride=1 142 | pad=1 143 | filters=255 144 | activation=linear 145 | 146 | 147 | 148 | # 16 149 | [yolo] 150 | mask = 3,4,5 151 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 152 | classes=80 153 | num=6 154 | jitter=.3 155 | ignore_thresh = .7 156 | truth_thresh = 1 157 | random=1 158 | 159 | # 17 160 | [route] 161 | layers = -4 162 | 163 | # 18 164 | [convolutional] 165 | batch_normalize=1 166 | filters=128 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | # 19 173 | [upsample] 174 | stride=2 175 | 176 | # 20 177 | [route] 178 | layers = -1, 8 179 | 180 | # 21 181 | [convolutional] 182 | batch_normalize=1 183 | filters=256 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | # 22 190 | [convolutional] 191 | size=1 192 | stride=1 193 | pad=1 194 | filters=255 195 | activation=linear 196 | 197 | # 23 198 | [yolo] 199 | mask = 1,2,3 200 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 201 | classes=80 202 | num=6 203 | jitter=.3 204 | ignore_thresh = .7 205 | truth_thresh = 1 206 | random=1 207 | -------------------------------------------------------------------------------- /config/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | 
activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | 
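# (Each "[shortcut] from=-3" block in this backbone adds the output from three layers back
#  to the current feature map, i.e. the residual connections of Darknet-53 around each
#  1x1 / 3x3 convolution pair.)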
[shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | 
batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | 
activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /counter.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import cv2 3 | from utils.sort import * 4 | from PyQt5.QtCore import QThread, pyqtSignal 5 | import predict 6 | from config import * 7 | 8 | class CounterThread(QThread): 9 | sin_counterResult = pyqtSignal(np.ndarray) 10 | sin_runningFlag = pyqtSignal(int) 11 | sin_videoList = pyqtSignal(list) 12 | sin_countArea = pyqtSignal(list) 13 | sin_done = pyqtSignal(int) 14 | sin_counter_results = pyqtSignal(list) 15 | sin_pauseFlag = pyqtSignal(int) 16 | 17 | def __init__(self,model,class_names,device): 18 | super(CounterThread,self).__init__() 19 | 20 | self.model = model 21 | self.class_names = class_names 22 | self.device = device 23 | 24 | self.permission = names 25 | 26 | self.colorDict = color_dict 27 | 28 | # create instance of SORT 29 | self.mot_tracker = Sort(max_age=10, min_hits=2) 30 | self.countArea = None 31 | self.running_flag = 0 32 | self.pause_flag = 0 33 | self.videoList = [] 34 | self.last_max_id = 0 35 | self.history = {} #save history 36 | #history = {id:{"no_update_count": int, "his": list}} 37 | 38 | self.sin_runningFlag.connect(self.update_flag) 39 | self.sin_videoList.connect(self.update_videoList) 40 | self.sin_countArea.connect(self.update_countArea) 41 | self.sin_pauseFlag.connect(self.update_pauseFlag) 42 | 43 | self.save_dir = "results" 44 | if not os.path.exists(self.save_dir): os.makedirs(self.save_dir) 45 | 46 | def run(self): 47 | for video in self.videoList: 48 | self.last_max_id = 0 49 | cap = cv2.VideoCapture(video) 50 | out = cv2.VideoWriter(os.path.join(self.save_dir,video.split("/")[-1]), cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), 10, (1920, 1080)) 51 | frame_count = 0 52 | while cap.isOpened(): 53 | # print(frame_count) 54 | if self.running_flag: 55 | if not self.pause_flag: 56 | ret, frame = cap.read() 57 | if ret: 58 | if frame_count % 3 == 0: 59 | a1 = time.time() 60 | frame = self.counter(self.permission, self.colorDict, frame,np.array(self.countArea), self.mot_tracker, video) 61 | self.sin_counterResult.emit(frame) 62 | 63 | out.write(frame) 64 | a2 = time.time() 65 | print(f"fps: {1 / (a2 - a1):.2f}") 66 | frame_count += 1 67 | else: 68 | break 69 | else: 70 | time.sleep(0.1) 71 | else: 72 | break 73 | 74 | #restart count for each video 75 | KalmanBoxTracker.count = 0 76 | cap.release() 77 | out.release() 78 | 79 | if not self.running_flag: 80 | break 81 | 82 | if self.running_flag: 83 | self.sin_done.emit(1) 84 | 85 | def update_pauseFlag(self,flag): 86 | self.pause_flag = flag 87 | 88 | def update_flag(self,flag): 89 | self.running_flag = flag 90 | 91 | def update_videoList(self, videoList): 92 | 
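        # (Slot for sin_videoList: stores the list of video paths that run() will
        #  iterate over once counting starts.)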
print("Update videoList!") 93 | self.videoList = videoList 94 | 95 | def update_countArea(self,Area): 96 | print("Update countArea!") 97 | self.countArea = Area 98 | 99 | def counter(self, permission, colorDict, frame, CountArea, mot_tracker, videoName): 100 | 101 | # painting area 102 | AreaBound = [min(CountArea[:, 0]), min(CountArea[:, 1]), max(CountArea[:, 0]), max(CountArea[:, 1])] 103 | painting = np.zeros((AreaBound[3] - AreaBound[1], AreaBound[2] - AreaBound[0]), dtype=np.uint8) 104 | CountArea_mini = CountArea - AreaBound[0:2] 105 | cv2.fillConvexPoly(painting, CountArea_mini, (1,)) 106 | 107 | objects = predict.yolo_prediction(self.model,self.device,frame,self.class_names) 108 | objects = filter(lambda x: x[0] in permission, objects) 109 | objects = filter(lambda x: x[1] > 0.5,objects) 110 | objects = list(filter(lambda x: pointInCountArea(painting, AreaBound, [int(x[2][0]), int(x[2][1] + x[2][3] / 2)]),objects)) 111 | 112 | #filter out repeat bbox 113 | objects = filiter_out_repeat(objects) 114 | 115 | detections = [] 116 | for item in objects: 117 | detections.append([int(item[2][0] - item[2][2] / 2), 118 | int(item[2][1] - item[2][3] / 2), 119 | int(item[2][0] + item[2][2] / 2), 120 | int(item[2][1] + item[2][3] / 2), 121 | item[1]]) 122 | track_bbs_ids = mot_tracker.update(np.array(detections)) 123 | 124 | # for i, item in enumerate(objects): 125 | # # x1,y1,x2,y2,id = list(map(lambda x :int(x),item)) 126 | # # id_log.add(id) 127 | # # objectName = get_objName(item, objects) 128 | # 129 | # objectName, province, objectBox = item 130 | # x, y, w, h = objectBox 131 | # x1, y1, x2, y2 = int(x - w / 2), int(y - h / 2), int(x + w / 2), int(y + h / 2) 132 | # 133 | # boxColor = colorDict[objectName] 134 | # cv2.rectangle(frame, (x1, y1), (x2, y2), boxColor, thickness=2) 135 | # cv2.putText(frame, objectName, (x1 - 1, y1 - 3), cv2.FONT_HERSHEY_COMPLEX, 0.7, boxColor, 136 | # thickness=2) 137 | 138 | # painting area 139 | for i in range(len(CountArea)): 140 | cv2.line(frame, tuple(CountArea[i]), tuple(CountArea[(i + 1) % (len(CountArea))]), (0, 0, 255), 2) 141 | 142 | if len(track_bbs_ids) > 0: 143 | for bb in track_bbs_ids: #add all bbox to history 144 | id = int(bb[-1]) 145 | objectName = get_objName(bb, objects) 146 | if id not in self.history.keys(): #add new id 147 | self.history[id] = {} 148 | self.history[id]["no_update_count"] = 0 149 | self.history[id]["his"] = [] 150 | self.history[id]["his"].append(objectName) 151 | else: 152 | self.history[id]["no_update_count"] = 0 153 | self.history[id]["his"].append(objectName) 154 | 155 | for i, item in enumerate(track_bbs_ids): 156 | bb = list(map(lambda x: int(x), item)) 157 | id = bb[-1] 158 | x1, y1, x2, y2 = bb[:4] 159 | 160 | his = self.history[id]["his"] 161 | result = {} 162 | for i in set(his): 163 | result[i] = his.count(i) 164 | res = sorted(result.items(), key=lambda d: d[1], reverse=True) 165 | objectName = res[0][0] 166 | 167 | boxColor = colorDict[objectName] 168 | cv2.rectangle(frame, (x1, y1), (x2, y2), boxColor, thickness=2) 169 | cv2.putText(frame, str(id) + "_" + objectName, (x1 - 1, y1 - 3), cv2.FONT_HERSHEY_COMPLEX, 0.7, 170 | boxColor, 171 | thickness=2) 172 | 173 | 174 | counter_results = [] 175 | videoName = videoName.split('/')[-1] 176 | removed_id_list = [] 177 | for id in self.history.keys(): #extract id after tracking 178 | self.history[id]["no_update_count"] += 1 179 | if self.history[id]["no_update_count"] > 5: 180 | his = self.history[id]["his"] 181 | result = {} 182 | for i in set(his): 183 | result[i] 
= his.count(i) 184 | res = sorted(result.items(), key=lambda d: d[1], reverse=True) 185 | objectName = res[0][0] 186 | counter_results.append([videoName,id,objectName]) 187 | #del id 188 | removed_id_list.append(id) 189 | 190 | for id in removed_id_list: 191 | _ = self.history.pop(id) 192 | 193 | if len(counter_results): 194 | self.sin_counter_results.emit(counter_results) 195 | 196 | # print(self.history) 197 | 198 | 199 | return frame 200 | 201 | def emit_timeCode(self,time_code): 202 | self.sin_timeCode.emit(time_code) 203 | 204 | def getTwoDimensionListIndex(L,value,pos): 205 | for i in range(len(L)): 206 | if L[i][pos] == value: 207 | return i 208 | return -1 209 | 210 | def filiter_out_repeat(objects): 211 | objects = sorted(objects,key=lambda x: x[1]) 212 | l = len(objects) 213 | new_objects = [] 214 | if l > 1: 215 | for i in range(l-1): 216 | flag = 0 217 | for j in range(i+1,l): 218 | x_i, y_i, w_i, h_i = objects[i][2] 219 | x_j, y_j, w_j, h_j = objects[j][2] 220 | box1 = [int(x_i - w_i / 2), int(y_i - h_i / 2), int(x_i + w_i / 2), int(y_i + h_i / 2)] 221 | box2 = [int(x_j - w_j / 2), int(y_j - h_j / 2), int(x_j + w_j / 2), int(y_j + h_j / 2)] 222 | if cal_iou(box1,box2) >= 0.7: 223 | flag = 1 224 | break 225 | #if no repeat 226 | if not flag: 227 | new_objects.append(objects[i]) 228 | #add the last one 229 | new_objects.append(objects[-1]) 230 | else: 231 | return objects 232 | 233 | return list(tuple(new_objects)) 234 | 235 | 236 | def cal_iou(box1,box2): 237 | x1 = max(box1[0],box2[0]) 238 | y1 = max(box1[1],box2[1]) 239 | x2 = min(box1[2],box2[2]) 240 | y2 = min(box1[3],box2[3]) 241 | i = max(0,(x2-x1))*max(0,(y2-y1)) 242 | u = (box1[2]-box1[0])*(box1[3]-box1[1]) + (box2[2]-box2[0])*(box2[3]-box2[1]) - i 243 | iou = float(i)/float(u) 244 | return iou 245 | 246 | def get_objName(item,objects): 247 | iou_list = [] 248 | for i,object in enumerate(objects): 249 | x, y, w, h = object[2] 250 | x1, y1, x2, y2 = int(x - w / 2), int(y - h / 2), int(x + w / 2), int(y + h / 2) 251 | iou_list.append(cal_iou(item[:4],[x1,y1,x2,y2])) 252 | max_index = iou_list.index(max(iou_list)) 253 | return objects[max_index][0] 254 | 255 | def pointInCountArea(painting, AreaBound, point): 256 | h,w = painting.shape[:2] 257 | point = np.array(point) 258 | point = point - AreaBound[:2] 259 | if point[0] < 0 or point[1] < 0 or point[0] >= w or point[1] >= h: 260 | return 0 261 | else: 262 | return painting[point[1],point[0]] 263 | 264 | 265 | 266 | 267 | 268 | -------------------------------------------------------------------------------- /gui.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Form implementation generated from reading ui file 'gui.ui' 4 | # 5 | # Created by: PyQt5 UI code generator 5.13.1 6 | # 7 | # WARNING! All changes made in this file will be lost! 
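# (To change the layout, edit gui.ui in Qt Designer and regenerate this module,
#  e.g. with `pyuic5 gui.ui -o gui.py`, rather than editing the generated code by hand.)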
8 | 9 | 10 | from PyQt5 import QtCore, QtGui, QtWidgets 11 | 12 | 13 | class Ui_mainWindow(object): 14 | def setupUi(self, mainWindow): 15 | mainWindow.setObjectName("mainWindow") 16 | mainWindow.resize(1203, 554) 17 | self.centralwidget = QtWidgets.QWidget(mainWindow) 18 | self.centralwidget.setObjectName("centralwidget") 19 | self.groupBox_count = QtWidgets.QGroupBox(self.centralwidget) 20 | self.groupBox_count.setGeometry(QtCore.QRect(990, 10, 211, 341)) 21 | self.groupBox_count.setObjectName("groupBox_count") 22 | self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.groupBox_count) 23 | self.verticalLayout_2.setObjectName("verticalLayout_2") 24 | self.gridLayout_count = QtWidgets.QGridLayout() 25 | self.gridLayout_count.setContentsMargins(2, 2, 2, 2) 26 | self.gridLayout_count.setSpacing(6) 27 | self.gridLayout_count.setObjectName("gridLayout_count") 28 | self.label_truck = QtWidgets.QLabel(self.groupBox_count) 29 | self.label_truck.setObjectName("label_truck") 30 | self.gridLayout_count.addWidget(self.label_truck, 2, 1, 1, 1, QtCore.Qt.AlignHCenter) 31 | self.label_7 = QtWidgets.QLabel(self.groupBox_count) 32 | self.label_7.setObjectName("label_7") 33 | self.gridLayout_count.addWidget(self.label_7, 4, 0, 1, 1, QtCore.Qt.AlignHCenter) 34 | self.label_5 = QtWidgets.QLabel(self.groupBox_count) 35 | self.label_5.setObjectName("label_5") 36 | self.gridLayout_count.addWidget(self.label_5, 2, 0, 1, 1, QtCore.Qt.AlignHCenter) 37 | self.label_6 = QtWidgets.QLabel(self.groupBox_count) 38 | self.label_6.setObjectName("label_6") 39 | self.gridLayout_count.addWidget(self.label_6, 3, 0, 1, 1, QtCore.Qt.AlignHCenter) 40 | self.label_motorbike = QtWidgets.QLabel(self.groupBox_count) 41 | self.label_motorbike.setObjectName("label_motorbike") 42 | self.gridLayout_count.addWidget(self.label_motorbike, 3, 1, 1, 1, QtCore.Qt.AlignHCenter) 43 | self.label_bus = QtWidgets.QLabel(self.groupBox_count) 44 | self.label_bus.setObjectName("label_bus") 45 | self.gridLayout_count.addWidget(self.label_bus, 1, 1, 1, 1, QtCore.Qt.AlignHCenter) 46 | self.label_bicycle = QtWidgets.QLabel(self.groupBox_count) 47 | self.label_bicycle.setObjectName("label_bicycle") 48 | self.gridLayout_count.addWidget(self.label_bicycle, 4, 1, 1, 1, QtCore.Qt.AlignHCenter) 49 | self.label_12 = QtWidgets.QLabel(self.groupBox_count) 50 | self.label_12.setObjectName("label_12") 51 | self.gridLayout_count.addWidget(self.label_12, 5, 0, 1, 1, QtCore.Qt.AlignHCenter) 52 | self.label_3 = QtWidgets.QLabel(self.groupBox_count) 53 | self.label_3.setObjectName("label_3") 54 | self.gridLayout_count.addWidget(self.label_3, 0, 0, 1, 1, QtCore.Qt.AlignHCenter) 55 | self.label_sum = QtWidgets.QLabel(self.groupBox_count) 56 | self.label_sum.setObjectName("label_sum") 57 | self.gridLayout_count.addWidget(self.label_sum, 5, 1, 1, 1, QtCore.Qt.AlignHCenter) 58 | self.label_car = QtWidgets.QLabel(self.groupBox_count) 59 | self.label_car.setObjectName("label_car") 60 | self.gridLayout_count.addWidget(self.label_car, 0, 1, 1, 1, QtCore.Qt.AlignHCenter) 61 | self.label_4 = QtWidgets.QLabel(self.groupBox_count) 62 | self.label_4.setObjectName("label_4") 63 | self.gridLayout_count.addWidget(self.label_4, 1, 0, 1, 1, QtCore.Qt.AlignHCenter) 64 | self.verticalLayout_2.addLayout(self.gridLayout_count) 65 | self.label_image = QtWidgets.QLabel(self.centralwidget) 66 | self.label_image.setGeometry(QtCore.QRect(10, 10, 960, 540)) 67 | self.label_image.setStyleSheet("background-color: rgb(233, 185, 110);") 68 | self.label_image.setText("") 69 | 
self.label_image.setAlignment(QtCore.Qt.AlignCenter) 70 | self.label_image.setObjectName("label_image") 71 | self.widget = QtWidgets.QWidget(self.centralwidget) 72 | self.widget.setGeometry(QtCore.QRect(1020, 360, 151, 181)) 73 | self.widget.setObjectName("widget") 74 | self.verticalLayout = QtWidgets.QVBoxLayout(self.widget) 75 | self.verticalLayout.setContentsMargins(0, 0, 0, 0) 76 | self.verticalLayout.setObjectName("verticalLayout") 77 | self.pushButton_openVideo = QtWidgets.QPushButton(self.widget) 78 | self.pushButton_openVideo.setObjectName("pushButton_openVideo") 79 | self.verticalLayout.addWidget(self.pushButton_openVideo) 80 | self.pushButton_selectArea = QtWidgets.QPushButton(self.widget) 81 | self.pushButton_selectArea.setObjectName("pushButton_selectArea") 82 | self.verticalLayout.addWidget(self.pushButton_selectArea) 83 | self.pushButton_start = QtWidgets.QPushButton(self.widget) 84 | self.pushButton_start.setObjectName("pushButton_start") 85 | self.verticalLayout.addWidget(self.pushButton_start) 86 | self.pushButton_pause = QtWidgets.QPushButton(self.widget) 87 | self.pushButton_pause.setObjectName("pushButton_pause") 88 | self.verticalLayout.addWidget(self.pushButton_pause) 89 | mainWindow.setCentralWidget(self.centralwidget) 90 | 91 | self.retranslateUi(mainWindow) 92 | QtCore.QMetaObject.connectSlotsByName(mainWindow) 93 | 94 | def retranslateUi(self, mainWindow): 95 | _translate = QtCore.QCoreApplication.translate 96 | mainWindow.setWindowTitle(_translate("mainWindow", "Car Counter")) 97 | self.groupBox_count.setTitle(_translate("mainWindow", "Counting Results")) 98 | self.label_truck.setText(_translate("mainWindow", "0")) 99 | self.label_7.setText(_translate("mainWindow", "bicycle")) 100 | self.label_5.setText(_translate("mainWindow", "truck")) 101 | self.label_6.setText(_translate("mainWindow", "motorbike")) 102 | self.label_motorbike.setText(_translate("mainWindow", "0")) 103 | self.label_bus.setText(_translate("mainWindow", "0")) 104 | self.label_bicycle.setText(_translate("mainWindow", "0")) 105 | self.label_12.setText(_translate("mainWindow", "sum")) 106 | self.label_3.setText(_translate("mainWindow", "car")) 107 | self.label_sum.setText(_translate("mainWindow", "0")) 108 | self.label_car.setText(_translate("mainWindow", "0")) 109 | self.label_4.setText(_translate("mainWindow", "bus")) 110 | self.pushButton_openVideo.setText(_translate("mainWindow", "Open Video")) 111 | self.pushButton_selectArea.setText(_translate("mainWindow", "Select Area")) 112 | self.pushButton_start.setText(_translate("mainWindow", "Start")) 113 | self.pushButton_pause.setText(_translate("mainWindow", "Pause")) 114 | -------------------------------------------------------------------------------- /gui.ui: -------------------------------------------------------------------------------- [gui.ui is the Qt Designer XML source from which gui.py was generated; its markup is not preserved in this dump. It defines the 1203x554 "Car Counter" main window: the "Counting Results" group box with the car, bus, truck, motorbike, bicycle and sum counter labels, the 960x540 image display label, and the Open Video / Select Area / Start / Pause buttons, matching the widget tree built in gui.py above.]
-------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import numpy as np 8 | 9 | from utils.parse_config import * 10 | from utils.utils import build_targets, to_cpu, non_max_suppression 11 | 12 | import matplotlib.pyplot as plt 13 | import matplotlib.patches as patches 14 | 15 | 16 | def create_modules(module_defs): 17 | """ 18 | Constructs module list of layer blocks from module configuration in module_defs 19 | """ 20 | hyperparams = module_defs.pop(0) 21 | output_filters = [int(hyperparams["channels"])] 22 | module_list = nn.ModuleList() 23 | for module_i, module_def in enumerate(module_defs): 24 | modules = nn.Sequential() 25 | 26 | if module_def["type"] == "convolutional": 27 | bn = int(module_def["batch_normalize"]) 28 | filters = int(module_def["filters"]) 29 | kernel_size = int(module_def["size"]) 30 | pad = (kernel_size - 1) // 2 31 | modules.add_module( 32 | f"conv_{module_i}", 33 | nn.Conv2d( 34 | in_channels=output_filters[-1], 35 | out_channels=filters, 36 | kernel_size=kernel_size, 37 | stride=int(module_def["stride"]), 38 | padding=pad, 39 | bias=not bn, 40 | ), 41 | ) 42 | if bn: 43 | modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5)) 44 | if module_def["activation"] == "leaky": 45 | modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1)) 46 | 47 | elif module_def["type"] == "maxpool": 48 | kernel_size = int(module_def["size"]) 49 | stride = int(module_def["stride"]) 50 | if kernel_size == 2 and stride == 1: 51 | modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1))) 52 | maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2)) 53 | modules.add_module(f"maxpool_{module_i}", maxpool) 54 | 55 | elif module_def["type"] == "upsample": 56 | upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest") 57 | modules.add_module(f"upsample_{module_i}", upsample) 58 | 59 | elif module_def["type"] == "route": 60 | layers = [int(x) for x in module_def["layers"].split(",")] 61 | filters = sum([output_filters[1:][i] for i in layers]) 62 | modules.add_module(f"route_{module_i}", EmptyLayer()) 63 | 64 | elif module_def["type"] == "shortcut": 65 | filters = output_filters[1:][int(module_def["from"])] 66 | modules.add_module(f"shortcut_{module_i}", EmptyLayer()) 67 | 68 | elif module_def["type"] == "yolo": 69 | anchor_idxs = [int(x) for x in module_def["mask"].split(",")] 70 | # Extract anchors 71 | anchors = [int(x) for x in module_def["anchors"].split(",")] 72 | anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)] 73 | anchors = [anchors[i] for i in anchor_idxs] 74 |
num_classes = int(module_def["classes"]) 75 | img_size = int(hyperparams["height"]) 76 | # Define detection layer 77 | yolo_layer = YOLOLayer(anchors, num_classes, img_size) 78 | modules.add_module(f"yolo_{module_i}", yolo_layer) 79 | # Register module list and number of output filters 80 | module_list.append(modules) 81 | output_filters.append(filters) 82 | 83 | return hyperparams, module_list 84 | 85 | 86 | class Upsample(nn.Module): 87 | """ nn.Upsample is deprecated """ 88 | 89 | def __init__(self, scale_factor, mode="nearest"): 90 | super(Upsample, self).__init__() 91 | self.scale_factor = scale_factor 92 | self.mode = mode 93 | 94 | def forward(self, x): 95 | x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) 96 | return x 97 | 98 | 99 | class EmptyLayer(nn.Module): 100 | """Placeholder for 'route' and 'shortcut' layers""" 101 | 102 | def __init__(self): 103 | super(EmptyLayer, self).__init__() 104 | 105 | 106 | class YOLOLayer(nn.Module): 107 | """Detection layer""" 108 | 109 | def __init__(self, anchors, num_classes, img_dim=416): 110 | super(YOLOLayer, self).__init__() 111 | self.anchors = anchors 112 | self.num_anchors = len(anchors) 113 | self.num_classes = num_classes 114 | self.ignore_thres = 0.5 115 | self.mse_loss = nn.MSELoss() 116 | self.bce_loss = nn.BCELoss() 117 | self.obj_scale = 1 118 | self.noobj_scale = 100 119 | self.metrics = {} 120 | self.img_dim = img_dim 121 | self.grid_size = 0 # grid size 122 | 123 | def compute_grid_offsets(self, grid_size, cuda=True): 124 | self.grid_size = grid_size 125 | g = self.grid_size 126 | FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor 127 | self.stride = self.img_dim / self.grid_size 128 | # Calculate offsets for each grid 129 | self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor) 130 | self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor) 131 | self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors]) 132 | self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1)) 133 | self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1)) 134 | 135 | def forward(self, x, targets=None, img_dim=None): 136 | 137 | # Tensors for cuda support 138 | FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor 139 | LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor 140 | ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor 141 | 142 | self.img_dim = img_dim 143 | num_samples = x.size(0) 144 | grid_size = x.size(2) 145 | 146 | prediction = ( 147 | x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size) 148 | .permute(0, 1, 3, 4, 2) 149 | .contiguous() 150 | ) 151 | 152 | # Get outputs 153 | x = torch.sigmoid(prediction[..., 0]) # Center x 154 | y = torch.sigmoid(prediction[..., 1]) # Center y 155 | w = prediction[..., 2] # Width 156 | h = prediction[..., 3] # Height 157 | pred_conf = torch.sigmoid(prediction[..., 4]) # Conf 158 | pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. 
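        # (prediction is laid out as (batch, num_anchors, grid, grid, 5 + num_classes);
        #  x, y, objectness and class scores are squashed with sigmoids, while w and h
        #  stay raw and are decoded below as exp(value) * anchor size, the usual YOLOv3 box decode.)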
159 | 160 | # If grid size does not match current we compute new offsets 161 | if grid_size != self.grid_size: 162 | self.compute_grid_offsets(grid_size, cuda=x.is_cuda) 163 | 164 | # Add offset and scale with anchors 165 | pred_boxes = FloatTensor(prediction[..., :4].shape) 166 | pred_boxes[..., 0] = x.data + self.grid_x 167 | pred_boxes[..., 1] = y.data + self.grid_y 168 | pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w 169 | pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h 170 | 171 | output = torch.cat( 172 | ( 173 | pred_boxes.view(num_samples, -1, 4) * self.stride, 174 | pred_conf.view(num_samples, -1, 1), 175 | pred_cls.view(num_samples, -1, self.num_classes), 176 | ), 177 | -1, 178 | ) 179 | 180 | if targets is None: 181 | return output, 0 182 | else: 183 | iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets( 184 | pred_boxes=pred_boxes, 185 | pred_cls=pred_cls, 186 | target=targets, 187 | anchors=self.scaled_anchors, 188 | ignore_thres=self.ignore_thres, 189 | ) 190 | 191 | # Loss : Mask outputs to ignore non-existing objects (except with conf. loss) 192 | loss_x = self.mse_loss(x[obj_mask], tx[obj_mask]) 193 | loss_y = self.mse_loss(y[obj_mask], ty[obj_mask]) 194 | loss_w = self.mse_loss(w[obj_mask], tw[obj_mask]) 195 | loss_h = self.mse_loss(h[obj_mask], th[obj_mask]) 196 | loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask]) 197 | loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask]) 198 | loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj 199 | loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask]) 200 | total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls 201 | 202 | # Metrics 203 | cls_acc = 100 * class_mask[obj_mask].mean() 204 | conf_obj = pred_conf[obj_mask].mean() 205 | conf_noobj = pred_conf[noobj_mask].mean() 206 | conf50 = (pred_conf > 0.5).float() 207 | iou50 = (iou_scores > 0.5).float() 208 | iou75 = (iou_scores > 0.75).float() 209 | detected_mask = conf50 * class_mask * tconf 210 | precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16) 211 | recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16) 212 | recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16) 213 | 214 | self.metrics = { 215 | "loss": to_cpu(total_loss).item(), 216 | "x": to_cpu(loss_x).item(), 217 | "y": to_cpu(loss_y).item(), 218 | "w": to_cpu(loss_w).item(), 219 | "h": to_cpu(loss_h).item(), 220 | "conf": to_cpu(loss_conf).item(), 221 | "cls": to_cpu(loss_cls).item(), 222 | "cls_acc": to_cpu(cls_acc).item(), 223 | "recall50": to_cpu(recall50).item(), 224 | "recall75": to_cpu(recall75).item(), 225 | "precision": to_cpu(precision).item(), 226 | "conf_obj": to_cpu(conf_obj).item(), 227 | "conf_noobj": to_cpu(conf_noobj).item(), 228 | "grid_size": grid_size, 229 | } 230 | 231 | return output, total_loss 232 | 233 | 234 | class Darknet(nn.Module): 235 | """YOLOv3 object detection model""" 236 | 237 | def __init__(self, config_path, img_size=416): 238 | super(Darknet, self).__init__() 239 | self.module_defs = parse_model_config(config_path) 240 | self.hyperparams, self.module_list = create_modules(self.module_defs) 241 | self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")] 242 | self.img_size = img_size 243 | self.seen = 0 244 | self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32) 245 | 246 | def forward(self, x, targets=None): 247 | img_dim = x.shape[2] 248 | loss = 0 
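        # (layer_outputs caches every module's output so "route" and "shortcut" blocks
        #  can index back into earlier feature maps; yolo_outputs collects the
        #  detections produced by each YOLO head.)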
249 | layer_outputs, yolo_outputs = [], [] 250 | for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): 251 | if module_def["type"] in ["convolutional", "upsample", "maxpool"]: 252 | x = module(x) 253 | elif module_def["type"] == "route": 254 | x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1) 255 | elif module_def["type"] == "shortcut": 256 | layer_i = int(module_def["from"]) 257 | x = layer_outputs[-1] + layer_outputs[layer_i] 258 | elif module_def["type"] == "yolo": 259 | x, layer_loss = module[0](x, targets, img_dim) 260 | loss += layer_loss 261 | yolo_outputs.append(x) 262 | layer_outputs.append(x) 263 | yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1)) 264 | return yolo_outputs if targets is None else (loss, yolo_outputs) 265 | 266 | def load_darknet_weights(self, weights_path): 267 | """Parses and loads the weights stored in 'weights_path'""" 268 | 269 | # Open the weights file 270 | with open(weights_path, "rb") as f: 271 | header = np.fromfile(f, dtype=np.int32, count=5) # First five are header values 272 | self.header_info = header # Needed to write header when saving weights 273 | self.seen = header[3] # number of images seen during training 274 | weights = np.fromfile(f, dtype=np.float32) # The rest are weights 275 | 276 | # Establish cutoff for loading backbone weights 277 | cutoff = None 278 | if "darknet53.conv.74" in weights_path: 279 | cutoff = 75 280 | 281 | ptr = 0 282 | for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): 283 | if i == cutoff: 284 | break 285 | if module_def["type"] == "convolutional": 286 | conv_layer = module[0] 287 | if module_def["batch_normalize"]: 288 | # Load BN bias, weights, running mean and running variance 289 | bn_layer = module[1] 290 | num_b = bn_layer.bias.numel() # Number of biases 291 | # Bias 292 | bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias) 293 | bn_layer.bias.data.copy_(bn_b) 294 | ptr += num_b 295 | # Weight 296 | bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight) 297 | bn_layer.weight.data.copy_(bn_w) 298 | ptr += num_b 299 | # Running Mean 300 | bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean) 301 | bn_layer.running_mean.data.copy_(bn_rm) 302 | ptr += num_b 303 | # Running Var 304 | bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var) 305 | bn_layer.running_var.data.copy_(bn_rv) 306 | ptr += num_b 307 | else: 308 | # Load conv. bias 309 | num_b = conv_layer.bias.numel() 310 | conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias) 311 | conv_layer.bias.data.copy_(conv_b) 312 | ptr += num_b 313 | # Load conv. 
weights 314 | num_w = conv_layer.weight.numel() 315 | conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight) 316 | conv_layer.weight.data.copy_(conv_w) 317 | ptr += num_w 318 | 319 | def save_darknet_weights(self, path, cutoff=-1): 320 | """ 321 | @:param path - path of the new weights file 322 | @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved) 323 | """ 324 | fp = open(path, "wb") 325 | self.header_info[3] = self.seen 326 | self.header_info.tofile(fp) 327 | 328 | # Iterate through layers 329 | for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 330 | if module_def["type"] == "convolutional": 331 | conv_layer = module[0] 332 | # If batch norm, load bn first 333 | if module_def["batch_normalize"]: 334 | bn_layer = module[1] 335 | bn_layer.bias.data.cpu().numpy().tofile(fp) 336 | bn_layer.weight.data.cpu().numpy().tofile(fp) 337 | bn_layer.running_mean.data.cpu().numpy().tofile(fp) 338 | bn_layer.running_var.data.cpu().numpy().tofile(fp) 339 | # Load conv bias 340 | else: 341 | conv_layer.bias.data.cpu().numpy().tofile(fp) 342 | # Load conv weights 343 | conv_layer.weight.data.cpu().numpy().tofile(fp) 344 | 345 | fp.close() 346 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from utils.utils import * 4 | from utils.datasets import * 5 | import cv2 6 | from PIL import Image 7 | import torch 8 | from torchvision import transforms 9 | 10 | 11 | def resize(image, size): 12 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) 13 | return image 14 | 15 | 16 | def yolo_prediction(model, device, image,class_names): 17 | image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB) 18 | imgs = transforms.ToTensor()(Image.fromarray(image)) 19 | c, h, w = imgs.shape 20 | img_sacle = [w / 416, h / 416, w / 416, h / 416] 21 | imgs = resize(imgs, 416) 22 | imgs = imgs.unsqueeze(0).to(device) 23 | 24 | model.eval() 25 | with torch.no_grad(): 26 | outputs = model(imgs) 27 | outputs = non_max_suppression(outputs, conf_thres=0.5, nms_thres=0.45) 28 | 29 | # print(outputs) 30 | objects = [] 31 | try: 32 | outputs = outputs[0].cpu().data 33 | for i, output in enumerate(outputs): 34 | item = [] 35 | item.append(class_names[int(output[-1])]) 36 | item.append(float(output[4])) 37 | box = [int(value * img_sacle[i]) for i, value in enumerate(output[:4])] 38 | x1,y1,x2,y2 = box 39 | x = int((x2+x1)/2) 40 | y = int((y1+y2)/2) 41 | w = x2-x1 42 | h = y2-y1 43 | item.append([x,y,w,h]) 44 | objects.append(item) 45 | except: 46 | pass 47 | return objects 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit_learn==0.21.3 2 | numpy 3 | matplotlib 4 | torch 5 | torchvision 6 | terminaltables 7 | pillow 8 | tqdm 9 | numba 10 | fire 11 | filterpy 12 | opencv-python 13 | scikit-image 14 | pytesseract 15 | PyQt5 16 | pandas -------------------------------------------------------------------------------- /utils/.config: -------------------------------------------------------------------------------- 1 | 
3577582247857115766766724119402109753611015352440240992284880932986646512687624561324362551252066885950009004775081309546579712498721403446373694387157090085286231224505645450053725901631232615038604189510648071831312164643845750188640913440678425980773097082810461453835363142185772806767051281389323550048308896544778657224119429085618530915004838231420068047280427430441205992226661770454814476085720924852343305997639832355663375736276454752176015641250406861398123958872477894486967939187447133486257111277501919826706888469965968376719017903135941319967538514819613139151903788015692897242959052053924650580372 2 | 5605377600988577560550065397254225420957436224498576618768516259123867506653695772342324276743084984269870011017428054256364217027220194986462263726663651389623329466400510449729248114987953444512598188059336858299682302760808537370136773326006433438381734159585558284654736919864102681493160466327333511033028058967854838844380496016808629272782632933313912375317044566209141444311502150231852276410702944747944105256818585060877714842901476314844375419924879849300110150679899364755163492641805650724663047662328174625989492058437973343511835728532409474547860996128819129370209775297109033581936985340670034201253 3 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsh122333/Multi-type_vehicles_flow_statistics/ab5b8b228bc877c6ea69cdb78b704f3e5e1e6d13/utils/__init__.py -------------------------------------------------------------------------------- /utils/augmentations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import numpy as np 4 | 5 | 6 | def horisontal_flip(images, targets): 7 | images = torch.flip(images, [-1]) 8 | targets[:, 2] = 1 - targets[:, 2] 9 | return images, targets 10 | -------------------------------------------------------------------------------- /utils/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import sys 5 | import numpy as np 6 | from PIL import Image 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | from utils.augmentations import horisontal_flip 11 | from torch.utils.data import Dataset 12 | import torchvision.transforms as transforms 13 | 14 | 15 | def pad_to_square(img, pad_value): 16 | c, h, w = img.shape 17 | dim_diff = np.abs(h - w) 18 | # (upper / left) padding and (lower / right) padding 19 | pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 20 | # Determine padding 21 | pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0) 22 | # Add padding 23 | img = F.pad(img, pad, "constant", value=pad_value) 24 | 25 | return img, pad 26 | 27 | 28 | def resize(image, size): 29 | image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) 30 | return image 31 | 32 | 33 | def random_resize(images, min_size=288, max_size=448): 34 | new_size = random.sample(list(range(min_size, max_size + 1, 32)), 1)[0] 35 | images = F.interpolate(images, size=new_size, mode="nearest") 36 | return images 37 | 38 | 39 | class ImageFolder(Dataset): 40 | def __init__(self, folder_path, img_size=416): 41 | self.files = sorted(glob.glob("%s/*.*" % folder_path)) 42 | self.img_size = img_size 43 | 44 | def __getitem__(self, index): 45 | img_path = self.files[index % len(self.files)] 46 | # 
Extract image as PyTorch tensor 47 | img = transforms.ToTensor()(Image.open(img_path)) 48 | # Pad to square resolution 49 | img, _ = pad_to_square(img, 0) 50 | # Resize 51 | img = resize(img, self.img_size) 52 | 53 | return img_path, img 54 | 55 | def __len__(self): 56 | return len(self.files) 57 | 58 | 59 | class ListDataset(Dataset): 60 | def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True): 61 | with open(list_path, "r") as file: 62 | self.img_files = file.readlines() 63 | 64 | self.label_files = [ 65 | path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt") 66 | for path in self.img_files 67 | ] 68 | self.img_size = img_size 69 | self.max_objects = 100 70 | self.augment = augment 71 | self.multiscale = multiscale 72 | self.normalized_labels = normalized_labels 73 | self.min_size = self.img_size - 3 * 32 74 | self.max_size = self.img_size + 3 * 32 75 | self.batch_count = 0 76 | 77 | def __getitem__(self, index): 78 | 79 | # --------- 80 | # Image 81 | # --------- 82 | 83 | img_path = self.img_files[index % len(self.img_files)].rstrip() 84 | 85 | # Extract image as PyTorch tensor 86 | img = transforms.ToTensor()(Image.open(img_path).convert('RGB')) 87 | 88 | # Handle images with less than three channels 89 | if len(img.shape) != 3: 90 | img = img.unsqueeze(0) 91 | img = img.expand((3, img.shape[1:])) 92 | 93 | _, h, w = img.shape 94 | h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1) 95 | # Pad to square resolution 96 | img, pad = pad_to_square(img, 0) 97 | _, padded_h, padded_w = img.shape 98 | 99 | # --------- 100 | # Label 101 | # --------- 102 | 103 | label_path = self.label_files[index % len(self.img_files)].rstrip() 104 | 105 | targets = None 106 | if os.path.exists(label_path): 107 | boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5)) 108 | # Extract coordinates for unpadded + unscaled image 109 | x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2) 110 | y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2) 111 | x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2) 112 | y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2) 113 | # Adjust for added padding 114 | x1 += pad[0] 115 | y1 += pad[2] 116 | x2 += pad[1] 117 | y2 += pad[3] 118 | # Returns (x, y, w, h) 119 | boxes[:, 1] = ((x1 + x2) / 2) / padded_w 120 | boxes[:, 2] = ((y1 + y2) / 2) / padded_h 121 | boxes[:, 3] *= w_factor / padded_w 122 | boxes[:, 4] *= h_factor / padded_h 123 | 124 | targets = torch.zeros((len(boxes), 6)) 125 | targets[:, 1:] = boxes 126 | 127 | # Apply augmentations 128 | if self.augment: 129 | if np.random.random() < 0.5: 130 | img, targets = horisontal_flip(img, targets) 131 | 132 | return img_path, img, targets 133 | 134 | 135 | def collate_fn(self, batch): 136 | paths, imgs, targets = list(zip(*batch)) 137 | # Remove empty placeholder targets 138 | targets = [boxes for boxes in targets if boxes is not None] 139 | # Add sample index to targets 140 | for i, boxes in enumerate(targets): 141 | boxes[:, 0] = i 142 | targets = torch.cat(targets, 0) 143 | # Selects new image size every tenth batch 144 | if self.multiscale and self.batch_count % 10 == 0: 145 | self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32)) 146 | # Resize images to input shape 147 | imgs = torch.stack([resize(img, self.img_size) for img in imgs]) 148 | self.batch_count += 1 149 | return paths, imgs, targets 150 | 151 | def __len__(self): 152 | return len(self.img_files) 153 | 
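A minimal sketch of how ListDataset is typically consumed for training (illustrative only: the list-file path, batch size, and loop body are assumptions; the counting application itself only runs inference):

    from torch.utils.data import DataLoader
    from utils.datasets import ListDataset

    # "data/train.txt" is a hypothetical file listing one image path per line,
    # with matching label files under a sibling "labels" directory.
    dataset = ListDataset("data/train.txt", img_size=416, augment=True, multiscale=True)
    loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=dataset.collate_fn)

    for paths, imgs, targets in loader:
        # imgs: (B, 3, S, S); S is re-sampled every 10 batches by collate_fn when multiscale=True
        # targets: (N, 6) rows of [sample_index, class, cx, cy, w, h], normalized to [0, 1]
        break  # one batch is enough for the sketch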
-------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class Logger(object): 5 | def __init__(self, log_dir): 6 | """Create a summary writer logging to log_dir.""" 7 | self.writer = tf.summary.FileWriter(log_dir) 8 | 9 | def scalar_summary(self, tag, value, step): 10 | """Log a scalar variable.""" 11 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 12 | self.writer.add_summary(summary, step) 13 | 14 | def list_of_scalars_summary(self, tag_value_pairs, step): 15 | """Log scalar variables.""" 16 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value) for tag, value in tag_value_pairs]) 17 | self.writer.add_summary(summary, step) 18 | -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def parse_model_config(path): 4 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 5 | file = open(path, 'r') 6 | lines = file.read().split('\n') 7 | lines = [x for x in lines if x and not x.startswith('#')] 8 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 9 | module_defs = [] 10 | for line in lines: 11 | if line.startswith('['): # This marks the start of a new block 12 | module_defs.append({}) 13 | module_defs[-1]['type'] = line[1:-1].rstrip() 14 | if module_defs[-1]['type'] == 'convolutional': 15 | module_defs[-1]['batch_normalize'] = 0 16 | else: 17 | key, value = line.split("=") 18 | value = value.strip() 19 | module_defs[-1][key.rstrip()] = value.strip() 20 | 21 | return module_defs 22 | 23 | def parse_data_config(path): 24 | """Parses the data configuration file""" 25 | options = dict() 26 | options['gpus'] = '0,1,2,3' 27 | options['num_workers'] = '10' 28 | with open(path, 'r') as fp: 29 | lines = fp.readlines() 30 | for line in lines: 31 | line = line.strip() 32 | if line == '' or line.startswith('#'): 33 | continue 34 | key, value = line.split('=') 35 | options[key.strip()] = value.strip() 36 | return options 37 | -------------------------------------------------------------------------------- /utils/sort.py: -------------------------------------------------------------------------------- 1 | """ 2 | SORT: A Simple, Online and Realtime Tracker 3 | Copyright (C) 2016 Alex Bewley alex@dynamicdetection.com 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | You should have received a copy of the GNU General Public License 13 | along with this program. If not, see <http://www.gnu.org/licenses/>. 
14 | """ 15 | from __future__ import print_function 16 | 17 | from numba import jit 18 | import os.path 19 | import numpy as np 20 | import matplotlib.pyplot as plt 21 | import matplotlib.patches as patches 22 | from skimage import io 23 | import glob 24 | from sklearn.utils.linear_assignment_ import linear_assignment 25 | # from scipy.optimize import linear_sum_assignment 26 | import time 27 | import argparse 28 | from filterpy.kalman import KalmanFilter 29 | 30 | @jit 31 | def iou(bb_test,bb_gt): 32 | """ 33 | Computes IUO between two bboxes in the form [x1,y1,x2,y2] 34 | """ 35 | xx1 = np.maximum(bb_test[0], bb_gt[0]) 36 | yy1 = np.maximum(bb_test[1], bb_gt[1]) 37 | xx2 = np.minimum(bb_test[2], bb_gt[2]) 38 | yy2 = np.minimum(bb_test[3], bb_gt[3]) 39 | w = np.maximum(0., xx2 - xx1) 40 | h = np.maximum(0., yy2 - yy1) 41 | wh = w * h 42 | o = wh / ((bb_test[2]-bb_test[0])*(bb_test[3]-bb_test[1]) 43 | + (bb_gt[2]-bb_gt[0])*(bb_gt[3]-bb_gt[1]) - wh) 44 | return(o) 45 | 46 | def convert_bbox_to_z(bbox): 47 | """ 48 | Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form 49 | [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is 50 | the aspect ratio 51 | """ 52 | w = bbox[2]-bbox[0] 53 | h = bbox[3]-bbox[1] 54 | x = bbox[0]+w/2. 55 | y = bbox[1]+h/2. 56 | s = w*h #scale is just area 57 | r = w/float(h) 58 | return np.array([x,y,s,r]).reshape((4,1)) 59 | 60 | def convert_x_to_bbox(x,score=None): 61 | """ 62 | Takes a bounding box in the centre form [x,y,s,r] and returns it in the form 63 | [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right 64 | """ 65 | w = np.sqrt(x[2]*x[3]) 66 | h = x[2]/w 67 | if(score==None): 68 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4)) 69 | else: 70 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5)) 71 | 72 | 73 | class KalmanBoxTracker(object): 74 | """ 75 | This class represents the internel state of individual tracked objects observed as bbox. 76 | """ 77 | count = 0 78 | def __init__(self,bbox): 79 | """ 80 | Initialises a tracker using initial bounding box. 81 | """ 82 | #define constant velocity model 83 | self.kf = KalmanFilter(dim_x=7, dim_z=4) 84 | self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]]) 85 | self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]]) 86 | 87 | self.kf.R[2:,2:] *= 10. 88 | self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities 89 | self.kf.P *= 10. 90 | self.kf.Q[-1,-1] *= 0.01 91 | self.kf.Q[4:,4:] *= 0.01 92 | 93 | self.kf.x[:4] = convert_bbox_to_z(bbox) 94 | self.time_since_update = 0 95 | self.id = KalmanBoxTracker.count 96 | KalmanBoxTracker.count += 1 97 | self.history = [] 98 | self.hits = 0 99 | self.hit_streak = 0 100 | self.age = 0 101 | 102 | def update(self,bbox): 103 | """ 104 | Updates the state vector with observed bbox. 105 | """ 106 | self.time_since_update = 0 107 | self.history = [] 108 | self.hits += 1 109 | self.hit_streak += 1 110 | self.kf.update(convert_bbox_to_z(bbox)) 111 | 112 | def predict(self): 113 | """ 114 | Advances the state vector and returns the predicted bounding box estimate. 
115 | """ 116 | if((self.kf.x[6]+self.kf.x[2])<=0): 117 | self.kf.x[6] *= 0.0 118 | self.kf.predict() 119 | self.age += 1 120 | if(self.time_since_update>0): 121 | self.hit_streak = 0 122 | self.time_since_update += 1 123 | self.history.append(convert_x_to_bbox(self.kf.x)) 124 | return self.history[-1] 125 | 126 | def get_state(self): 127 | """ 128 | Returns the current bounding box estimate. 129 | """ 130 | return convert_x_to_bbox(self.kf.x) 131 | 132 | def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3): 133 | """ 134 | Assigns detections to tracked object (both represented as bounding boxes) 135 | Returns 3 lists of matches, unmatched_detections and unmatched_trackers 136 | """ 137 | if(len(trackers)==0): 138 | return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int) 139 | iou_matrix = np.zeros((len(detections),len(trackers)),dtype=np.float32) 140 | 141 | for d,det in enumerate(detections): 142 | for t,trk in enumerate(trackers): 143 | iou_matrix[d,t] = iou(det,trk) 144 | matched_indices = linear_assignment(-iou_matrix) 145 | 146 | # matched_indices = np.array(matched_indices).reshape((-1,2)) 147 | # print(iou_matrix.shape,matched_indices.shape) 148 | unmatched_detections = [] 149 | for d,det in enumerate(detections): 150 | if(d not in matched_indices[:,0]): 151 | unmatched_detections.append(d) 152 | unmatched_trackers = [] 153 | for t,trk in enumerate(trackers): 154 | if(t not in matched_indices[:,1]): 155 | unmatched_trackers.append(t) 156 | 157 | #filter out matched with low IOU 158 | matches = [] 159 | for m in matched_indices: 160 | if(iou_matrix[m[0],m[1]]= self.min_hits or self.frame_count <= self.min_hits)): 223 | ret.append(np.concatenate((d,[trk.id+1])).reshape(1,-1)) # +1 as MOT benchmark requires positive 224 | 225 | i -= 1 226 | #remove dead tracklet 227 | if(trk.time_since_update > self.max_age): 228 | self.trackers.pop(i) 229 | self.counts = KalmanBoxTracker.count 230 | if(len(ret)>0): 231 | return np.concatenate(ret) 232 | 233 | return np.empty((0,5)) 234 | 235 | def parse_args(): 236 | """Parse input arguments.""" 237 | parser = argparse.ArgumentParser(description='SORT demo') 238 | parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true') 239 | args = parser.parse_args() 240 | return args 241 | 242 | if __name__ == '__main__': 243 | # all train 244 | sequences = ['PETS09-S2L1','TUD-Campus','TUD-Stadtmitte','ETH-Bahnhof','ETH-Sunnyday','ETH-Pedcross2','KITTI-13','KITTI-17','ADL-Rundle-6','ADL-Rundle-8','Venice-2'] 245 | args = parse_args() 246 | display = args.display 247 | phase = 'train' 248 | total_time = 0.0 249 | total_frames = 0 250 | colours = np.random.rand(32,3) #used only for display 251 | if(display): 252 | if not os.path.exists('mot_benchmark'): 253 | print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). 
E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n') 254 | exit() 255 | plt.ion() 256 | fig = plt.figure() 257 | 258 | if not os.path.exists('output'): 259 | os.makedirs('output') 260 | 261 | for seq in sequences: 262 | mot_tracker = Sort() #create instance of the SORT tracker 263 | seq_dets = np.loadtxt('data/%s/det.txt'%(seq),delimiter=',') #load detections 264 | with open('output/%s.txt'%(seq),'w') as out_file: 265 | print("Processing %s."%(seq)) 266 | for frame in range(int(seq_dets[:,0].max())): 267 | frame += 1 #detection and frame numbers begin at 1 268 | dets = seq_dets[seq_dets[:,0]==frame,2:7] 269 | dets[:,2:4] += dets[:,0:2] #convert to [x1,y1,w,h] to [x1,y1,x2,y2] 270 | total_frames += 1 271 | 272 | if(display): 273 | ax1 = fig.add_subplot(111, aspect='equal') 274 | fn = 'mot_benchmark/%s/%s/img1/%06d.jpg'%(phase,seq,frame) 275 | im =io.imread(fn) 276 | ax1.imshow(im) 277 | plt.title(seq+' Tracked Targets') 278 | 279 | start_time = time.time() 280 | trackers = mot_tracker.update(dets) 281 | cycle_time = time.time() - start_time 282 | total_time += cycle_time 283 | 284 | for d in trackers: 285 | print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file) 286 | if(display): 287 | d = d.astype(np.int32) 288 | ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:])) 289 | ax1.set_adjustable('box-forced') 290 | 291 | if(display): 292 | fig.canvas.flush_events() 293 | plt.draw() 294 | ax1.cla() 295 | 296 | print("Total Tracking took: %.3f for %d frames or %.1f FPS"%(total_time,total_frames,total_frames/total_time)) 297 | if(display): 298 | print("Note: to get real runtime results run without the option: --display") 299 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import math 3 | import time 4 | import tqdm 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.autograd import Variable 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | import matplotlib.patches as patches 12 | 13 | 14 | def to_cpu(tensor): 15 | return tensor.detach().cpu() 16 | 17 | 18 | def load_classes(path): 19 | """ 20 | Loads class labels at 'path' 21 | """ 22 | fp = open(path, "r") 23 | names = fp.read().split("\n")[:-1] 24 | return names 25 | 26 | 27 | def weights_init_normal(m): 28 | classname = m.__class__.__name__ 29 | if classname.find("Conv") != -1: 30 | torch.nn.init.normal_(m.weight.data, 0.0, 0.02) 31 | elif classname.find("BatchNorm2d") != -1: 32 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02) 33 | torch.nn.init.constant_(m.bias.data, 0.0) 34 | 35 | 36 | def rescale_boxes(boxes, current_dim, original_shape): 37 | """ Rescales bounding boxes to the original shape """ 38 | orig_h, orig_w = original_shape 39 | # The amount of padding that was added 40 | pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) 41 | pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) 42 | # Image height and width after padding is removed 43 | unpad_h = current_dim - pad_y 44 | unpad_w = current_dim - pad_x 45 | # Rescale bounding boxes to dimension of original image 46 | boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w 47 | boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h 48 | boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * 
orig_w 49 | boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h 50 | return boxes 51 | 52 | 53 | def xywh2xyxy(x): 54 | y = x.new(x.shape) 55 | y[..., 0] = x[..., 0] - x[..., 2] / 2 56 | y[..., 1] = x[..., 1] - x[..., 3] / 2 57 | y[..., 2] = x[..., 0] + x[..., 2] / 2 58 | y[..., 3] = x[..., 1] + x[..., 3] / 2 59 | return y 60 | 61 | 62 | def ap_per_class(tp, conf, pred_cls, target_cls): 63 | """ Compute the average precision, given the recall and precision curves. 64 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 65 | # Arguments 66 | tp: True positives (list). 67 | conf: Objectness value from 0-1 (list). 68 | pred_cls: Predicted object classes (list). 69 | target_cls: True object classes (list). 70 | # Returns 71 | The average precision as computed in py-faster-rcnn. 72 | """ 73 | 74 | # Sort by objectness 75 | i = np.argsort(-conf) 76 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 77 | 78 | # Find unique classes 79 | unique_classes = np.unique(target_cls) 80 | 81 | # Create Precision-Recall curve and compute AP for each class 82 | ap, p, r = [], [], [] 83 | for c in tqdm.tqdm(unique_classes, desc="Computing AP"): 84 | i = pred_cls == c 85 | n_gt = (target_cls == c).sum() # Number of ground truth objects 86 | n_p = i.sum() # Number of predicted objects 87 | 88 | if n_p == 0 and n_gt == 0: 89 | continue 90 | elif n_p == 0 or n_gt == 0: 91 | ap.append(0) 92 | r.append(0) 93 | p.append(0) 94 | else: 95 | # Accumulate FPs and TPs 96 | fpc = (1 - tp[i]).cumsum() 97 | tpc = (tp[i]).cumsum() 98 | 99 | # Recall 100 | recall_curve = tpc / (n_gt + 1e-16) 101 | r.append(recall_curve[-1]) 102 | 103 | # Precision 104 | precision_curve = tpc / (tpc + fpc) 105 | p.append(precision_curve[-1]) 106 | 107 | # AP from recall-precision curve 108 | ap.append(compute_ap(recall_curve, precision_curve)) 109 | 110 | # Compute F1 score (harmonic mean of precision and recall) 111 | p, r, ap = np.array(p), np.array(r), np.array(ap) 112 | f1 = 2 * p * r / (p + r + 1e-16) 113 | 114 | return p, r, ap, f1, unique_classes.astype("int32") 115 | 116 | 117 | def compute_ap(recall, precision): 118 | """ Compute the average precision, given the recall and precision curves. 119 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 120 | 121 | # Arguments 122 | recall: The recall curve (list). 123 | precision: The precision curve (list). 124 | # Returns 125 | The average precision as computed in py-faster-rcnn. 
126 | """ 127 | # correct AP calculation 128 | # first append sentinel values at the end 129 | mrec = np.concatenate(([0.0], recall, [1.0])) 130 | mpre = np.concatenate(([0.0], precision, [0.0])) 131 | 132 | # compute the precision envelope 133 | for i in range(mpre.size - 1, 0, -1): 134 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 135 | 136 | # to calculate area under PR curve, look for points 137 | # where X axis (recall) changes value 138 | i = np.where(mrec[1:] != mrec[:-1])[0] 139 | 140 | # and sum (\Delta recall) * prec 141 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 142 | return ap 143 | 144 | 145 | def get_batch_statistics(outputs, targets, iou_threshold): 146 | """ Compute true positives, predicted scores and predicted labels per sample """ 147 | batch_metrics = [] 148 | for sample_i in range(len(outputs)): 149 | 150 | if outputs[sample_i] is None: 151 | continue 152 | 153 | output = outputs[sample_i] 154 | pred_boxes = output[:, :4] 155 | pred_scores = output[:, 4] 156 | pred_labels = output[:, -1] 157 | 158 | true_positives = np.zeros(pred_boxes.shape[0]) 159 | 160 | annotations = targets[targets[:, 0] == sample_i][:, 1:] 161 | target_labels = annotations[:, 0] if len(annotations) else [] 162 | if len(annotations): 163 | detected_boxes = [] 164 | target_boxes = annotations[:, 1:] 165 | 166 | for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): 167 | 168 | # If targets are found break 169 | if len(detected_boxes) == len(annotations): 170 | break 171 | 172 | # Ignore if label is not one of the target labels 173 | if pred_label not in target_labels: 174 | continue 175 | 176 | iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) 177 | if iou >= iou_threshold and box_index not in detected_boxes: 178 | true_positives[pred_i] = 1 179 | detected_boxes += [box_index] 180 | batch_metrics.append([true_positives, pred_scores, pred_labels]) 181 | return batch_metrics 182 | 183 | 184 | def bbox_wh_iou(wh1, wh2): 185 | wh2 = wh2.t() 186 | w1, h1 = wh1[0], wh1[1] 187 | w2, h2 = wh2[0], wh2[1] 188 | inter_area = torch.min(w1, w2) * torch.min(h1, h2) 189 | union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area 190 | return inter_area / union_area 191 | 192 | 193 | def bbox_iou(box1, box2, x1y1x2y2=True): 194 | """ 195 | Returns the IoU of two bounding boxes 196 | """ 197 | if not x1y1x2y2: 198 | # Transform from center and width to exact coordinates 199 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 200 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 201 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 202 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 203 | else: 204 | # Get the coordinates of bounding boxes 205 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] 206 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] 207 | 208 | # get the corrdinates of the intersection rectangle 209 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 210 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 211 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 212 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 213 | # Intersection area 214 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( 215 | inter_rect_y2 - inter_rect_y1 + 1, min=0 216 | ) 217 | # Union Area 218 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) 219 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) 220 | 221 | iou = inter_area 
/ (b1_area + b2_area - inter_area + 1e-16) 222 | 223 | return iou 224 | 225 | 226 | def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4): 227 | """ 228 | Removes detections with lower object confidence score than 'conf_thres' and performs 229 | Non-Maximum Suppression to further filter detections. 230 | Returns detections with shape: 231 | (x1, y1, x2, y2, object_conf, class_score, class_pred) 232 | """ 233 | 234 | # From (center x, center y, width, height) to (x1, y1, x2, y2) 235 | prediction[..., :4] = xywh2xyxy(prediction[..., :4]) 236 | output = [None for _ in range(len(prediction))] 237 | for image_i, image_pred in enumerate(prediction): 238 | # Filter out confidence scores below threshold 239 | image_pred = image_pred[image_pred[:, 4] >= conf_thres] 240 | # If none are remaining => process next image 241 | if not image_pred.size(0): 242 | continue 243 | # Object confidence times class confidence 244 | score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0] 245 | # Sort by it 246 | image_pred = image_pred[(-score).argsort()] 247 | class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True) 248 | detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1) 249 | # Perform non-maximum suppression 250 | keep_boxes = [] 251 | while detections.size(0): 252 | large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres 253 | label_match = detections[0, -1] == detections[:, -1] 254 | # Indices of boxes with lower confidence scores, large IOUs and matching labels 255 | invalid = large_overlap & label_match 256 | weights = detections[invalid, 4:5] 257 | # Merge overlapping bboxes by order of confidence 258 | detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum() 259 | keep_boxes += [detections[0]] 260 | detections = detections[~invalid] 261 | if keep_boxes: 262 | output[image_i] = torch.stack(keep_boxes) 263 | 264 | return output 265 | 266 | 267 | def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres): 268 | 269 | ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor 270 | FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor 271 | 272 | nB = pred_boxes.size(0) 273 | nA = pred_boxes.size(1) 274 | nC = pred_cls.size(-1) 275 | nG = pred_boxes.size(2) 276 | 277 | # Output tensors 278 | obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0) 279 | noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1) 280 | class_mask = FloatTensor(nB, nA, nG, nG).fill_(0) 281 | iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0) 282 | tx = FloatTensor(nB, nA, nG, nG).fill_(0) 283 | ty = FloatTensor(nB, nA, nG, nG).fill_(0) 284 | tw = FloatTensor(nB, nA, nG, nG).fill_(0) 285 | th = FloatTensor(nB, nA, nG, nG).fill_(0) 286 | tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0) 287 | 288 | # Convert to position relative to box 289 | target_boxes = target[:, 2:6] * nG 290 | gxy = target_boxes[:, :2] 291 | gwh = target_boxes[:, 2:] 292 | # Get anchors with best iou 293 | ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors]) 294 | best_ious, best_n = ious.max(0) 295 | # Separate target values 296 | b, target_labels = target[:, :2].long().t() 297 | gx, gy = gxy.t() 298 | gw, gh = gwh.t() 299 | gi, gj = gxy.long().t() 300 | # Set masks 301 | obj_mask[b, best_n, gj, gi] = 1 302 | noobj_mask[b, best_n, gj, gi] = 0 303 | 304 | # Set noobj mask to zero where iou exceeds ignore threshold 305 | for i, anchor_ious in enumerate(ious.t()): 306 | noobj_mask[b[i], 
anchor_ious > ignore_thres, gj[i], gi[i]] = 0 307 | 308 | # Coordinates 309 | tx[b, best_n, gj, gi] = gx - gx.floor() 310 | ty[b, best_n, gj, gi] = gy - gy.floor() 311 | # Width and height 312 | tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16) 313 | th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16) 314 | # One-hot encoding of label 315 | tcls[b, best_n, gj, gi, target_labels] = 1 316 | # Compute label correctness and iou at best anchor 317 | class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float() 318 | iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False) 319 | 320 | tconf = obj_mask.float() 321 | return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf 322 | --------------------------------------------------------------------------------
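A minimal end-to-end sketch of how the modules above fit together outside the GUI: Darknet (models.py) detects, yolo_prediction (predict.py) decodes and applies NMS, and Sort (utils/sort.py) assigns track ids. The video path and drawing details are illustrative assumptions; in the repository the Qt application (app.py with counter.py) drives this pipeline and adds the counting-area logic:

    import cv2
    import numpy as np
    import torch

    from models import Darknet
    from predict import yolo_prediction
    from utils.sort import Sort
    from utils.utils import load_classes

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet("config/yolov3.cfg").to(device)
    model.load_darknet_weights("weights/yolov3.weights")
    class_names = load_classes("config/coco.names")

    tracker = Sort()
    cap = cv2.VideoCapture("asserts/test.mp4")
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        # yolo_prediction returns [label, confidence, [cx, cy, w, h]] per detection
        detections = yolo_prediction(model, device, frame, class_names)
        dets = np.array(
            [[cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2, conf]
             for _, conf, (cx, cy, w, h) in detections]
        ).reshape(-1, 5)
        # Sort.update takes [[x1, y1, x2, y2, score], ...] and appends a track id column
        for x1, y1, x2, y2, track_id in tracker.update(dets):
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
            cv2.putText(frame, str(int(track_id)), (int(x1), int(y1) - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
        cv2.imshow("tracking", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()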