├── .idea
│   ├── .gitignore
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── object_DNF.iml
├── README.md
├── __pycache__
│   ├── direction_move.cpython-38.pyc
│   ├── directkeys.cpython-38.pyc
│   ├── getkeys.cpython-38.pyc
│   ├── grabscreen.cpython-38.pyc
│   ├── skill_recgnize.cpython-38.pyc
│   └── small_recgonize.cpython-38.pyc
├── datasets_utils.py
├── direction_move.py
├── directkeys.py
├── getkeys.py
├── grabscreen.py
├── image_grab.py
├── json2yolo.py
├── main2.py
├── models
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   ├── __init__.cpython-37.pyc
│   │   ├── __init__.cpython-38.pyc
│   │   ├── common.cpython-36.pyc
│   │   ├── common.cpython-37.pyc
│   │   ├── common.cpython-38.pyc
│   │   ├── experimental.cpython-36.pyc
│   │   ├── experimental.cpython-37.pyc
│   │   ├── experimental.cpython-38.pyc
│   │   ├── yolo.cpython-36.pyc
│   │   ├── yolo.cpython-37.pyc
│   │   └── yolo.cpython-38.pyc
│   ├── common.py
│   ├── experimental.py
│   ├── export.py
│   ├── hub
│   │   ├── yolov3-spp.yaml
│   │   ├── yolov5-fpn.yaml
│   │   └── yolov5-panet.yaml
│   ├── tmpvzcovfjn
│   ├── yolo.py
│   ├── yolov5l.yaml
│   ├── yolov5m.yaml
│   ├── yolov5s.yaml
│   ├── yolov5s.yaml.bak
│   └── yolov5x.yaml
├── skill_recgnize.py
├── small_recgonize.py
├── utils
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   ├── __init__.cpython-37.pyc
│   │   ├── __init__.cpython-38.pyc
│   │   ├── activations.cpython-36.pyc
│   │   ├── datasets.cpython-36.pyc
│   │   ├── datasets.cpython-37.pyc
│   │   ├── datasets.cpython-38.pyc
│   │   ├── general.cpython-36.pyc
│   │   ├── general.cpython-37.pyc
│   │   ├── general.cpython-38.pyc
│   │   ├── google_utils.cpython-36.pyc
│   │   ├── google_utils.cpython-37.pyc
│   │   ├── google_utils.cpython-38.pyc
│   │   ├── torch_utils.cpython-36.pyc
│   │   ├── torch_utils.cpython-37.pyc
│   │   └── torch_utils.cpython-38.pyc
│   ├── activations.py
│   ├── datasets.py
│   ├── evolve.sh
│   ├── general.py
│   ├── google_app_engine
│   │   ├── Dockerfile
│   │   ├── additional_requirements.txt
│   │   └── app.yaml
│   ├── google_utils.py
│   └── torch_utils.py
├── yolo5_detect.py
└── 问号模板.npy

/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /../../../../:\Computer_vision\object_DNF\.idea/dataSources/
6 | /dataSources.local.xml
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
--------------------------------------------------------------------------------
/.idea/object_DNF.iml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # yolov5-DNF
2 | Detect objects in the DNF (Dungeon & Fighter) game screen with YOLOv5, and drive the character to fight monsters automatically with purpose-built control logic.
3 |
4 | For a detailed tutorial with a thorough walkthrough, see the Bilibili video: https://www.bilibili.com/video/BV18r4y1A7BF/
5 |
6 | A few things to note when using the code:
7 |
8 | 1. The code uses OpenCV to run template matching and binarization on the mini-map and the skill bar. Note that game resolution and monitor resolution differ from machine to machine: the (0,0,1280,800) region given in the code is the resolution of my own game window, and the crops img[45:65, 1107:1270] and img[733:793, 538:750, 2] in small_recgonize.py and skill_recgnize.py depend on your monitor's resolution, so you will need to adjust them yourself.
9 |
10 | 2. The YOLOv5 model I trained still has room for improvement: the training set contains only 294 images, so detection quality is only passable.
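To make point 1 concrete, here is a minimal sketch for checking the two crop regions on your own setup before running main2.py. It is not part of the repository: the file name check_regions.py is hypothetical, and the slice values are simply the defaults quoted above for a 1280x800 window; it only reuses grab_screen from grabscreen.py.

```python
# check_regions.py -- hypothetical helper: capture one frame and show the two crops
# so the slices can be tuned to your own game/monitor resolution.
import cv2
from grabscreen import grab_screen

window_size = (0, 0, 1280, 800)       # capture region used in image_grab.py / main2.py

# grab_screen returns a BGRA screenshot; convert to BGR for display
img = cv2.cvtColor(grab_screen(window_size), cv2.COLOR_BGRA2BGR)

# Default slices from small_recgonize.py / skill_recgnize.py (author's 1280x800 setup);
# adjust the bounds until the windows show exactly the mini-map and the skill bar.
small_map = img[45:65, 1107:1270]     # mini-map region
skill_bar = img[733:793, 538:750, 2]  # skill-bar region, red channel only

cv2.imshow("small_map", small_map)
cv2.imshow("skill_bar", skill_bar)
cv2.waitKey(0)
cv2.destroyAllWindows()
```

If either window shows the wrong area, only the slice bounds need to change; the rest of the pipeline reads the same coordinates.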
--------------------------------------------------------------------------------
/__pycache__/direction_move.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/__pycache__/direction_move.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/directkeys.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/__pycache__/directkeys.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/getkeys.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/__pycache__/getkeys.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/grabscreen.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/__pycache__/grabscreen.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/skill_recgnize.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/__pycache__/skill_recgnize.cpython-38.pyc
--------------------------------------------------------------------------------
/__pycache__/small_recgonize.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/__pycache__/small_recgonize.cpython-38.pyc
--------------------------------------------------------------------------------
/datasets_utils.py:
--------------------------------------------------------------------------------
1 | import cv2 as cv
2 | import os
3 | import shutil
4 |
5 | # Copy the annotated images and their label files to a new directory
6 | root_path = "datasets/guiqi/patch1"
7 | yolo5_data_dir = "datasets/guiqi/patch1_yolo5"
8 |
9 | json_list = []
10 | imgs_list = []
11 | dir = os.listdir(root_path)
12 | for d in dir:
13 |     if d.endswith(".json"):
14 |         imgs_list.append(d.strip().split(".")[0] + ".jpg")
15 |         json_list.append(d)
16 | print(imgs_list)
17 | print(json_list)
18 |
19 | for img_name, json in zip(imgs_list, json_list):
20 |     shutil.copy(os.path.join(root_path + "/" + img_name), os.path.join(yolo5_data_dir + '/imgs'))
21 |     shutil.copy(os.path.join(root_path + "/" + json), os.path.join(yolo5_data_dir + '/labels_json'))
22 |
23 | # # Hold out part of the data as a validation set
24 | # img_train_path = r"F:\Computer_vision\yolov5\YOLO5\DNF\train\images"
25 | # img_valid_path = r"F:\Computer_vision\yolov5\YOLO5\DNF\valid\images"
26 | # label_train_path = r"F:\Computer_vision\yolov5\YOLO5\DNF\train\labels"
27 | # label_valid_path = r"F:\Computer_vision\yolov5\YOLO5\DNF\valid\labels"
28 | # eval_ratio = 0.1
29 | # dir = 
os.listdir(img_train_path) 30 | # eval_nums = int(eval_ratio * len(dir)) 31 | # import random 32 | # random.shuffle(dir) 33 | # for d in dir[:eval_nums]: 34 | # shutil.move(os.path.join(img_train_path + "\\" + d), os.path.join(img_valid_path + "\\" + d)) 35 | # shutil.move(os.path.join(label_train_path + "\\" + d.strip().split(".")[0] + ".txt"), 36 | # os.path.join(label_valid_path + "\\" + d.strip().split(".")[0] + ".txt")) 37 | 38 | # undict生成 39 | # 40 | # name2id = {'hero': 0, 'small_map': 1, "monster": 2, 'money': 3, 'material': 4, 'door': 5, 'BOSS': 6, 'box': 7, 'options': 8} 41 | # id2name = {} 42 | # for key, val in name2id.items(): 43 | # id2name[val] = key 44 | # print(id2name) -------------------------------------------------------------------------------- /direction_move.py: -------------------------------------------------------------------------------- 1 | import time 2 | from directkeys import PressKey, ReleaseKey, key_down, key_up 3 | 4 | direct_dic = {"UP": 0xC8, "DOWN": 0xD0, "LEFT": 0xCB, "RIGHT": 0xCD} 5 | 6 | def move(direct, material=False, action_cache=None, press_delay=0.1, release_delay=0.1): 7 | if direct == "RIGHT": 8 | if action_cache != None: 9 | if action_cache != "RIGHT": 10 | if action_cache not in ["LEFT", "RIGHT", "UP", "DOWN"]: 11 | ReleaseKey(direct_dic[action_cache.strip().split("_")[0]]) 12 | ReleaseKey(direct_dic[action_cache.strip().split("_")[1]]) 13 | else: 14 | ReleaseKey(direct_dic[action_cache]) 15 | PressKey(direct_dic["RIGHT"]) 16 | if not material: 17 | time.sleep(press_delay) 18 | ReleaseKey(direct_dic["RIGHT"]) 19 | time.sleep(release_delay) 20 | PressKey(direct_dic["RIGHT"]) 21 | action_cache = "RIGHT" 22 | print("向右移动") 23 | else: 24 | print("向右移动") 25 | else: 26 | PressKey(direct_dic["RIGHT"]) 27 | if not material: 28 | time.sleep(press_delay) 29 | ReleaseKey(direct_dic["RIGHT"]) 30 | time.sleep(release_delay) 31 | PressKey(direct_dic["RIGHT"]) 32 | action_cache = "RIGHT" 33 | print("向右移动") 34 | return action_cache 35 | 36 | elif direct == "LEFT": 37 | if action_cache != None: 38 | if action_cache != "LEFT": 39 | if action_cache not in ["LEFT", "RIGHT", "UP", "DOWN"]: 40 | ReleaseKey(direct_dic[action_cache.strip().split("_")[0]]) 41 | ReleaseKey(direct_dic[action_cache.strip().split("_")[1]]) 42 | else: 43 | ReleaseKey(direct_dic[action_cache]) 44 | PressKey(direct_dic["LEFT"]) 45 | if not material: 46 | time.sleep(press_delay) 47 | ReleaseKey(direct_dic["LEFT"]) 48 | time.sleep(release_delay) 49 | PressKey(direct_dic["LEFT"]) 50 | action_cache = "LEFT" 51 | print("向左移动") 52 | else: 53 | print("向左移动") 54 | else: 55 | PressKey(direct_dic["LEFT"]) 56 | if not material: 57 | time.sleep(press_delay) 58 | ReleaseKey(direct_dic["LEFT"]) 59 | time.sleep(release_delay) 60 | PressKey(direct_dic["LEFT"]) 61 | action_cache = "LEFT" 62 | print("向左移动") 63 | return action_cache 64 | 65 | elif direct == "UP": 66 | if action_cache != None: 67 | if action_cache != "UP": 68 | if action_cache not in ["LEFT", "RIGHT", "UP", "DOWN"]: 69 | ReleaseKey(direct_dic[action_cache.strip().split("_")[0]]) 70 | ReleaseKey(direct_dic[action_cache.strip().split("_")[1]]) 71 | else: 72 | ReleaseKey(direct_dic[action_cache]) 73 | PressKey(direct_dic["UP"]) 74 | # time.sleep(press_delay) 75 | # ReleaseKey(direct_dic["UP"]) 76 | # time.sleep(release_delay) 77 | # PressKey(direct_dic["UP"]) 78 | action_cache = "UP" 79 | print("向上移动") 80 | else: 81 | print("向上移动") 82 | else: 83 | PressKey(direct_dic["UP"]) 84 | # time.sleep(press_delay) 85 | # ReleaseKey(direct_dic["UP"]) 86 
| # time.sleep(release_delay) 87 | # PressKey(direct_dic["UP"]) 88 | action_cache = "UP" 89 | print("向上移动") 90 | return action_cache 91 | 92 | elif direct == "DOWN": 93 | if action_cache != None: 94 | if action_cache != "DOWN": 95 | if action_cache not in ["LEFT", "RIGHT", "UP", "DOWN"]: 96 | ReleaseKey(direct_dic[action_cache.strip().split("_")[0]]) 97 | ReleaseKey(direct_dic[action_cache.strip().split("_")[1]]) 98 | else: 99 | ReleaseKey(direct_dic[action_cache]) 100 | PressKey(direct_dic["DOWN"]) 101 | # time.sleep(press_delay) 102 | # ReleaseKey(direct_dic["DOWN"]) 103 | # time.sleep(release_delay) 104 | # PressKey(direct_dic["DOWN"]) 105 | action_cache = "DOWN" 106 | print("向下移动") 107 | else: 108 | print("向下移动") 109 | else: 110 | PressKey(direct_dic["DOWN"]) 111 | # time.sleep(press_delay) 112 | # ReleaseKey(direct_dic["DOWN"]) 113 | # time.sleep(release_delay) 114 | # PressKey(direct_dic["DOWN"]) 115 | action_cache = "DOWN" 116 | print("向下移动") 117 | return action_cache 118 | 119 | elif direct == "RIGHT_UP": 120 | if action_cache != None: 121 | if action_cache != "RIGHT_UP": 122 | if action_cache not in ["LEFT", "RIGHT", "UP", "DOWN"]: 123 | ReleaseKey(direct_dic[action_cache.strip().split("_")[0]]) 124 | ReleaseKey(direct_dic[action_cache.strip().split("_")[1]]) 125 | else: 126 | ReleaseKey(direct_dic[action_cache]) 127 | if not material: 128 | PressKey(direct_dic["RIGHT"]) 129 | time.sleep(press_delay) 130 | ReleaseKey(direct_dic["RIGHT"]) 131 | time.sleep(release_delay) 132 | PressKey(direct_dic["RIGHT"]) 133 | time.sleep(press_delay) 134 | if material: 135 | PressKey(direct_dic["RIGHT"]) 136 | PressKey(direct_dic["UP"]) 137 | # time.sleep(release_delay) 138 | action_cache = "RIGHT_UP" 139 | print("右上移动") 140 | else: 141 | print("右上移动") 142 | else: 143 | if not material: 144 | PressKey(direct_dic["RIGHT"]) 145 | time.sleep(press_delay) 146 | ReleaseKey(direct_dic["RIGHT"]) 147 | time.sleep(release_delay) 148 | PressKey(direct_dic["RIGHT"]) 149 | time.sleep(press_delay) 150 | if material: 151 | PressKey(direct_dic["RIGHT"]) 152 | PressKey(direct_dic["UP"]) 153 | # time.sleep(press_delay) 154 | action_cache = "RIGHT_UP" 155 | print("右上移动") 156 | return action_cache 157 | 158 | elif direct == "RIGHT_DOWN": 159 | if action_cache != None: 160 | if action_cache != "RIGHT_DOWN": 161 | if action_cache not in ["LEFT", "RIGHT", "UP", "DOWN"]: 162 | ReleaseKey(direct_dic[action_cache.strip().split("_")[0]]) 163 | ReleaseKey(direct_dic[action_cache.strip().split("_")[1]]) 164 | else: 165 | ReleaseKey(direct_dic[action_cache]) 166 | if not material: 167 | PressKey(direct_dic["RIGHT"]) 168 | time.sleep(press_delay) 169 | ReleaseKey(direct_dic["RIGHT"]) 170 | time.sleep(release_delay) 171 | PressKey(direct_dic["RIGHT"]) 172 | time.sleep(press_delay) 173 | if material: 174 | PressKey(direct_dic["RIGHT"]) 175 | PressKey(direct_dic["DOWN"]) 176 | # time.sleep(press_delay) 177 | action_cache = "RIGHT_DOWN" 178 | print("右上移动") 179 | else: 180 | print("右上移动") 181 | else: 182 | if not material: 183 | PressKey(direct_dic["RIGHT"]) 184 | time.sleep(press_delay) 185 | ReleaseKey(direct_dic["RIGHT"]) 186 | time.sleep(release_delay) 187 | PressKey(direct_dic["RIGHT"]) 188 | time.sleep(press_delay) 189 | if material: 190 | PressKey(direct_dic["RIGHT"]) 191 | PressKey(direct_dic["DOWN"]) 192 | # time.sleep(press_delay) 193 | action_cache = "RIGHT_DOWN" 194 | print("右上移动") 195 | return action_cache 196 | 197 | elif direct == "LEFT_UP": 198 | if action_cache != None: 199 | if action_cache != "LEFT_UP": 200 | if 
action_cache not in ["LEFT", "RIGHT", "UP", "DOWN"]: 201 | ReleaseKey(direct_dic[action_cache.strip().split("_")[0]]) 202 | ReleaseKey(direct_dic[action_cache.strip().split("_")[1]]) 203 | else: 204 | ReleaseKey(direct_dic[action_cache]) 205 | if not material: 206 | PressKey(direct_dic["LEFT"]) 207 | time.sleep(press_delay) 208 | ReleaseKey(direct_dic["LEFT"]) 209 | time.sleep(release_delay) 210 | PressKey(direct_dic["LEFT"]) 211 | time.sleep(press_delay) 212 | if material: 213 | PressKey(direct_dic["LEFT"]) 214 | PressKey(direct_dic["UP"]) 215 | # time.sleep(press_delay) 216 | action_cache = "LEFT_UP" 217 | print("左上移动") 218 | else: 219 | print("左上移动") 220 | else: 221 | if not material: 222 | PressKey(direct_dic["LEFT"]) 223 | time.sleep(press_delay) 224 | ReleaseKey(direct_dic["LEFT"]) 225 | time.sleep(release_delay) 226 | PressKey(direct_dic["LEFT"]) 227 | time.sleep(press_delay) 228 | if material: 229 | PressKey(direct_dic["LEFT"]) 230 | PressKey(direct_dic["UP"]) 231 | # time.sleep(press_delay) 232 | action_cache = "LEFT_UP" 233 | print("左上移动") 234 | return action_cache 235 | 236 | elif direct == "LEFT_DOWN": 237 | if action_cache != None: 238 | if action_cache != "LEFT_DOWN": 239 | if action_cache not in ["LEFT", "RIGHT", "UP", "DOWN"]: 240 | ReleaseKey(direct_dic[action_cache.strip().split("_")[0]]) 241 | ReleaseKey(direct_dic[action_cache.strip().split("_")[1]]) 242 | else: 243 | ReleaseKey(direct_dic[action_cache]) 244 | if not material: 245 | PressKey(direct_dic["LEFT"]) 246 | time.sleep(press_delay) 247 | ReleaseKey(direct_dic["LEFT"]) 248 | time.sleep(release_delay) 249 | PressKey(direct_dic["LEFT"]) 250 | time.sleep(press_delay) 251 | if material: 252 | PressKey(direct_dic["LEFT"]) 253 | PressKey(direct_dic["DOWN"]) 254 | # time.sleep(press_delay) 255 | action_cache = "LEFT_DOWN" 256 | print("左下移动") 257 | else: 258 | print("左下移动") 259 | else: 260 | if not material: 261 | PressKey(direct_dic["LEFT"]) 262 | time.sleep(press_delay) 263 | ReleaseKey(direct_dic["LEFT"]) 264 | time.sleep(release_delay) 265 | PressKey(direct_dic["LEFT"]) 266 | time.sleep(press_delay) 267 | if material: 268 | PressKey(direct_dic["LEFT"]) 269 | PressKey(direct_dic["DOWN"]) 270 | # time.sleep(press_delay) 271 | action_cache = "LEFT_DOWN" 272 | print("左下移动") 273 | return action_cache 274 | 275 | 276 | if __name__ == "__main__": 277 | action_cache = None 278 | t1 = time.time() 279 | # while True: 280 | # if int(time.time() - t1) % 2 == 0: 281 | # action_cache = move("LEFT_DOWN", material=False, action_cache=action_cache, press_delay=0.1, release_delay=0.1) 282 | # else: 283 | action_cache = move("RIGHT_UP", material=True, action_cache=action_cache, press_delay=0.1, release_delay=0.1) -------------------------------------------------------------------------------- /directkeys.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import win32con 3 | import win32api 4 | import time 5 | 6 | key_map = { 7 | "0": 49, "1": 50, "2": 51, "3": 52, "4": 53, "5": 54, "6": 55, "7": 56, "8": 57, "9": 58, 8 | "A": 65, "B": 66, "C": 67, "D": 68, "E": 69, "F": 70, "G": 71, "H": 72, "I": 73, "J": 74, 9 | "K": 75, "L": 76, "M": 77, "N": 78, "O": 79, "P": 80, "Q": 81, "R": 82, "S": 83, "T": 84, 10 | "U": 85, "V": 86, "W": 87, "X": 88, "Y": 89, "Z": 90, "LEFT": 37, "UP": 38, "RIGHT": 39, 11 | "DOWN": 40, "CTRL": 17, "ALT": 18, "F2": 113, "ESC": 27, "SPACE": 32, "NUM0": 96 12 | } 13 | 14 | 15 | def key_down(key): 16 | """ 17 | 函数功能:按下按键 18 | 参 数:key:按键值 19 | """ 20 | key = 
key.upper() 21 | vk_code = key_map[key] 22 | win32api.keybd_event(vk_code, win32api.MapVirtualKey(vk_code, 0), 0, 0) 23 | 24 | 25 | def key_up(key): 26 | """ 27 | 函数功能:抬起按键 28 | 参 数:key:按键值 29 | """ 30 | key = key.upper() 31 | vk_code = key_map[key] 32 | win32api.keybd_event(vk_code, win32api.MapVirtualKey(vk_code, 0), win32con.KEYEVENTF_KEYUP, 0) 33 | 34 | 35 | def key_press(key): 36 | """ 37 | 函数功能:点击按键(按下并抬起) 38 | 参 数:key:按键值 39 | """ 40 | key_down(key) 41 | time.sleep(0.02) 42 | key_up(key) 43 | time.sleep(0.01) 44 | 45 | #################################### 46 | import ctypes 47 | import time 48 | 49 | SendInput = ctypes.windll.user32.SendInput 50 | 51 | W = 0x11 52 | A = 0x1E 53 | S = 0x1F 54 | D = 0x20 55 | 56 | M = 0x32 57 | J = 0x24 58 | K = 0x25 59 | LSHIFT = 0x2A 60 | R = 0x13 # 用R代替识破 61 | V = 0x2F 62 | 63 | Q = 0x10 64 | I = 0x17 65 | O = 0x18 66 | P = 0x19 67 | C = 0x2E 68 | F = 0x21 69 | 70 | up = 0xC8 71 | down = 0xD0 72 | left = 0xCB 73 | right = 0xCD 74 | 75 | direct_dic = {"UP": 0xC8, "DOWN": 0xD0, "LEFT": 0xCB, "RIGHT": 0xCD} 76 | 77 | esc = 0x01 78 | 79 | # C struct redefinitions 80 | PUL = ctypes.POINTER(ctypes.c_ulong) 81 | 82 | 83 | class KeyBdInput(ctypes.Structure): 84 | _fields_ = [("wVk", ctypes.c_ushort), 85 | ("wScan", ctypes.c_ushort), 86 | ("dwFlags", ctypes.c_ulong), 87 | ("time", ctypes.c_ulong), 88 | ("dwExtraInfo", PUL)] 89 | 90 | 91 | class HardwareInput(ctypes.Structure): 92 | _fields_ = [("uMsg", ctypes.c_ulong), 93 | ("wParamL", ctypes.c_short), 94 | ("wParamH", ctypes.c_ushort)] 95 | 96 | 97 | class MouseInput(ctypes.Structure): 98 | _fields_ = [("dx", ctypes.c_long), 99 | ("dy", ctypes.c_long), 100 | ("mouseData", ctypes.c_ulong), 101 | ("dwFlags", ctypes.c_ulong), 102 | ("time", ctypes.c_ulong), 103 | ("dwExtraInfo", PUL)] 104 | 105 | 106 | class Input_I(ctypes.Union): 107 | _fields_ = [("ki", KeyBdInput), 108 | ("mi", MouseInput), 109 | ("hi", HardwareInput)] 110 | 111 | 112 | class Input(ctypes.Structure): 113 | _fields_ = [("type", ctypes.c_ulong), 114 | ("ii", Input_I)] 115 | 116 | 117 | # Actuals Functions 118 | 119 | def PressKey(hexKeyCode): 120 | extra = ctypes.c_ulong(0) 121 | ii_ = Input_I() 122 | ii_.ki = KeyBdInput(0, hexKeyCode, 0x0008, 0, ctypes.pointer(extra)) 123 | x = Input(ctypes.c_ulong(1), ii_) 124 | ctypes.windll.user32.SendInput(1, ctypes.pointer(x), ctypes.sizeof(x)) 125 | 126 | 127 | def ReleaseKey(hexKeyCode): 128 | extra = ctypes.c_ulong(0) 129 | ii_ = Input_I() 130 | ii_.ki = KeyBdInput(0, hexKeyCode, 0x0008 | 0x0002, 0, ctypes.pointer(extra)) 131 | x = Input(ctypes.c_ulong(1), ii_) 132 | ctypes.windll.user32.SendInput(1, ctypes.pointer(x), ctypes.sizeof(x)) 133 | 134 | 135 | def defense(): 136 | PressKey(M) 137 | time.sleep(0.05) 138 | ReleaseKey(M) 139 | # time.sleep(0.1) 140 | 141 | 142 | def attack(): 143 | PressKey(J) 144 | time.sleep(0.05) 145 | ReleaseKey(J) 146 | # time.sleep(0.1) 147 | 148 | 149 | def go_forward(): 150 | PressKey(W) 151 | time.sleep(0.4) 152 | ReleaseKey(W) 153 | 154 | 155 | def go_back(): 156 | PressKey(S) 157 | time.sleep(0.4) 158 | ReleaseKey(S) 159 | 160 | 161 | def go_left(): 162 | PressKey(A) 163 | time.sleep(0.4) 164 | ReleaseKey(A) 165 | 166 | 167 | def go_right(): 168 | PressKey(D) 169 | time.sleep(0.4) 170 | ReleaseKey(D) 171 | 172 | 173 | def jump(): 174 | PressKey(K) 175 | time.sleep(0.1) 176 | ReleaseKey(K) 177 | # time.sleep(0.1) 178 | 179 | 180 | def dodge(): # 闪避 181 | PressKey(R) 182 | time.sleep(0.1) 183 | ReleaseKey(R) 184 | # time.sleep(0.1) 185 | 186 | 187 | def 
lock_vision(): 188 | PressKey(V) 189 | time.sleep(0.3) 190 | ReleaseKey(V) 191 | time.sleep(0.1) 192 | 193 | 194 | def go_forward_QL(t): 195 | PressKey(W) 196 | time.sleep(t) 197 | ReleaseKey(W) 198 | 199 | 200 | def turn_left(t): 201 | PressKey(left) 202 | time.sleep(t) 203 | ReleaseKey(left) 204 | 205 | 206 | def turn_up(t): 207 | PressKey(up) 208 | time.sleep(t) 209 | ReleaseKey(up) 210 | 211 | 212 | def turn_right(t): 213 | PressKey(right) 214 | time.sleep(t) 215 | ReleaseKey(right) 216 | 217 | 218 | def F_go(): 219 | PressKey(F) 220 | time.sleep(0.5) 221 | ReleaseKey(F) 222 | 223 | 224 | def forward_jump(t): 225 | PressKey(W) 226 | time.sleep(t) 227 | PressKey(K) 228 | ReleaseKey(W) 229 | ReleaseKey(K) 230 | 231 | 232 | def press_esc(): 233 | PressKey(esc) 234 | time.sleep(0.3) 235 | ReleaseKey(esc) 236 | 237 | 238 | def dead(): 239 | PressKey(M) 240 | time.sleep(0.5) 241 | ReleaseKey(M) 242 | 243 | if __name__ == "__main__": 244 | time1 = time.time() 245 | k = "LEFT" 246 | s = "D" 247 | while True: 248 | if abs(time.time() - time1) > 10: 249 | break 250 | else: 251 | # if k not in ["LEFT", "RIGHT", "UP", "DOWN"]: 252 | # key_press(k) 253 | # else: 254 | # PressKey(direct_dic[k]) 255 | # time.sleep(0.1) 256 | # ReleaseKey(direct_dic[k]) 257 | # time.sleep(0.2) 258 | PressKey(direct_dic[k]) 259 | key_down(s) 260 | time.sleep(0.02) 261 | key_up(s) 262 | ReleaseKey(direct_dic[k]) 263 | time.sleep(0.02) 264 | 265 | -------------------------------------------------------------------------------- /getkeys.py: -------------------------------------------------------------------------------- 1 | import win32api as wapi 2 | import time 3 | 4 | dict = {"A": 0, "S": 1,"D": 2, "F": 3, "G": 4, "H": 5, "Q": 6, "W": 7, "E": 8, "R": 9, "T": 10, "Y": 11, "up": 12, 5 | "down": 13, "left": 14, "right":15, "ctrl": 16, "alt": 17, "Z":18, "X":19, "C": 20, "esc": 21, "f2": 22, 6 | "space": 23, "num0": 24, "left_up": 25, "left down": 26, "right_up": 27, "right_down": 28, "left_A": 29, 7 | "left_S": 30, "left_D": 31, "left_F": 32, "left_G": 33, "left_H": 34,"left_Q": 35, "left_W": 36, "left_E": 37, 8 | "left_R": 38, "left_T": 39, "left_Y": 40, "up_A": 41,"up_S": 42, "up_D": 43, "up_F": 44, "up_G": 45, 9 | "up_H": 46,"up_Q": 47, "up_W": 48, "up_E": 49, "up_R": 50, "up_T": 51, "up_Y": 52,"down_A": 53, 10 | "down_S": 54, "down_D": 55, "down_F": 56, "down_G": 57, "down_H": 58,"down_Q": 59, "down_W": 60, "down_E": 61, 11 | "down_R": 62, "down_T": 63, "down_Y": 64, "right_A": 65, "right_S": 66, "right_D": 67, "right_F": 68, "right_G": 69, 12 | "right_H": 70,"right_Q": 71, "right_W": 72, "right_E": 73, "right_R": 74, "right_T": 75, "right_Y": 76, "left_z": 77, 13 | "left_x": 78, "left_c": 79, "up_z": 80,"up_x": 81, "up_c": 82, "down_z": 83, "down_x": 84, "down_c": 85, "right_z": 86, 14 | "right_x": 87, "right_c": 88, "left_ctrl": 89, "up_ctrl": 90, "down_ctrl": 91, "right_ctrl": 92, "P": 100} 15 | 16 | keyList = [] 17 | for char in "ASDFGHQWERTYZXCP": 18 | keyList.append(char) 19 | 20 | def key_check(): 21 | keys = [] 22 | for key in keyList: 23 | if wapi.GetAsyncKeyState(ord(key)): 24 | keys.append(key) 25 | if wapi.GetAsyncKeyState(37): 26 | keys.append("left") 27 | if wapi.GetAsyncKeyState(39): 28 | keys.append("right") 29 | if wapi.GetAsyncKeyState(38): 30 | keys.append("up") 31 | if wapi.GetAsyncKeyState(40): 32 | keys.append("down") 33 | if wapi.GetAsyncKeyState(17): 34 | keys.append("ctrl") 35 | if wapi.GetAsyncKeyState(18): 36 | keys.append("alt") 37 | if wapi.GetAsyncKeyState(27): 38 | keys.append("esc") 39 
| if wapi.GetAsyncKeyState(113): 40 | keys.append("f2") 41 | if wapi.GetAsyncKeyState(32): 42 | keys.append("space") 43 | if wapi.GetAsyncKeyState(96): 44 | keys.append("num0") 45 | return keys 46 | 47 | 48 | def get_key(keys): 49 | if len(keys) == 1: 50 | output = dict[keys[0]] 51 | elif len(keys) == 2: 52 | for k in keys: 53 | if k == "left" or k == "up" or k == "down" or k == "right": 54 | keys.pop(keys.index(k)) 55 | key_name = k + "_" + keys[0] 56 | if key_name in dict.keys(): 57 | output = dict[key_name] 58 | else: 59 | output = dict[keys[0]] 60 | else: 61 | output = dict[keys[0]] 62 | elif len(keys) > 2: 63 | output = dict[keys[0]] 64 | else: 65 | output = 93 # 不做任何动作 66 | return output 67 | 68 | if __name__ == '__main__': 69 | # while True: 70 | # if get_key(key_check()) != 100: 71 | # print(key_check()) 72 | # print(get_key(key_check())) 73 | # else: 74 | # print("stop listen keyboard") 75 | # break 76 | undict = {} 77 | for key, val in dict.items(): 78 | undict[val] = key 79 | print(undict) 80 | -------------------------------------------------------------------------------- /grabscreen.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Apr 8 12:14:29 2020 4 | 5 | @author: analoganddigital ( GitHub ) 6 | """ 7 | import cv2 8 | import numpy as np 9 | import win32gui, win32ui, win32con, win32api 10 | 11 | def grab_screen(region=None): 12 | 13 | hwin = win32gui.GetDesktopWindow() 14 | 15 | if region: 16 | left,top,x2,y2 = region 17 | width = x2 - left + 1 18 | height = y2 - top + 1 19 | else: 20 | width = win32api.GetSystemMetrics(win32con.SM_CXVIRTUALSCREEN) 21 | height = win32api.GetSystemMetrics(win32con.SM_CYVIRTUALSCREEN) 22 | left = win32api.GetSystemMetrics(win32con.SM_XVIRTUALSCREEN) 23 | top = win32api.GetSystemMetrics(win32con.SM_YVIRTUALSCREEN) 24 | 25 | hwindc = win32gui.GetWindowDC(hwin) 26 | srcdc = win32ui.CreateDCFromHandle(hwindc) 27 | memdc = srcdc.CreateCompatibleDC() 28 | bmp = win32ui.CreateBitmap() 29 | bmp.CreateCompatibleBitmap(srcdc, width, height) 30 | memdc.SelectObject(bmp) 31 | memdc.BitBlt((0, 0), (width, height), srcdc, (left, top), win32con.SRCCOPY) 32 | 33 | signedIntsArray = bmp.GetBitmapBits(True) 34 | img = np.fromstring(signedIntsArray, dtype='uint8') 35 | img.shape = (height,width,4) 36 | 37 | srcdc.DeleteDC() 38 | memdc.DeleteDC() 39 | win32gui.ReleaseDC(hwin, hwindc) 40 | win32gui.DeleteObject(bmp.GetHandle()) 41 | 42 | return img -------------------------------------------------------------------------------- /image_grab.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Apr 8 09:45:04 2020 4 | 5 | @author: analoganddigital ( GitHub ) 6 | """ 7 | 8 | import numpy as np 9 | from PIL import ImageGrab 10 | import cv2 11 | import time 12 | import directkeys 13 | import grabscreen 14 | import getkeys 15 | import os 16 | 17 | wait_time = 5 18 | L_t = 3 19 | save_step = 200 20 | # file_name = 'training_data_2_3.npy' 21 | data_path = 'datasets/guiqi/material' 22 | window_size = (0,0,1280,800)#384,344 192,172 96,86 23 | 24 | # if os.path.isfile(file_name): 25 | # print("file exists , loading previous data") 26 | # training_data = list(np.load(file_name,allow_pickle=True)) 27 | # else: 28 | # print("file don't exists , create new one") 29 | # training_data = [] 30 | 31 | training_data = [] 32 | save = True 33 | for i in list(range(wait_time))[::-1]: 34 | print(i+1) 35 | 
time.sleep(1) 36 | 37 | last_time = time.time() 38 | counter = 0 39 | 40 | org_num = len(os.listdir(data_path)) 41 | while(True): 42 | output_key = getkeys.get_key(getkeys.key_check())#按键收集 43 | if output_key == 100: 44 | if save: 45 | print(len(training_data) + counter*save_step) 46 | for i, d in enumerate(training_data): 47 | file_name = os.path.join(data_path, str(org_num + counter*save_step + i) + "_" + str(d[1]) + '.jpg') 48 | cv2.imwrite(file_name, d[0]) 49 | print("save finish") 50 | break 51 | 52 | screen_gray = cv2.cvtColor(grabscreen.grab_screen(window_size),cv2.COLOR_BGRA2BGR)#灰度图像收集 53 | screen_reshape = cv2.resize(screen_gray,(1280,800)) # 1200, 750 600, 375 54 | 55 | training_data.append([screen_reshape,output_key]) 56 | 57 | if len(training_data) % save_step == 0 and save: 58 | print(len(training_data)) 59 | for i, d in enumerate(training_data): 60 | file_name = os.path.join(data_path, str(org_num + counter*save_step + i) + "_" + str(d[1]) + '.jpg') 61 | cv2.imwrite(file_name, d[0]) 62 | training_data.clear() 63 | counter += 1 64 | cv2.imshow('window1',screen_reshape) 65 | 66 | #测试时间用 67 | print('每帧用时 {} 秒'.format(time.time()-last_time)) 68 | print("瞬时fps:", 1/(time.time()-last_time)) 69 | last_time = time.time() 70 | 71 | if cv2.waitKey(5) & 0xFF == ord('q'): 72 | break 73 | cv2.waitKey()# 视频结束后,按任意键退出 74 | cv2.destroyAllWindows() 75 | -------------------------------------------------------------------------------- /json2yolo.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import os 4 | 5 | # name2id = {'person':0,'helmet':1,'Fire extinguisher':2,'Hook':3,'Gas cylinder':4} 6 | name2id = {'hero': 0, 'small_map': 1, "monster": 2, 'money': 3, 'material': 4, 'door': 5, 'BOSS': 6, 'box': 7, 'options': 8} 7 | 8 | def convert(img_size, box): 9 | dw = 1./(img_size[0]) 10 | dh = 1./(img_size[1]) 11 | x = (box[0] + box[2])/2.0 - 1 12 | y = (box[1] + box[3])/2.0 - 1 13 | w = box[2] - box[0] 14 | h = box[3] - box[1] 15 | x = x*dw 16 | w = w*dw 17 | y = y*dh 18 | h = h*dh 19 | return (x,y,w,h) 20 | 21 | 22 | def decode_json(json_floder_path,json_name): 23 | 24 | txt_name = 'E:\\Computer_vision\\object_DNF\\datasets\\guiqi\\yolo5_datasets\\labels\\' + json_name[0:-5] + '.txt' 25 | txt_file = open(txt_name, 'w') 26 | 27 | json_path = os.path.join(json_floder_path, json_name) 28 | data = json.load(open(json_path, 'r', encoding='gb2312')) 29 | 30 | img_w = data['imageWidth'] 31 | img_h = data['imageHeight'] 32 | 33 | for i in data['shapes']: 34 | 35 | label_name = i['label'] 36 | if (i['shape_type'] == 'rectangle'): 37 | print(txt_name) 38 | x1 = int(i['points'][0][0]) 39 | y1 = int(i['points'][0][1]) 40 | x2 = int(i['points'][1][0]) 41 | y2 = int(i['points'][1][1]) 42 | 43 | bb = (x1,y1,x2,y2) 44 | bbox = convert((img_w,img_h),bb) 45 | txt_file.write(str(name2id[label_name]) + " " + " ".join([str(a) for a in bbox]) + '\n') 46 | 47 | if __name__ == "__main__": 48 | 49 | json_floder_path = r'E:\Computer_vision\object_DNF\datasets\guiqi\yolo5_datasets\labels_json' 50 | json_names = os.listdir(json_floder_path) 51 | for json_name in json_names: 52 | decode_json(json_floder_path,json_name) 53 | -------------------------------------------------------------------------------- /main2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from grabscreen import grab_screen 3 | import cv2 4 | import time 5 | import directkeys 6 | import torch 7 | from torch.autograd import Variable 8 
| from directkeys import PressKey, ReleaseKey, key_down, key_up 9 | from getkeys import key_check 10 | from utils.torch_utils import select_device, load_classifier, time_synchronized 11 | from utils.general import ( 12 | check_img_size, non_max_suppression, apply_classifier, scale_coords, 13 | xyxy2xywh, xywh2xyxy, plot_one_box, strip_optimizer, set_logging) 14 | from models.experimental import attempt_load 15 | from direction_move import move 16 | from small_recgonize import current_door, next_door 17 | from skill_recgnize import skill_rec 18 | import random 19 | 20 | def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=False, scaleFill=False, scaleup=True): 21 | # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232 22 | shape = img.shape[:2] # current shape [height, width] 23 | if isinstance(new_shape, int): 24 | new_shape = (new_shape, new_shape) 25 | 26 | # Scale ratio (new / old) 27 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 28 | if not scaleup: # only scale down, do not scale up (for better test mAP) 29 | r = min(r, 1.0) 30 | 31 | # Compute padding 32 | ratio = r, r # width, height ratios 33 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 34 | dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding 35 | if auto: # minimum rectangle 36 | dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding 37 | elif scaleFill: # stretch 38 | dw, dh = 0.0, 0.0 39 | new_unpad = (new_shape[1], new_shape[0]) 40 | ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios 41 | 42 | dw /= 2 # divide padding into 2 sides 43 | dh /= 2 44 | 45 | if shape[::-1] != new_unpad: # resize 46 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 47 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) 48 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) 49 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border 50 | return img, ratio, (dw, dh) 51 | 52 | # 设置所有用到的参数 53 | weights = r'E:\Computer_vision\yolov5\YOLO5\yolov5-master\DNF_runs\4s\weights\best.pt' #yolo5 模型存放的位置 54 | # weights = r'F:\Computer_vision\yolov5\YOLO5\yolov5-master\runs\exp0\weights\best.pt' 55 | device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") 56 | # device = torch.device("cpu") 57 | model = attempt_load(weights, map_location=device) # load FP32 model 58 | window_size = (0,0,1280,800) # 截屏的位置 59 | img_size = 800 # 输入到yolo5中的模型尺寸 60 | paused = False 61 | half = device.type != 'cpu' 62 | view_img = True # 是否观看目标检测结果 63 | save_txt = False 64 | conf_thres = 0.3 # NMS的置信度过滤 65 | iou_thres = 0.2 # NMS的IOU阈值 66 | classes = None 67 | agnostic_nms = False # 不同类别的NMS时也参数过滤 68 | skill_char = "XYHGXFAXDSWXETX" # 技能按键,使用均匀分布随机抽取 69 | direct_dic = {"UP": 0xC8, "DOWN": 0xD0, "LEFT": 0xCB, "RIGHT": 0xCD} # 上下左右的键码 70 | names = ['hero', 'small_map', "monster", 'money', 'material', 'door', 'BOSS', 'box', 'options'] # 所有类别名 71 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] 72 | if half: 73 | model.half() # to FP16 74 | action_cache = None # 动作标记 75 | press_delay = 0.1 # 按压时间 76 | release_delay = 0.1 # 释放时间 77 | # last_time = time.time() 78 | frame = 0 # 帧 79 | door1_time_start = -20 80 | next_door_time = -20 81 | fs = 1 # 每四帧处理一次 82 | 83 | # 倒计时 84 | for i in list(range(5))[::-1]: 85 | print(i + 1) 86 | time.sleep(1) 87 | 88 | # 捕捉画面+目标检测+玩游戏 89 | while True: 90 | if not paused: 91 | 
t_start = time.time() 92 | img0 = grab_screen(window_size) 93 | frame += 1 94 | if frame % fs == 0: 95 | # img0 = cv2.imread("datasets/guiqi/yolo5_datasets/imgs/1004_14.jpg") 96 | img0 = cv2.cvtColor(img0, cv2.COLOR_BGRA2BGR) 97 | # Padded resize 98 | img = letterbox(img0, new_shape=img_size)[0] 99 | 100 | # Convert 101 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB 102 | img = np.ascontiguousarray(img) 103 | 104 | img = torch.from_numpy(img).to(device).unsqueeze(0) 105 | img = img.half() if half else img.float() # uint8 to fp16/32 106 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 107 | 108 | pred = model(img, augment=False)[0] 109 | 110 | # Apply NMS 111 | det = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms) 112 | gn = torch.tensor(img0.shape)[[1, 0, 1, 0]] 113 | det = det[0] 114 | if det is not None and len(det): 115 | # Rescale boxes from img_size to im0 size 116 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round() 117 | 118 | # Print results 119 | for c in det[:, -1].unique(): 120 | n = (det[:, -1] == c).sum() # detections per class 121 | 122 | img_object = [] 123 | cls_object = [] 124 | # Write results 125 | hero_conf = 0 126 | hero_index = 0 127 | for idx, (*xyxy, conf, cls) in enumerate(reversed(det)): 128 | # if save_txt: # Write to file 129 | # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh 130 | # with open(txt_path + '.txt', 'a') as f: 131 | # f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format 132 | 133 | xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4))).view(-1).tolist() 134 | cls = int(cls) 135 | img_object.append(xywh) 136 | cls_object.append(names[cls]) 137 | 138 | if names[cls] == "hero" and conf > hero_conf: 139 | hero_conf = conf 140 | hero_index = idx 141 | 142 | 143 | if view_img: # Add bbox to image 144 | label = '%s %.2f' % (names[int(cls)], conf) 145 | plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=2) 146 | 147 | # 游戏 148 | thx = 30 # 捡东西时,x方向的阈值 149 | thy = 30 # 捡东西时,y方向的阈值 150 | attx = 150 # 攻击时,x方向的阈值 151 | atty = 50 # 攻击时,y方向的阈值 152 | 153 | if current_door(img0) == 1 and time.time() - door1_time_start > 10: 154 | door1_time_start = time.time() 155 | # move(direct="RIGHT", action_cache=action_cache, press_delay=press_delay, 156 | # release_delay=release_delay) 157 | # ReleaseKey(direct_dic["RIGHT"]) 158 | # directkeys.key_press("SPACE") 159 | directkeys.key_press("CTRL") 160 | time.sleep(1) 161 | directkeys.key_press("ALT") 162 | time.sleep(0.5) 163 | action_cache = None 164 | # 扫描英雄 165 | if "hero" in cls_object: 166 | # hero_xywh = img_object[cls_object.index("hero")] 167 | hero_xywh = img_object[hero_index] 168 | cv2.circle(img0, (int(hero_xywh[0]), int(hero_xywh[1])), 1, (0,0,255), 10) 169 | # print(hero_index) 170 | # print(cls_object.index("hero")) 171 | else: 172 | continue 173 | # 打怪 174 | if "monster" in cls_object or "BOSS" in cls_object: 175 | min_distance = float("inf") 176 | for idx, (c, box) in enumerate(zip(cls_object, img_object)): 177 | if c == 'monster' or c == "BOSS": 178 | dis = ((hero_xywh[0] - box[0])**2 + (hero_xywh[1] - box[1])**2)**0.5 179 | if dis < min_distance: 180 | monster_box = box 181 | monster_index = idx 182 | min_distance = dis 183 | if abs(hero_xywh[0] - monster_box[0]) < attx and abs(hero_xywh[1] - monster_box[1]) < atty: 184 | if "BOSS" in cls_object: 185 | directkeys.key_press("R") 186 | directkeys.key_press("Q") 187 | # time.sleep(0.5) 188 | skill_name = 
skill_char[int(np.random.randint(len(skill_char), size=1)[0])] 189 | while True: 190 | if skill_rec(skill_name, img0): 191 | directkeys.key_press(skill_name) 192 | directkeys.key_press(skill_name) 193 | directkeys.key_press(skill_name) 194 | break 195 | else: 196 | skill_name = skill_char[int(np.random.randint(len(skill_char), size=1)[0])] 197 | 198 | else: 199 | skill_name = skill_char[int(np.random.randint(len(skill_char), size=1)[0])] 200 | while True: 201 | if skill_rec(skill_name, img0): 202 | directkeys.key_press(skill_name) 203 | directkeys.key_press(skill_name) 204 | directkeys.key_press(skill_name) 205 | break 206 | else: 207 | skill_name = skill_char[int(np.random.randint(len(skill_char), size=1)[0])] 208 | print("释放技能攻击") 209 | if not action_cache: 210 | pass 211 | elif action_cache not in ["LEFT", "RIGHT", "UP", "DOWN"]: 212 | ReleaseKey(direct_dic[action_cache.strip().split("_")[0]]) 213 | ReleaseKey(direct_dic[action_cache.strip().split("_")[1]]) 214 | action_cache = None 215 | elif action_cache: 216 | ReleaseKey(direct_dic[action_cache]) 217 | action_cache = None 218 | # break 219 | elif monster_box[1] - hero_xywh[1] < 0 and monster_box[0] - hero_xywh[0] > 0: 220 | if abs(monster_box[1] - hero_xywh[1]) < thy: 221 | action_cache = move(direct="RIGHT", material=True, action_cache=action_cache, press_delay=press_delay, 222 | release_delay=release_delay) 223 | # break 224 | elif hero_xywh[1] - monster_box[1] < monster_box[0] - hero_xywh[0]: 225 | action_cache = move(direct="RIGHT_UP", material=True, action_cache=action_cache, 226 | press_delay=press_delay, 227 | release_delay=release_delay) 228 | # break 229 | elif hero_xywh[1] - monster_box[1] >= monster_box[0] - hero_xywh[0]: 230 | action_cache = move(direct="UP", material=True, action_cache=action_cache, 231 | press_delay=press_delay, 232 | release_delay=release_delay) 233 | # break 234 | elif monster_box[1] - hero_xywh[1] < 0 and monster_box[0] - hero_xywh[0] < 0: 235 | if abs(monster_box[1] - hero_xywh[1]) < thy: 236 | action_cache = move(direct="LEFT", material=True, action_cache=action_cache, press_delay=press_delay, 237 | release_delay=release_delay) 238 | # break 239 | elif hero_xywh[1] - monster_box[1] < hero_xywh[0] - monster_box[0]: 240 | action_cache = move(direct="LEFT_UP", material=True, action_cache=action_cache, 241 | press_delay=press_delay, 242 | release_delay=release_delay) 243 | # break 244 | elif hero_xywh[1] - monster_box[1] >= hero_xywh[0] - monster_box[0]: 245 | action_cache = move(direct="UP", material=True, action_cache=action_cache, 246 | press_delay=press_delay, 247 | release_delay=release_delay) 248 | # break 249 | elif monster_box[1] - hero_xywh[1] > 0 and monster_box[0] - hero_xywh[0] < 0: 250 | if abs(monster_box[1] - hero_xywh[1]) < thy: 251 | action_cache = move(direct="LEFT", material=True, action_cache=action_cache, press_delay=press_delay, 252 | release_delay=release_delay) 253 | # break 254 | elif monster_box[1] - hero_xywh[1] < hero_xywh[0] - monster_box[0]: 255 | action_cache = move(direct="LEFT_DOWN", material=True, action_cache=action_cache, 256 | press_delay=press_delay, 257 | release_delay=release_delay) 258 | # break 259 | elif monster_box[1] - hero_xywh[1] >= hero_xywh[0] - monster_box[0]: 260 | action_cache = move(direct="DOWN", material=True, action_cache=action_cache, 261 | press_delay=press_delay, 262 | release_delay=release_delay) 263 | # break 264 | elif monster_box[1] - hero_xywh[1] > 0 and monster_box[0] - hero_xywh[0] > 0: 265 | if abs(monster_box[1] - hero_xywh[1]) < thy: 
266 | action_cache = move(direct="RIGHT", material=True, action_cache=action_cache, press_delay=press_delay, 267 | release_delay=release_delay) 268 | # break 269 | elif monster_box[1] - hero_xywh[1] < monster_box[0] - hero_xywh[0]: 270 | action_cache = move(direct="RIGHT_DOWN", material=True, action_cache=action_cache, 271 | press_delay=press_delay, 272 | release_delay=release_delay) 273 | # break 274 | elif monster_box[1] - hero_xywh[1] >= monster_box[0] - hero_xywh[0]: 275 | action_cache = move(direct="DOWN", material=True, action_cache=action_cache, 276 | press_delay=press_delay, 277 | release_delay=release_delay) 278 | # break 279 | 280 | # 移动到下一个地图 281 | if "door" in cls_object and "monster" not in cls_object and "BOSS" not in cls_object and "material" not in cls_object and "money" not in cls_object: 282 | for idx, (c, box) in enumerate(zip(cls_object, img_object)): 283 | if c == 'door': 284 | door_box = box 285 | door_index = idx 286 | if door_box[0] < img0.shape[0] // 2: 287 | action_cache = move(direct="RIGHT", action_cache=action_cache, press_delay=press_delay, 288 | release_delay=release_delay) 289 | # break 290 | elif door_box[1] - hero_xywh[1] < 0 and door_box[0] - hero_xywh[0] > 0: 291 | if abs(door_box[1] - hero_xywh[1]) < thy and abs(door_box[0] - hero_xywh[0]) < thx: 292 | action_cache = None 293 | print("进入下一地图") 294 | # break 295 | elif abs(door_box[1] - hero_xywh[1]) < thy: 296 | action_cache = move(direct="RIGHT", action_cache=action_cache, press_delay=press_delay, 297 | release_delay=release_delay) 298 | # break 299 | elif hero_xywh[1] - door_box[1] < door_box[0] - hero_xywh[0]: 300 | action_cache = move(direct="RIGHT_UP", action_cache=action_cache, press_delay=press_delay, 301 | release_delay=release_delay) 302 | # break 303 | elif hero_xywh[1] - door_box[1] >= door_box[0] - hero_xywh[0]: 304 | action_cache = move(direct="UP", action_cache=action_cache, press_delay=press_delay, 305 | release_delay=release_delay) 306 | # break 307 | elif door_box[1] - hero_xywh[1] < 0 and door_box[0] - hero_xywh[0] < 0: 308 | if abs(door_box[1] - hero_xywh[1]) < thy and abs(door_box[0] - hero_xywh[0]) < thx: 309 | action_cache = None 310 | print("进入下一地图") 311 | # break 312 | elif abs(door_box[1] - hero_xywh[1]) < thy: 313 | action_cache = move(direct="LEFT", action_cache=action_cache, press_delay=press_delay, 314 | release_delay=release_delay) 315 | # break 316 | elif hero_xywh[1] - door_box[1] < hero_xywh[0] - door_box[0]: 317 | action_cache = move(direct="LEFT_UP", action_cache=action_cache, press_delay=press_delay, 318 | release_delay=release_delay) 319 | # break 320 | elif hero_xywh[1] - door_box[1] >= hero_xywh[0] - door_box[0]: 321 | action_cache = move(direct="UP", action_cache=action_cache, press_delay=press_delay, 322 | release_delay=release_delay) 323 | # break 324 | elif door_box[1] - hero_xywh[1] > 0 and door_box[0] - hero_xywh[0] < 0: 325 | if abs(door_box[1] - hero_xywh[1]) < thy and abs(door_box[0] - hero_xywh[0]) < thx: 326 | action_cache = None 327 | print("进入下一地图") 328 | # break 329 | elif abs(door_box[1] - hero_xywh[1]) < thy: 330 | action_cache = move(direct="LEFT", action_cache=action_cache, press_delay=press_delay, 331 | release_delay=release_delay) 332 | # break 333 | elif door_box[1] - hero_xywh[1] < hero_xywh[0] - door_box[0]: 334 | action_cache = move(direct="LEFT_DOWN", action_cache=action_cache, press_delay=press_delay, 335 | release_delay=release_delay) 336 | # break 337 | elif door_box[1] - hero_xywh[1] >= hero_xywh[0] - door_box[0]: 338 | action_cache = 
move(direct="DOWN", action_cache=action_cache, press_delay=press_delay, 339 | release_delay=release_delay) 340 | # break 341 | elif door_box[1] - hero_xywh[1] > 0 and door_box[0] - hero_xywh[0] > 0: 342 | if abs(door_box[1] - hero_xywh[1]) < thy and abs(door_box[0] - hero_xywh[0]) < thx: 343 | action_cache = None 344 | print("进入下一地图") 345 | # break 346 | elif abs(door_box[1] - hero_xywh[1]) < thy: 347 | action_cache = move(direct="RIGHT", action_cache=action_cache, press_delay=press_delay, 348 | release_delay=release_delay) 349 | # break 350 | elif door_box[1] - hero_xywh[1] < door_box[0] - hero_xywh[0]: 351 | action_cache = move(direct="RIGHT_DOWN", action_cache=action_cache, press_delay=press_delay, 352 | release_delay=release_delay) 353 | # break 354 | elif door_box[1] - hero_xywh[1] >= door_box[0] - hero_xywh[0]: 355 | action_cache = move(direct="DOWN", action_cache=action_cache, press_delay=press_delay, 356 | release_delay=release_delay) 357 | # break 358 | if "money" not in cls_object and "material" not in cls_object and "monster" not in cls_object \ 359 | and "BOSS" not in cls_object and "door" not in cls_object and 'box' not in cls_object \ 360 | and 'options' not in cls_object: 361 | # if next_door(img0) == 0 and abs(time.time()) - next_door_time > 10: 362 | # next_door_time = time.time() 363 | # action_cache = move(direct="LEFT", action_cache=action_cache, press_delay=press_delay, 364 | # release_delay=release_delay) 365 | # # time.sleep(3) 366 | # else: 367 | # action_cache = move(direct="RIGHT", action_cache=action_cache, press_delay=press_delay, 368 | # release_delay=release_delay) 369 | 370 | action_cache = move(direct="RIGHT", action_cache=action_cache, press_delay=press_delay, 371 | release_delay=release_delay) 372 | # break 373 | 374 | # 捡材料 375 | if "monster" not in cls_object and "hero" in cls_object and ("material" in cls_object or "money" in cls_object): 376 | min_distance = float("inf") 377 | hero_xywh[1] = hero_xywh[1] + (hero_xywh[3] // 2) * 0.7 378 | thx = thx / 2 379 | thy = thy / 2 380 | for idx, (c, box) in enumerate(zip(cls_object, img_object)): 381 | if c == 'material' or c == "money": 382 | dis = ((hero_xywh[0] - box[0]) ** 2 + (hero_xywh[1] - box[1]) ** 2) ** 0.5 383 | if dis < min_distance: 384 | material_box = box 385 | material_index = idx 386 | min_distance = dis 387 | if abs(material_box[1] - hero_xywh[1]) < thy and abs(material_box[0] - hero_xywh[0]) < thx: 388 | if not action_cache: 389 | pass 390 | elif action_cache not in ["LEFT", "RIGHT", "UP", "DOWN"]: 391 | ReleaseKey(direct_dic[action_cache.strip().split("_")[0]]) 392 | ReleaseKey(direct_dic[action_cache.strip().split("_")[1]]) 393 | action_cache = None 394 | else: 395 | ReleaseKey(direct_dic[action_cache]) 396 | action_cache = None 397 | time.sleep(1) 398 | directkeys.key_press("X") 399 | print("捡东西") 400 | # break 401 | 402 | elif material_box[1] - hero_xywh[1] < 0 and material_box[0] - hero_xywh[0] > 0: 403 | 404 | if abs(material_box[1] - hero_xywh[1]) < thy: 405 | action_cache = move(direct="RIGHT", material=True, action_cache=action_cache, press_delay=press_delay, 406 | release_delay=release_delay) 407 | # break 408 | elif hero_xywh[1] - material_box[1] < material_box[0] - hero_xywh[0]: 409 | action_cache = move(direct="RIGHT_UP", material=True, action_cache=action_cache, press_delay=press_delay, 410 | release_delay=release_delay) 411 | # break 412 | elif hero_xywh[1] - material_box[1] >= material_box[0] - hero_xywh[0]: 413 | action_cache = move(direct="UP", material=True, 
action_cache=action_cache, press_delay=press_delay, 414 | release_delay=release_delay) 415 | # break 416 | elif material_box[1] - hero_xywh[1] < 0 and material_box[0] - hero_xywh[0] < 0: 417 | if abs(material_box[1] - hero_xywh[1]) < thy: 418 | action_cache = move(direct="LEFT", material=True, action_cache=action_cache, press_delay=press_delay, 419 | release_delay=release_delay) 420 | # break 421 | elif hero_xywh[1] - material_box[1] < hero_xywh[0] - material_box[0]: 422 | action_cache = move(direct="LEFT_UP", material=True, action_cache=action_cache, press_delay=press_delay, 423 | release_delay=release_delay) 424 | # break 425 | elif hero_xywh[1] - material_box[1] >= hero_xywh[0] - material_box[0]: 426 | action_cache = move(direct="UP", material=True, action_cache=action_cache, press_delay=press_delay, 427 | release_delay=release_delay) 428 | # break 429 | elif material_box[1] - hero_xywh[1] > 0 and material_box[0] - hero_xywh[0] < 0: 430 | if abs(material_box[1] - hero_xywh[1]) < thy: 431 | action_cache = move(direct="LEFT", material=True, action_cache=action_cache, press_delay=press_delay, 432 | release_delay=release_delay) 433 | # break 434 | elif material_box[1] - hero_xywh[1] < hero_xywh[0] - material_box[0]: 435 | action_cache = move(direct="LEFT_DOWN", material=True, action_cache=action_cache, press_delay=press_delay, 436 | release_delay=release_delay) 437 | # break 438 | elif material_box[1] - hero_xywh[1] >= hero_xywh[0] - material_box[0]: 439 | action_cache = move(direct="DOWN", material=True, action_cache=action_cache, press_delay=press_delay, 440 | release_delay=release_delay) 441 | # break 442 | elif material_box[1] - hero_xywh[1] > 0 and material_box[0] - hero_xywh[0] > 0: 443 | if abs(material_box[1] - hero_xywh[1]) < thy: 444 | action_cache = move(direct="RIGHT", material=True, action_cache=action_cache, press_delay=press_delay, 445 | release_delay=release_delay) 446 | # break 447 | elif material_box[1] - hero_xywh[1] < material_box[0] - hero_xywh[0]: 448 | action_cache = move(direct="RIGHT_DOWN", material=True, action_cache=action_cache, press_delay=press_delay, 449 | release_delay=release_delay) 450 | # break 451 | elif material_box[1] - hero_xywh[1] >= material_box[0] - hero_xywh[0]: 452 | action_cache = move(direct="DOWN", material=True, action_cache=action_cache, press_delay=press_delay, 453 | release_delay=release_delay) 454 | # break 455 | # 开箱子 456 | if "box" in cls_object: 457 | box_num = 0 458 | for b in cls_object: 459 | if b == "box": 460 | box_num += 1 461 | if box_num >= 4: 462 | directkeys.key_press("ESC") 463 | print("打开箱子ESC") 464 | # break62 465 | 466 | # 重新开始 467 | time_option = -20 468 | if "options" in cls_object: 469 | if not action_cache: 470 | pass 471 | elif action_cache not in ["LEFT", "RIGHT", "UP", "DOWN"]: 472 | ReleaseKey(direct_dic[action_cache.strip().split("_")[0]]) 473 | ReleaseKey(direct_dic[action_cache.strip().split("_")[1]]) 474 | action_cache = None 475 | else: 476 | ReleaseKey(direct_dic[action_cache]) 477 | action_cache = None 478 | if time.time() - time_option > 10: 479 | directkeys.key_press("NUM0") 480 | print("移动物品到脚下") 481 | directkeys.key_press("X") 482 | time_option = time.time() 483 | directkeys.key_press("F2") 484 | print("重新开始F2") 485 | # break 486 | t_end = time.time() 487 | print("一帧游戏操作所用时间:", (t_end - t_start)/fs) 488 | 489 | img0 = cv2.resize(img0, (600, 375)) 490 | # Stream results 491 | if view_img: 492 | cv2.imshow('window', img0) 493 | # cv2.waitKey(0) 494 | # cv2.destroyAllWindows() 495 | if cv2.waitKey(5) & 0xFF 
== ord('q'): 496 | raise StopIteration 497 | 498 | 499 | # Setting pause and unpause 500 | keys = key_check() 501 | if 'P' in keys: 502 | if not action_cache: 503 | pass 504 | elif action_cache not in ["LEFT", "RIGHT", "UP", "DOWN"]: 505 | ReleaseKey(direct_dic[action_cache.strip().split("_")[0]]) 506 | ReleaseKey(direct_dic[action_cache.strip().split("_")[1]]) 507 | action_cache = None 508 | else: 509 | ReleaseKey(direct_dic[action_cache]) 510 | action_cache = None 511 | if paused: 512 | paused = False 513 | time.sleep(1) 514 | else: 515 | paused = True 516 | time.sleep(1) 517 | 518 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/__init__.py -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /models/__pycache__/common.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/__pycache__/common.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/common.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/__pycache__/common.cpython-37.pyc -------------------------------------------------------------------------------- /models/__pycache__/common.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/__pycache__/common.cpython-38.pyc -------------------------------------------------------------------------------- /models/__pycache__/experimental.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/__pycache__/experimental.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/experimental.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/__pycache__/experimental.cpython-37.pyc -------------------------------------------------------------------------------- /models/__pycache__/experimental.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/__pycache__/experimental.cpython-38.pyc -------------------------------------------------------------------------------- /models/__pycache__/yolo.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/__pycache__/yolo.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/yolo.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/__pycache__/yolo.cpython-37.pyc -------------------------------------------------------------------------------- /models/__pycache__/yolo.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/__pycache__/yolo.cpython-38.pyc -------------------------------------------------------------------------------- /models/common.py: -------------------------------------------------------------------------------- 1 | # This file contains modules common to various models 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | from torch.nn import functional as F 7 | from utils.general import non_max_suppression 8 | 9 | 10 | def autopad(k, p=None): # kernel, padding 11 | # Pad to 'same' 12 | if p is None: 13 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 14 | return p 15 | 16 | 17 | def DWConv(c1, c2, k=1, s=1, act=True): 18 | # Depthwise convolution 19 | return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 20 | 21 | 22 | class Conv(nn.Module): 23 | # Standard convolution 24 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 25 | super(Conv, self).__init__() 26 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 27 | self.bn = nn.BatchNorm2d(c2) 28 | self.act = nn.Hardswish() if act else nn.Identity() 29 | 30 | def forward(self, x): 31 | return self.act(self.bn(self.conv(x))) 32 | 33 | def fuseforward(self, x): 34 | return self.act(self.conv(x)) 35 | 36 | 37 | class Bottleneck(nn.Module): 38 | # Standard bottleneck 39 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 40 | super(Bottleneck, self).__init__() 41 | c_ = int(c2 * e) # hidden channels 42 | self.cv1 = Conv(c1, c_, 1, 1) 43 | self.cv2 = Conv(c_, c2, 3, 1, g=g) 44 | self.add = shortcut and c1 == c2 45 | 46 | def forward(self, x): 47 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 48 | 49 | 50 | class BottleneckCSP(nn.Module): 51 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 52 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 53 | super(BottleneckCSP, self).__init__() 54 | c_ = int(c2 * e) # 
hidden channels 55 | self.cv1 = Conv(c1, c_, 1, 1) 56 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 57 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 58 | self.cv4 = Conv(2 * c_, c2, 1, 1) 59 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 60 | self.act = nn.LeakyReLU(0.1, inplace=True) 61 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 62 | 63 | def forward(self, x): 64 | y1 = self.cv3(self.m(self.cv1(x))) 65 | y2 = self.cv2(x) 66 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 67 | 68 | 69 | class SPP(nn.Module): 70 | # Spatial pyramid pooling layer used in YOLOv3-SPP 71 | def __init__(self, c1, c2, k=(5, 9, 13)): 72 | super(SPP, self).__init__() 73 | c_ = c1 // 2 # hidden channels 74 | self.cv1 = Conv(c1, c_, 1, 1) 75 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 76 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 77 | 78 | def forward(self, x): 79 | x = self.cv1(x) 80 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 81 | 82 | 83 | class Focus(nn.Module): 84 | # Focus wh information into c-space 85 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 86 | super(Focus, self).__init__() 87 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 88 | 89 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 90 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) 91 | 92 | 93 | class Concat(nn.Module): 94 | # Concatenate a list of tensors along dimension 95 | def __init__(self, dimension=1): 96 | super(Concat, self).__init__() 97 | self.d = dimension 98 | 99 | def forward(self, x): 100 | return torch.cat(x, self.d) 101 | 102 | 103 | class NMS(nn.Module): 104 | # Non-Maximum Suppression (NMS) module 105 | conf = 0.3 # confidence threshold 106 | iou = 0.6 # IoU threshold 107 | classes = None # (optional list) filter by class 108 | 109 | def __init__(self, dimension=1): 110 | super(NMS, self).__init__() 111 | 112 | def forward(self, x): 113 | return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) 114 | 115 | 116 | class Flatten(nn.Module): 117 | # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions 118 | @staticmethod 119 | def forward(x): 120 | return x.view(x.size(0), -1) 121 | 122 | 123 | class Classify(nn.Module): 124 | # Classification head, i.e. 
x(b,c1,20,20) to x(b,c2) 125 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups 126 | super(Classify, self).__init__() 127 | self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) 128 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) # to x(b,c2,1,1) 129 | self.flat = Flatten() 130 | 131 | def forward(self, x): 132 | z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list 133 | return self.flat(self.conv(z)) # flatten to x(b,c2) 134 | 135 | 136 | # =================== 137 | # RGA Module 138 | # =================== 139 | 140 | class RGA_Module(nn.Module): 141 | def __init__(self, in_channel, in_spatial, use_spatial=True, use_channel=True, \ 142 | cha_ratio=8, spa_ratio=8, down_ratio=8): 143 | super(RGA_Module, self).__init__() 144 | 145 | self.in_channel = in_channel 146 | self.in_spatial = in_spatial 147 | 148 | self.use_spatial = use_spatial 149 | self.use_channel = use_channel 150 | 151 | # print('Use_Spatial_Att: {};\tUse_Channel_Att: {}.'.format(self.use_spatial, self.use_channel)) 152 | 153 | self.inter_channel = in_channel // cha_ratio 154 | self.inter_spatial = in_spatial // spa_ratio 155 | 156 | # Embedding functions for original features 157 | if self.use_spatial: 158 | self.gx_spatial = nn.Sequential( 159 | nn.Conv2d(in_channels=self.in_channel, out_channels=self.inter_channel, 160 | kernel_size=1, stride=1, padding=0, bias=False), 161 | nn.BatchNorm2d(self.inter_channel), 162 | nn.ReLU() 163 | ) 164 | if self.use_channel: 165 | self.gx_channel = nn.Sequential( 166 | nn.Conv2d(in_channels=self.in_spatial, out_channels=self.inter_spatial, 167 | kernel_size=1, stride=1, padding=0, bias=False), 168 | nn.BatchNorm2d(self.inter_spatial), 169 | nn.ReLU() 170 | ) 171 | 172 | # Embedding functions for relation features 173 | if self.use_spatial: 174 | self.gg_spatial = nn.Sequential( 175 | nn.Conv2d(in_channels=self.in_spatial * 2, out_channels=self.inter_spatial, 176 | kernel_size=1, stride=1, padding=0, bias=False), 177 | nn.BatchNorm2d(self.inter_spatial), 178 | nn.ReLU() 179 | ) 180 | if self.use_channel: 181 | self.gg_channel = nn.Sequential( 182 | nn.Conv2d(in_channels=self.in_channel * 2, out_channels=self.inter_channel, 183 | kernel_size=1, stride=1, padding=0, bias=False), 184 | nn.BatchNorm2d(self.inter_channel), 185 | nn.ReLU() 186 | ) 187 | 188 | # Networks for learning attention weights 189 | if self.use_spatial: 190 | num_channel_s = 1 + self.inter_spatial 191 | self.W_spatial = nn.Sequential( 192 | nn.Conv2d(in_channels=num_channel_s, out_channels=num_channel_s // down_ratio, 193 | kernel_size=1, stride=1, padding=0, bias=False), 194 | nn.BatchNorm2d(num_channel_s // down_ratio), 195 | nn.ReLU(), 196 | nn.Conv2d(in_channels=num_channel_s // down_ratio, out_channels=1, 197 | kernel_size=1, stride=1, padding=0, bias=False), 198 | nn.BatchNorm2d(1) 199 | ) 200 | if self.use_channel: 201 | num_channel_c = 1 + self.inter_channel 202 | self.W_channel = nn.Sequential( 203 | nn.Conv2d(in_channels=num_channel_c, out_channels=num_channel_c // down_ratio, 204 | kernel_size=1, stride=1, padding=0, bias=False), 205 | nn.BatchNorm2d(num_channel_c // down_ratio), 206 | nn.ReLU(), 207 | nn.Conv2d(in_channels=num_channel_c // down_ratio, out_channels=1, 208 | kernel_size=1, stride=1, padding=0, bias=False), 209 | nn.BatchNorm2d(1) 210 | ) 211 | 212 | # Embedding functions for modeling relations 213 | if self.use_spatial: 214 | self.theta_spatial = nn.Sequential( 215 | 
nn.Conv2d(in_channels=self.in_channel, out_channels=self.inter_channel, 216 | kernel_size=1, stride=1, padding=0, bias=False), 217 | nn.BatchNorm2d(self.inter_channel), 218 | nn.ReLU() 219 | ) 220 | self.phi_spatial = nn.Sequential( 221 | nn.Conv2d(in_channels=self.in_channel, out_channels=self.inter_channel, 222 | kernel_size=1, stride=1, padding=0, bias=False), 223 | nn.BatchNorm2d(self.inter_channel), 224 | nn.ReLU() 225 | ) 226 | if self.use_channel: 227 | self.theta_channel = nn.Sequential( 228 | nn.Conv2d(in_channels=self.in_spatial, out_channels=self.inter_spatial, 229 | kernel_size=1, stride=1, padding=0, bias=False), 230 | nn.BatchNorm2d(self.inter_spatial), 231 | nn.ReLU() 232 | ) 233 | self.phi_channel = nn.Sequential( 234 | nn.Conv2d(in_channels=self.in_spatial, out_channels=self.inter_spatial, 235 | kernel_size=1, stride=1, padding=0, bias=False), 236 | nn.BatchNorm2d(self.inter_spatial), 237 | nn.ReLU() 238 | ) 239 | 240 | def forward(self, x): 241 | b, c, h, w = x.size() 242 | 243 | if self.use_spatial: 244 | # spatial attention 245 | theta_xs = self.theta_spatial(x) # 1 20 32 32 246 | phi_xs = self.phi_spatial(x) # 1 20 32 32 247 | theta_xs = theta_xs.view(b, self.inter_channel, -1) # 1 20 32*32 248 | theta_xs = theta_xs.permute(0, 2, 1) # 1 32*32 20 249 | phi_xs = phi_xs.view(b, self.inter_channel, -1) # 1 20 32*32 250 | Gs = torch.matmul(theta_xs, phi_xs) # 1 1024 1024 251 | Gs_in = Gs.permute(0, 2, 1).view(b, h * w, h, w) # 1 1024 32 32 (relation matrix with the order swapped) 252 | Gs_out = Gs.view(b, h * w, h, w) # 1 1024 32 32 253 | Gs_joint = torch.cat((Gs_in, Gs_out), 1) # 8 4096 64 32 254 | Gs_joint = self.gg_spatial(Gs_joint) # 8 256 64 32 255 | 256 | g_xs = self.gx_spatial(x) # 8 32 64 32 257 | g_xs = torch.mean(g_xs, dim=1, keepdim=True) # 8 1 64 32 258 | ys = torch.cat((g_xs, Gs_joint), 1) # 8 257 64 32 259 | 260 | W_ys = self.W_spatial(ys) # 8 1 64 32 261 | if not self.use_channel: 262 | out = F.sigmoid(W_ys.expand_as(x)) * x # spatial attention: weights the same position across different feature maps 263 | return out 264 | else: 265 | x = F.sigmoid(W_ys.expand_as(x)) * x 266 | if self.use_channel: 267 | # channel attention 268 | xc = x.view(b, c, -1).permute(0, 2, 1).unsqueeze(-1) # 8 2048 256 1 269 | theta_xc = self.theta_channel(xc).squeeze(-1).permute(0, 2, 1) # 8 256 256 270 | phi_xc = self.phi_channel(xc).squeeze(-1) # 8 256 256 271 | Gc = torch.matmul(theta_xc, phi_xc) # 8 256 256 272 | Gc_in = Gc.permute(0, 2, 1).unsqueeze(-1) # 8 256 256 1 273 | Gc_out = Gc.unsqueeze(-1) # 8 256 256 1 274 | Gc_joint = torch.cat((Gc_in, Gc_out), 1) # 8 512 256 1 275 | Gc_joint = self.gg_channel(Gc_joint) # 8 32 256 1 276 | 277 | g_xc = self.gx_channel(xc) # 8 256 256 1 278 | g_xc = torch.mean(g_xc, dim=1, keepdim=True) # 8 1 256 1 279 | yc = torch.cat((g_xc, Gc_joint), 1) # 8 33 256 1 280 | W_yc = self.W_channel(yc).transpose(1, 2) # 8 256 1 1 (learned channel weight assignment) 281 | out = F.sigmoid(W_yc) * x 282 | 283 | return out -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from models.common import Conv, DWConv 8 | from utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = 
int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | 25 | class C3(nn.Module): 26 | # Cross Convolution CSP 27 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 28 | super(C3, self).__init__() 29 | c_ = int(c2 * e) # hidden channels 30 | self.cv1 = Conv(c1, c_, 1, 1) 31 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 32 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 33 | self.cv4 = Conv(2 * c_, c2, 1, 1) 34 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 35 | self.act = nn.LeakyReLU(0.1, inplace=True) 36 | self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) 37 | 38 | def forward(self, x): 39 | y1 = self.cv3(self.m(self.cv1(x))) 40 | y2 = self.cv2(x) 41 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 42 | 43 | 44 | class Sum(nn.Module): 45 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 46 | def __init__(self, n, weight=False): # n: number of inputs 47 | super(Sum, self).__init__() 48 | self.weight = weight # apply weights boolean 49 | self.iter = range(n - 1) # iter object 50 | if weight: 51 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 52 | 53 | def forward(self, x): 54 | y = x[0] # no weight 55 | if self.weight: 56 | w = torch.sigmoid(self.w) * 2 57 | for i in self.iter: 58 | y = y + x[i + 1] * w[i] 59 | else: 60 | for i in self.iter: 61 | y = y + x[i + 1] 62 | return y 63 | 64 | 65 | class GhostConv(nn.Module): 66 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 67 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 68 | super(GhostConv, self).__init__() 69 | c_ = c2 // 2 # hidden channels 70 | self.cv1 = Conv(c1, c_, k, s, g, act) 71 | self.cv2 = Conv(c_, c_, 5, 1, c_, act) 72 | 73 | def forward(self, x): 74 | y = self.cv1(x) 75 | return torch.cat([y, self.cv2(y)], 1) 76 | 77 | 78 | class GhostBottleneck(nn.Module): 79 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 80 | def __init__(self, c1, c2, k, s): 81 | super(GhostBottleneck, self).__init__() 82 | c_ = c2 // 2 83 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 84 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 85 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 86 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 87 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 88 | 89 | def forward(self, x): 90 | return self.conv(x) + self.shortcut(x) 91 | 92 | 93 | class MixConv2d(nn.Module): 94 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 95 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 96 | super(MixConv2d, self).__init__() 97 | groups = len(k) 98 | if equal_ch: # equal c_ per group 99 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 100 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 101 | else: # equal weight.numel() per group 102 | b = [c2] + [0] * groups 103 | a = np.eye(groups + 1, groups, k=-1) 104 | a -= np.roll(a, 1, axis=1) 105 | a *= np.array(k) ** 2 106 | a[0] = 1 107 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 108 | 109 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), 
k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 110 | self.bn = nn.BatchNorm2d(c2) 111 | self.act = nn.LeakyReLU(0.1, inplace=True) 112 | 113 | def forward(self, x): 114 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 115 | 116 | 117 | class Ensemble(nn.ModuleList): 118 | # Ensemble of models 119 | def __init__(self): 120 | super(Ensemble, self).__init__() 121 | 122 | def forward(self, x, augment=False): 123 | y = [] 124 | for module in self: 125 | y.append(module(x, augment)[0]) 126 | # y = torch.stack(y).max(0)[0] # max ensemble 127 | y = torch.cat(y, 1) # nms ensemble 128 | # y = torch.stack(y).mean(0) # mean ensemble 129 | return y, None # inference, train output 130 | 131 | 132 | def attempt_load(weights, map_location=None): 133 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 134 | model = Ensemble() 135 | for w in weights if isinstance(weights, list) else [weights]: 136 | attempt_download(w) 137 | model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model 138 | 139 | if len(model) == 1: 140 | return model[-1] # return model 141 | else: 142 | print('Ensemble created with %s\n' % weights) 143 | for k in ['names', 'stride']: 144 | setattr(model, k, getattr(model[-1], k)) 145 | return model # return ensemble 146 | -------------------------------------------------------------------------------- /models/export.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | # First, pip install onnx 7 | import argparse 8 | import sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | sys.path.append('../') 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental import attempt_load 18 | from utils.activations import Hardswish 19 | from utils.general import set_logging, check_img_size 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ 24 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 25 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 26 | opt = parser.parse_args() 27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 28 | print(opt) 29 | set_logging() 30 | t = time.time() 31 | 32 | # Load PyTorch model 33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model 34 | labels = model.names 35 | 36 | # Checks 37 | gs = int(max(model.stride)) # grid size (max stride) 38 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples 39 | 40 | # Input 41 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection 42 | 43 | # Update model 44 | for k, m in model.named_modules(): 45 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 46 | if isinstance(m, models.common.Conv) and isinstance(m.act, nn.Hardswish): 47 | m.act = Hardswish() # assign activation 48 | # if isinstance(m, models.yolo.Detect): 49 | # m.forward = m.forward_export # assign forward (optional) 50 | model.model[-1].export = True # set Detect() layer 
export=True 51 | y = model(img) # dry run 52 | 53 | # TorchScript export 54 | try: 55 | print('\nStarting TorchScript export with torch %s...' % torch.__version__) 56 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename 57 | ts = torch.jit.trace(model, img) 58 | ts.save(f) 59 | print('TorchScript export success, saved as %s' % f) 60 | except Exception as e: 61 | print('TorchScript export failure: %s' % e) 62 | 63 | # ONNX export 64 | try: 65 | import onnx 66 | 67 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 68 | f = opt.weights.replace('.pt', '.onnx') # filename 69 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 70 | output_names=['classes', 'boxes'] if y is None else ['output']) 71 | 72 | # Checks 73 | onnx_model = onnx.load(f) # load onnx model 74 | onnx.checker.check_model(onnx_model) # check onnx model 75 | # print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 76 | print('ONNX export success, saved as %s' % f) 77 | except Exception as e: 78 | print('ONNX export failure: %s' % e) 79 | 80 | # CoreML export 81 | try: 82 | import coremltools as ct 83 | 84 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__) 85 | # convert model from torchscript and apply pixel scaling as per detect.py 86 | model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) 87 | f = opt.weights.replace('.pt', '.mlmodel') # filename 88 | model.save(f) 89 | print('CoreML export success, saved as %s' % f) 90 | except Exception as e: 91 | print('CoreML export failure: %s' % e) 92 | 93 | # Finish 94 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t)) 95 | -------------------------------------------------------------------------------- /models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 
(P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, Bottleneck, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 6, BottleneckCSP, [1024]], # 9 25 | ] 26 | 27 | # YOLOv5 FPN head 28 | head: 29 | [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large) 30 | 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium) 35 | 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small) 40 | 41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | -------------------------------------------------------------------------------- /models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [116,90, 156,198, 373,326] # P5/32 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [10,13, 16,30, 33,23] # P3/8 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 PANet head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P5, P4, P3) 48 | ] 49 | -------------------------------------------------------------------------------- /models/tmpvzcovfjn: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/models/tmpvzcovfjn -------------------------------------------------------------------------------- /models/yolo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import math 4 | import sys 5 | from copy import deepcopy 6 | from pathlib import Path 7 | 8 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 9 | logger = logging.getLogger(__name__) 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat, NMS, RGA_Module 15 | from models.experimental import MixConv2d, CrossConv, C3 16 | from utils.general import check_anchor_order, make_divisible, check_file, set_logging 17 | from utils.torch_utils import ( 18 | time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, select_device) 19 | 20 | 21 | class Detect(nn.Module): 22 | stride = None # strides computed during build 23 | export = False # onnx export 24 | 25 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 26 | super(Detect, self).__init__() 27 | self.nc = nc # number of classes 28 | self.no = nc + 5 # number of outputs per anchor 29 | self.nl = len(anchors) # number of detection layers 30 | self.na = len(anchors[0]) // 2 # number of anchors 31 | self.grid = [torch.zeros(1)] * self.nl # init grid 32 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 33 | self.register_buffer('anchors', a) # shape(nl,na,2) 34 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 35 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 36 | 37 | def forward(self, x): 38 | # x = x.copy() # for profiling 39 | z = [] # inference output 40 | self.training |= self.export 41 | for i in range(self.nl): 42 | x[i] = self.m[i](x[i]) # conv 43 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 44 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 45 | 46 | if not self.training: # inference 47 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 48 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 49 | 50 | y = x[i].sigmoid() 51 | y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy 52 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 53 | z.append(y.view(bs, -1, self.no)) 54 | 55 | return x if self.training else (torch.cat(z, 1), x) 56 | 57 | @staticmethod 58 | def _make_grid(nx=20, ny=20): 59 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 60 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 61 | 62 | 63 | class Model(nn.Module): 64 | def __init__(self, img_size, cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes 65 | super(Model, self).__init__() 66 | if isinstance(cfg, dict): 67 | self.yaml = cfg # model dict 68 | else: # is *.yaml 69 | import yaml # for torch hub 70 | self.yaml_file = Path(cfg).name 71 | with open(cfg) as f: 72 | self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict 73 | 74 | # Define model 75 | if nc and nc != self.yaml['nc']: 76 | print('Overriding model.yaml nc=%g with nc=%g' % (self.yaml['nc'], nc)) 77 | self.yaml['nc'] = nc # override yaml value 78 | self.model, self.save = parse_model(deepcopy(self.yaml), img_size, ch=[ch]) # model, savelist, ch_out 79 | # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 80 | 81 | # Build strides, anchors 82 | m = self.model[-1] # Detect() 83 | if isinstance(m, Detect): 84 | s = img_size[0] # 2x min stride 85 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 86 | m.anchors /= m.stride.view(-1, 1, 1) 87 | check_anchor_order(m) 88 | self.stride = m.stride 89 | self._initialize_biases() # only run once 90 | # print('Strides: %s' % m.stride.tolist()) 91 | 92 | # Init weights, biases 93 | initialize_weights(self) 94 | self.info() 95 | print('') 96 | 97 | def forward(self, x, augment=False, profile=False): 98 | if augment: 99 | img_size = x.shape[-2:] # height, width 100 | s = [1, 0.83, 0.67] # scales 101 | f = [None, 3, None] # flips (2-ud, 3-lr) 102 | y = [] # outputs 103 | for si, fi in zip(s, f): 104 | xi = scale_img(x.flip(fi) if fi else x, si) 105 | yi = self.forward_once(xi)[0] # forward 106 | # cv2.imwrite('img%g.jpg' % s, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 107 | yi[..., :4] /= si # de-scale 108 | if fi == 2: 109 | yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud 110 | elif fi == 3: 111 | yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr 112 | y.append(yi) 113 | return torch.cat(y, 1), None # augmented inference, train 114 | else: 115 | return self.forward_once(x, profile) # single-scale inference, train 116 | 117 | def forward_once(self, x, profile=False): 118 | y, dt = [], [] # outputs 119 | i = 1 120 | for m in self.model: 121 | if m.f != -1: # if not from previous layer 122 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 123 | 124 | if profile: 125 | try: 126 | import thop 127 | o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # FLOPS 128 | except: 129 | o = 0 130 | t = time_synchronized() 131 | for _ in range(10): 132 | _ = m(x) 133 | dt.append((time_synchronized() - t) * 100) 134 | print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) 135 | x = m(x) # run 136 | # print('layer:', i, 'feature map size:', x.shape) 137 | i+=1 138 | y.append(x if m.i in self.save else None) # save output 139 | 140 | if profile: 141 | print('%.1fms total' % sum(dt)) 142 | return x 143 | 144 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 145 | # cf = 
torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 146 | m = self.model[-1] # Detect() module 147 | for mi, s in zip(m.m, m.stride): # from 148 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 149 | b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 150 | b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 151 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 152 | 153 | def _print_biases(self): 154 | m = self.model[-1] # Detect() module 155 | for mi in m.m: # from 156 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 157 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 158 | 159 | # def _print_weights(self): 160 | # for m in self.model.modules(): 161 | # if type(m) is Bottleneck: 162 | # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 163 | 164 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 165 | print('Fusing layers... ') 166 | for m in self.model.modules(): 167 | if type(m) is Conv and hasattr(m, 'bn'): 168 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 169 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 170 | delattr(m, 'bn') # remove batchnorm 171 | m.forward = m.fuseforward # update forward 172 | self.info() 173 | return self 174 | 175 | def add_nms(self): # add NMS module to the end of the model 176 | if type(self.model[-1]) is not NMS: # if missing NMS 177 | print('Adding NMS module... ') 178 | m = NMS() # module 179 | m.f = -1 # from 180 | m.i = self.model[-1].i + 1 # index 181 | self.model.add_module(name='%s' % m.i, module=m) # add 182 | return self 183 | 184 | def info(self, verbose=False): # print model information 185 | model_info(self, verbose) 186 | 187 | 188 | def parse_model(d, img_size, ch): # model_dict, input_channels(3) 189 | logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 190 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 191 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 192 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 193 | 194 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 195 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 196 | m = eval(m) if isinstance(m, str) else m # eval strings 197 | for j, a in enumerate(args): 198 | try: 199 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 200 | except: 201 | pass 202 | 203 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 204 | if m in [Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]: 205 | c1, c2 = ch[f], args[0] 206 | 207 | # Normal 208 | # if i > 0 and args[0] != no: # channel expansion factor 209 | # ex = 1.75 # exponential (default 2.0) 210 | # e = math.log(c2 / ch[1]) / math.log(2) 211 | # c2 = int(ch[1] * ex ** e) 212 | # if m != Focus: 213 | 214 | c2 = make_divisible(c2 * gw, 8) if c2 != no else c2 215 | 216 | # Experimental 217 | # if i > 0 and args[0] != no: # channel expansion factor 218 | # ex = 1 + gw # exponential (default 2.0) 219 | # ch1 = 32 # ch[1] 220 | # e = math.log(c2 / ch1) / math.log(2) # level 1-n 221 | # c2 = int(ch1 * ex ** e) 222 | # if m != Focus: 223 | # c2 = make_divisible(c2, 8) if c2 != no else c2 224 | 225 | args = [c1, c2, *args[1:]] 226 | if m in 
[BottleneckCSP, C3]: 227 | args.insert(2, n) 228 | n = 1 229 | elif m is RGA_Module: 230 | args = [round(gw * args[0]), (img_size[0]//args[1])*(img_size[1]//args[1])] 231 | elif m is nn.BatchNorm2d: 232 | args = [ch[f]] 233 | elif m is Concat: 234 | c2 = sum([ch[-1 if x == -1 else x + 1] for x in f]) 235 | elif m is Detect: 236 | args.append([ch[x + 1] for x in f]) 237 | if isinstance(args[1], int): # number of anchors 238 | args[1] = [list(range(args[1] * 2))] * len(f) 239 | else: 240 | c2 = ch[f] 241 | 242 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 243 | t = str(m)[8:-2].replace('__main__.', '') # module type 244 | np = sum([x.numel() for x in m_.parameters()]) # number params 245 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 246 | logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print 247 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 248 | layers.append(m_) 249 | ch.append(c2) 250 | return nn.Sequential(*layers), sorted(save) 251 | 252 | 253 | if __name__ == '__main__': 254 | parser = argparse.ArgumentParser() 255 | parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') 256 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 257 | opt = parser.parse_args() 258 | opt.cfg = check_file(opt.cfg) # check file 259 | set_logging() 260 | device = select_device(opt.device) 261 | 262 | # Create model 263 | model = Model(opt.cfg).to(device) 264 | model.train() 265 | 266 | # Profile 267 | # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) 268 | # y = model(img, profile=True) 269 | 270 | # ONNX export 271 | # model.model[-1].export = True 272 | # torch.onnx.export(model, img, opt.cfg.replace('.yaml', '.onnx'), verbose=True, opset_version=11) 273 | 274 | # Tensorboard 275 | # from torch.utils.tensorboard import SummaryWriter 276 | # tb_writer = SummaryWriter() 277 | # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/") 278 | # tb_writer.add_graph(model.model, img) # add model to tensorboard 279 | # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard 280 | -------------------------------------------------------------------------------- /models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, 
[None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 2 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], # *** 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], # *** 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], # *** 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], # *** 35 | [-1, 1, nn.Upsample, [None, 
2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s.yaml.bak: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | width_multiple: 1.25 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], # *** 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], # *** 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], # *** 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], # *** 35 | [-1, 1, nn.Upsample, [None, 2, 
'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /skill_recgnize.py: -------------------------------------------------------------------------------- 1 | import cv2 as cv 2 | import numpy as np 3 | 4 | def score(img): 5 | counter = 0 6 | for i in range(img.shape[0]): 7 | for j in range(img.shape[1]): 8 | if img[i,j] > 127: 9 | counter += 1 10 | return counter/(img.shape[0] * img.shape[1]) 11 | 12 | def img_show(img): 13 | cv.imshow("win", img) 14 | cv.waitKey(0) 15 | cv.destroyAllWindows() 16 | 17 | skill_height = int((793-733)/2) 18 | skill_width = int((750-538)/7) 19 | 20 | dict = {"A": (733+skill_height, 538), "S": (733+skill_height, 538+skill_width), "D": (733+skill_height, 538+2*skill_width), 21 | "F": (733+skill_height, 538+3*skill_width), "G": (733+skill_height, 538+4*skill_width), 22 | "H": (733+skill_height, 538+5*skill_width), "Q": (733, 538), "W": (733, 538+skill_width), "E": (733, 538+2*skill_width), 23 | "R": (733, 538+3*skill_width), "T": (733, 538+4*skill_width), "Y": (733, 538+5*skill_width)} 24 | 25 | 26 | def skill_rec(skill_name, img): 27 | if skill_name == "X": 28 | return True 29 | skill_img = img[dict[skill_name][0]: dict[skill_name][0]+skill_height, 30 | dict[skill_name][1]: dict[skill_name][1]+skill_width, 2] 31 | if score(skill_img) > 0.1: 32 | return True 33 | else: 34 | return False 35 | 36 | if __name__ == "__main__": 37 | img_path = "datasets/guiqi/test/20_93.jpg" 38 | img = cv.imread(img_path) 39 | print(skill_height, skill_width) 40 | print(img.shape) 41 | skill_img = img[733: 793, 538:750, 2] 42 | img_show(skill_img) 43 | 44 | 45 | skill_imgA = img[dict["A"][0]: dict["A"][0]+skill_height, dict["A"][1]: dict["A"][1]+skill_width, 2] 46 | skill_imgH= img[dict["H"][0]: dict["H"][0]+skill_height, dict["H"][1]: dict["H"][1]+skill_width, 2] 47 | skill_imgG= img[dict["G"][0]: dict["G"][0]+skill_height, dict["G"][1]: dict["G"][1]+skill_width, 2] 48 | skill_imgE= img[dict["E"][0]: dict["E"][0]+skill_height, dict["E"][1]: dict["E"][1]+skill_width, 2] 49 | skill_imgQ= img[dict["Q"][0]: dict["Q"][0]+skill_height, dict["Q"][1]: dict["Q"][1]+skill_width, 2] 50 | skill_imgS= img[dict["S"][0]: dict["S"][0]+skill_height, dict["S"][1]: dict["S"][1]+skill_width, 2] 51 | skill_imgY= img[dict["Y"][0]: dict["Y"][0]+skill_height, dict["Y"][1]: dict["Y"][1]+skill_width, 2] 52 | skill_imgD = img[dict["D"][0]: dict["D"][0]+skill_height, dict["D"][1]: dict["D"][1]+skill_width, 2] 53 | skill_imgF = img[dict["F"][0]: dict["F"][0]+skill_height, dict["F"][1]: dict["F"][1]+skill_width, 2] 54 | skill_imgW = img[dict["W"][0]: dict["W"][0]+skill_height, dict["W"][1]: dict["W"][1]+skill_width, 2] 55 | skill_imgR = img[dict["R"][0]: dict["R"][0]+skill_height, dict["R"][1]: dict["R"][1]+skill_width, 2] 56 | 57 | # print("A", np.mean(skill_imgA)) 58 | # print("H", np.mean(skill_imgH)) 59 | # print("G", np.mean(skill_imgG)) 60 | # print("E", np.mean(skill_imgE)) 61 | # print("Q", np.mean(skill_imgQ)) 62 | # print("S", np.mean(skill_imgS)) 63 | # print("Y", 
np.mean(skill_imgY)) 64 | 65 | print("A", score(skill_imgA)) 66 | print("Q", score(skill_imgQ)) 67 | print("S", score(skill_imgS)) 68 | print("D", score(skill_imgD)) 69 | print("F", score(skill_imgF)) 70 | print("W", score(skill_imgW)) 71 | print("R", score(skill_imgR)) 72 | print("Y", score(skill_imgY)) 73 | print("H", score(skill_imgH)) 74 | print("G", score(skill_imgG)) 75 | print("E", score(skill_imgE)) 76 | 77 | print(skill_rec("W", img)) 78 | 79 | -------------------------------------------------------------------------------- /small_recgonize.py: -------------------------------------------------------------------------------- 1 | import cv2 as cv 2 | import numpy as np 3 | 4 | 5 | img_path = "datasets/guiqi/test/61_93.jpg" 6 | img = cv.imread(img_path) 7 | 8 | def img_show(img): 9 | cv.imshow("win", img) 10 | cv.waitKey(0) 11 | cv.destroyAllWindows() 12 | 13 | def current_door(img, stride = 17): 14 | crop = img[45:65, 1107:1270, 0] 15 | # img_show(crop) 16 | index = np.unravel_index(crop.argmax(), crop.shape) 17 | i = int((index[1] // stride) + 1) 18 | return i # returns which room (counting from 1) the player is currently in on the minimap 19 | 20 | def next_door(img): 21 | img_temp = np.load("问号模板.npy") 22 | # img_show(img_temp) 23 | target = img[45:65, 1107:1270] 24 | result = cv.matchTemplate(target, img_temp, cv.TM_SQDIFF_NORMED) 25 | cv.normalize(result, result, 0, 1, cv.NORM_MINMAX, -1) 26 | min_val, max_val, min_loc, max_loc = cv.minMaxLoc(result) 27 | next_door_id = 0 28 | if min_val < 1e-10: 29 | # print(min_val, max_val, min_loc, max_loc) 30 | strmin_val = str(min_val) 31 | theight, twidth = img_temp.shape[:2] 32 | # cv.rectangle(target, min_loc, (min_loc[0] + twidth, min_loc[1] + theight), (225, 0, 0), 2) 33 | # cv.imshow("MatchResult----MatchingValue=" + strmin_val, target) 34 | # cv.waitKey() 35 | # cv.destroyAllWindows() 36 | next_door_id = int(((min_loc[0] + 0.5 * twidth) // 18.11) + 1) 37 | return next_door_id 38 | 39 | if __name__ == "__main__": 40 | print(current_door(img)) 41 | print(next_door(img)) 42 | # img_show(img[45:65, 1144:1162]) 43 | # np.save("问号模板", img[45:65, 1144:1162]) 44 | 45 | 46 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__init__.py -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/activations.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/activations.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/datasets.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/datasets.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/datasets.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/datasets.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/datasets.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/datasets.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/general.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/general.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/general.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/general.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/general.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/general.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/google_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/google_utils.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/google_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/google_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/google_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/google_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/torch_utils.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/torch_utils.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/torch_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/torch_utils.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/torch_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/__pycache__/torch_utils.cpython-38.pyc -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | # Swish https://arxiv.org/pdf/1905.02244.pdf --------------------------------------------------------------------------- 7 | class Swish(nn.Module): # 8 | @staticmethod 9 | def forward(x): 10 | return x * torch.sigmoid(x) 11 | 12 | 13 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 14 | @staticmethod 15 | def forward(x): 16 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 17 | return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX 18 | 19 | 20 | class MemoryEfficientSwish(nn.Module): 21 | class F(torch.autograd.Function): 22 | @staticmethod 23 | def forward(ctx, x): 24 | ctx.save_for_backward(x) 25 | return x * torch.sigmoid(x) 26 | 27 | @staticmethod 28 | def backward(ctx, grad_output): 29 | x = ctx.saved_tensors[0] 30 | sx = torch.sigmoid(x) 31 | return grad_output * (sx * (1 + x * (1 - sx))) 32 | 33 | def forward(self, x): 34 | return self.F.apply(x) 35 | 36 | 37 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 38 | class Mish(nn.Module): 39 | @staticmethod 40 | def forward(x): 41 | return x * F.softplus(x).tanh() 42 | 43 | 44 | class MemoryEfficientMish(nn.Module): 45 | class F(torch.autograd.Function): 46 | @staticmethod 47 | def forward(ctx, x): 48 | ctx.save_for_backward(x) 49 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 50 | 51 | @staticmethod 52 | def backward(ctx, grad_output): 53 | x = ctx.saved_tensors[0] 54 | sx = torch.sigmoid(x) 55 | fx = F.softplus(x).tanh() 56 | return grad_output * (fx + x * sx * (1 - fx * fx)) 57 | 58 | def forward(self, x): 59 | return self.F.apply(x) 60 | 61 | 62 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 63 | class FReLU(nn.Module): 64 | def __init__(self, c1, k=3): # ch_in, kernel 65 | super().__init__() 66 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1) 67 | self.bn = nn.BatchNorm2d(c1) 68 | 69 | def forward(self, x): 70 | return torch.max(x, self.bn(self.conv(x))) 71 | -------------------------------------------------------------------------------- /utils/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/utils/datasets.py 
-------------------------------------------------------------------------------- /utils/evolve.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Hyperparameter evolution commands (avoids CUDA memory leakage issues) 3 | # Replaces train.py python generations 'for' loop with a bash 'for' loop 4 | 5 | # Start on 4-GPU machine 6 | #for i in 0 1 2 3; do 7 | # t=ultralytics/yolov5:evolve && sudo docker pull $t && sudo docker run -d --ipc=host --gpus all -v "$(pwd)"/VOC:/usr/src/VOC $t bash utils/evolve.sh $i 8 | # sleep 60 # avoid simultaneous evolve.txt read/write 9 | #done 10 | 11 | # Hyperparameter evolution commands 12 | while true; do 13 | # python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50 --evolve --bucket ult/evolve/voc --device $1 14 | python train.py --batch 40 --weights yolov5m.pt --data coco.yaml --img 640 --epochs 30 --evolve --bucket ult/evolve/coco --device $1 15 | done 16 | -------------------------------------------------------------------------------- /utils/general.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import logging 3 | import math 4 | import os 5 | import platform 6 | import random 7 | import shutil 8 | import subprocess 9 | import time 10 | import re 11 | from contextlib import contextmanager 12 | from copy import copy 13 | from pathlib import Path 14 | 15 | import cv2 16 | import matplotlib 17 | import matplotlib.pyplot as plt 18 | import numpy as np 19 | import torch 20 | import torch.nn as nn 21 | import yaml 22 | from scipy.cluster.vq import kmeans 23 | from scipy.signal import butter, filtfilt 24 | from tqdm import tqdm 25 | 26 | from utils.google_utils import gsutil_getsize 27 | from utils.torch_utils import is_parallel, init_torch_seeds 28 | 29 | # Set printoptions 30 | torch.set_printoptions(linewidth=320, precision=5, profile='long') 31 | np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5 32 | matplotlib.rc('font', **{'size': 11}) 33 | 34 | # Prevent OpenCV from multithreading (to use PyTorch DataLoader) 35 | cv2.setNumThreads(0) 36 | 37 | 38 | @contextmanager 39 | def torch_distributed_zero_first(local_rank: int): 40 | """ 41 | Decorator to make all processes in distributed training wait for each local_master to do something. 42 | """ 43 | if local_rank not in [-1, 0]: 44 | torch.distributed.barrier() 45 | yield 46 | if local_rank == 0: 47 | torch.distributed.barrier() 48 | 49 | 50 | def set_logging(rank=-1): 51 | logging.basicConfig( 52 | format="%(message)s", 53 | level=logging.INFO if rank in [-1, 0] else logging.WARN) 54 | 55 | 56 | def init_seeds(seed=0): 57 | random.seed(seed) 58 | np.random.seed(seed) 59 | init_torch_seeds(seed) 60 | 61 | 62 | def get_latest_run(search_dir='./runs'): 63 | # Return path to most recent 'last.pt' in /runs (i.e. 
to --resume from) 64 | last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True) 65 | return max(last_list, key=os.path.getctime) if last_list else '' 66 | 67 | 68 | def check_git_status(): 69 | # Suggest 'git pull' if repo is out of date 70 | if platform.system() in ['Linux', 'Darwin'] and not os.path.isfile('/.dockerenv'): 71 | s = subprocess.check_output('if [ -d .git ]; then git fetch && git status -uno; fi', shell=True).decode('utf-8') 72 | if 'Your branch is behind' in s: 73 | print(s[s.find('Your branch is behind'):s.find('\n\n')] + '\n') 74 | 75 | 76 | def check_img_size(img_size, s=32): 77 | # Verify img_size is a multiple of stride s 78 | new_size = make_divisible(img_size, int(s)) # ceil gs-multiple 79 | if new_size != img_size: 80 | print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size)) 81 | return new_size 82 | 83 | 84 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 85 | # Check anchor fit to data, recompute if necessary 86 | print('\nAnalyzing anchors... ', end='') 87 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 88 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 89 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 90 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 91 | 92 | def metric(k): # compute metric 93 | r = wh[:, None] / k[None] 94 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 95 | best = x.max(1)[0] # best_x 96 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 97 | bpr = (best > 1. / thr).float().mean() # best possible recall 98 | return bpr, aat 99 | 100 | bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2)) 101 | print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='') 102 | if bpr < 0.98: # threshold to recompute 103 | print('. Attempting to generate improved anchors, please wait...' % bpr) 104 | na = m.anchor_grid.numel() // 2 # number of anchors 105 | new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 106 | new_bpr = metric(new_anchors.reshape(-1, 2))[0] 107 | if new_bpr > bpr: # replace anchors 108 | new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors) 109 | m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference 110 | m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 111 | check_anchor_order(m) 112 | print('New anchors saved to model. Update model *.yaml to use these anchors in the future.') 113 | else: 114 | print('Original anchors better than new anchors. 
Proceeding with original anchors.') 115 | print('') # newline 116 | 117 | 118 | def check_anchor_order(m): 119 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary 120 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 121 | da = a[-1] - a[0] # delta a 122 | ds = m.stride[-1] - m.stride[0] # delta s 123 | if da.sign() != ds.sign(): # same order 124 | print('Reversing anchor order') 125 | m.anchors[:] = m.anchors.flip(0) 126 | m.anchor_grid[:] = m.anchor_grid.flip(0) 127 | 128 | 129 | def check_file(file): 130 | # Search for file if not found 131 | if os.path.isfile(file) or file == '': 132 | return file 133 | else: 134 | files = glob.glob('./**/' + file, recursive=True) # find file 135 | assert len(files), 'File Not Found: %s' % file # assert file was found 136 | assert len(files) == 1, "Multiple files match '%s', specify exact path: %s" % (file, files) # assert unique 137 | return files[0] # return file 138 | 139 | 140 | def check_dataset(dict): 141 | # Download dataset if not found 142 | val, s = dict.get('val'), dict.get('download') 143 | if val and len(val): 144 | val = [os.path.abspath(x) for x in (val if isinstance(val, list) else [val])] # val path 145 | if not all(os.path.exists(x) for x in val): 146 | print('\nWARNING: Dataset not found, nonexistent paths: %s' % [*val]) 147 | if s and len(s): # download script 148 | print('Downloading %s ...' % s) 149 | if s.startswith('http') and s.endswith('.zip'): # URL 150 | f = Path(s).name # filename 151 | torch.hub.download_url_to_file(s, f) 152 | r = os.system('unzip -q %s -d ../ && rm %s' % (f, f)) # unzip 153 | else: # bash script 154 | r = os.system(s) 155 | print('Dataset autodownload %s\n' % ('success' if r == 0 else 'failure')) # analyze return value 156 | else: 157 | raise Exception('Dataset not found.') 158 | 159 | 160 | def make_divisible(x, divisor): 161 | # Returns x evenly divisible by divisor 162 | return math.ceil(x / divisor) * divisor 163 | 164 | 165 | def labels_to_class_weights(labels, nc=80): 166 | # Get class weights (inverse frequency) from training labels 167 | if labels[0] is None: # no labels loaded 168 | return torch.Tensor() 169 | 170 | labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO 171 | classes = labels[:, 0].astype(np.int) # labels = [class xywh] 172 | weights = np.bincount(classes, minlength=nc) # occurrences per class 173 | 174 | # Prepend gridpoint count (for uCE training) 175 | # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image 176 | # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start 177 | 178 | weights[weights == 0] = 1 # replace empty bins with 1 179 | weights = 1 / weights # number of targets per class 180 | weights /= weights.sum() # normalize 181 | return torch.from_numpy(weights) 182 | 183 | 184 | def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)): 185 | # Produces image weights based on class mAPs 186 | n = len(labels) 187 | class_counts = np.array([np.bincount(labels[i][:, 0].astype(np.int), minlength=nc) for i in range(n)]) 188 | image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1) 189 | # index = random.choices(range(n), weights=image_weights, k=1) # weight image sample 190 | return image_weights 191 | 192 | 193 | def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) 194 | # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ 195 | # a = 
np.loadtxt('data/coco.names', dtype='str', delimiter='\n') 196 | # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') 197 | # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco 198 | # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet 199 | x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 200 | 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 201 | 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] 202 | return x 203 | 204 | 205 | def xyxy2xywh(x): 206 | # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right 207 | y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) 208 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center 209 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center 210 | y[:, 2] = x[:, 2] - x[:, 0] # width 211 | y[:, 3] = x[:, 3] - x[:, 1] # height 212 | return y 213 | 214 | 215 | def xywh2xyxy(x): 216 | # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 217 | y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) 218 | y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x 219 | y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y 220 | y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x 221 | y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y 222 | return y 223 | 224 | 225 | def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): 226 | # Rescale coords (xyxy) from img1_shape to img0_shape 227 | if ratio_pad is None: # calculate from img0_shape 228 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new 229 | pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding 230 | else: 231 | gain = ratio_pad[0][0] 232 | pad = ratio_pad[1] 233 | 234 | coords[:, [0, 2]] -= pad[0] # x padding 235 | coords[:, [1, 3]] -= pad[1] # y padding 236 | coords[:, :4] /= gain 237 | clip_coords(coords, img0_shape) 238 | return coords 239 | 240 | 241 | def clip_coords(boxes, img_shape): 242 | # Clip bounding xyxy bounding boxes to image shape (height, width) 243 | boxes[:, 0].clamp_(0, img_shape[1]) # x1 244 | boxes[:, 1].clamp_(0, img_shape[0]) # y1 245 | boxes[:, 2].clamp_(0, img_shape[1]) # x2 246 | boxes[:, 3].clamp_(0, img_shape[0]) # y2 247 | 248 | 249 | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, fname='precision-recall_curve.png'): 250 | """ Compute the average precision, given the recall and precision curves. 251 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 252 | # Arguments 253 | tp: True positives (nparray, nx1 or nx10). 254 | conf: Objectness value from 0-1 (nparray). 255 | pred_cls: Predicted object classes (nparray). 256 | target_cls: True object classes (nparray). 257 | plot: Plot precision-recall curve at mAP@0.5 258 | fname: Plot filename 259 | # Returns 260 | The average precision as computed in py-faster-rcnn. 
261 | """ 262 | 263 | # Sort by objectness 264 | i = np.argsort(-conf) 265 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 266 | 267 | # Find unique classes 268 | unique_classes = np.unique(target_cls) 269 | 270 | # Create Precision-Recall curve and compute AP for each class 271 | px, py = np.linspace(0, 1, 1000), [] # for plotting 272 | pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898 273 | s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95) 274 | ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s) 275 | for ci, c in enumerate(unique_classes): 276 | i = pred_cls == c 277 | n_gt = (target_cls == c).sum() # Number of ground truth objects 278 | n_p = i.sum() # Number of predicted objects 279 | 280 | if n_p == 0 or n_gt == 0: 281 | continue 282 | else: 283 | # Accumulate FPs and TPs 284 | fpc = (1 - tp[i]).cumsum(0) 285 | tpc = tp[i].cumsum(0) 286 | 287 | # Recall 288 | recall = tpc / (n_gt + 1e-16) # recall curve 289 | r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) # r at pr_score, negative x, xp because xp decreases 290 | 291 | # Precision 292 | precision = tpc / (tpc + fpc) # precision curve 293 | p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score 294 | 295 | # AP from recall-precision curve 296 | py.append(np.interp(px, recall[:, 0], precision[:, 0])) # precision at mAP@0.5 297 | for j in range(tp.shape[1]): 298 | ap[ci, j] = compute_ap(recall[:, j], precision[:, j]) 299 | 300 | # Compute F1 score (harmonic mean of precision and recall) 301 | f1 = 2 * p * r / (p + r + 1e-16) 302 | 303 | if plot: 304 | py = np.stack(py, axis=1) 305 | fig, ax = plt.subplots(1, 1, figsize=(5, 5)) 306 | ax.plot(px, py, linewidth=0.5, color='grey') # plot(recall, precision) 307 | ax.plot(px, py.mean(1), linewidth=2, color='blue', label='all classes') 308 | ax.set_xlabel('Recall') 309 | ax.set_ylabel('Precision') 310 | ax.set_xlim(0, 1) 311 | ax.set_ylim(0, 1) 312 | plt.legend() 313 | fig.tight_layout() 314 | fig.savefig(fname, dpi=200) 315 | 316 | return p, r, ap, f1, unique_classes.astype('int32') 317 | 318 | 319 | def compute_ap(recall, precision): 320 | """ Compute the average precision, given the recall and precision curves. 321 | Source: https://github.com/rbgirshick/py-faster-rcnn. 322 | # Arguments 323 | recall: The recall curve (list). 324 | precision: The precision curve (list). 325 | # Returns 326 | The average precision as computed in py-faster-rcnn. 327 | """ 328 | 329 | # Append sentinel values to beginning and end 330 | mrec = np.concatenate(([0.], recall, [min(recall[-1] + 1E-3, 1.)])) 331 | mpre = np.concatenate(([0.], precision, [0.])) 332 | 333 | # Compute the precision envelope 334 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) 335 | 336 | # Integrate area under curve 337 | method = 'interp' # methods: 'continuous', 'interp' 338 | if method == 'interp': 339 | x = np.linspace(0, 1, 101) # 101-point interp (COCO) 340 | ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate 341 | else: # 'continuous' 342 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes 343 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve 344 | 345 | return ap 346 | 347 | 348 | def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9): 349 | # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 350 | box2 = box2.T 351 | 352 | # Get the coordinates of bounding boxes 353 | if x1y1x2y2: # x1, y1, x2, y2 = box1 354 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] 355 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] 356 | else: # transform from xywh to xyxy 357 | b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 358 | b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 359 | b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 360 | b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 361 | 362 | # Intersection area 363 | inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ 364 | (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) 365 | 366 | # Union Area 367 | w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps 368 | w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps 369 | union = w1 * h1 + w2 * h2 - inter + eps 370 | 371 | iou = inter / union 372 | if GIoU or DIoU or CIoU: 373 | cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width 374 | ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height 375 | if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 376 | c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared 377 | rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + 378 | (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared 379 | if DIoU: 380 | return iou - rho2 / c2 # DIoU 381 | elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 382 | v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) 383 | with torch.no_grad(): 384 | alpha = v / ((1 + eps) - iou + v) 385 | return iou - (rho2 / c2 + v * alpha) # CIoU 386 | else: # GIoU https://arxiv.org/pdf/1902.09630.pdf 387 | c_area = cw * ch + eps # convex area 388 | return iou - (c_area - union) / c_area # GIoU 389 | else: 390 | return iou # IoU 391 | 392 | 393 | def box_iou(box1, box2): 394 | # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py 395 | """ 396 | Return intersection-over-union (Jaccard index) of boxes. 397 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 398 | Arguments: 399 | box1 (Tensor[N, 4]) 400 | box2 (Tensor[M, 4]) 401 | Returns: 402 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 403 | IoU values for every element in boxes1 and boxes2 404 | """ 405 | 406 | def box_area(box): 407 | # box = 4xn 408 | return (box[2] - box[0]) * (box[3] - box[1]) 409 | 410 | area1 = box_area(box1.T) 411 | area2 = box_area(box2.T) 412 | 413 | # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) 414 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 415 | return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) 416 | 417 | 418 | def wh_iou(wh1, wh2): 419 | # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2 420 | wh1 = wh1[:, None] # [N,1,2] 421 | wh2 = wh2[None] # [1,M,2] 422 | inter = torch.min(wh1, wh2).prod(2) # [N,M] 423 | return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) 424 | 425 | 426 | class FocalLoss(nn.Module): 427 | # Wraps focal loss around existing loss_fcn(), i.e. 
criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 428 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 429 | super(FocalLoss, self).__init__() 430 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 431 | self.gamma = gamma 432 | self.alpha = alpha 433 | self.reduction = loss_fcn.reduction 434 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 435 | 436 | def forward(self, pred, true): 437 | loss = self.loss_fcn(pred, true) 438 | # p_t = torch.exp(-loss) 439 | # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability 440 | 441 | # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py 442 | pred_prob = torch.sigmoid(pred) # prob from logits 443 | p_t = true * pred_prob + (1 - true) * (1 - pred_prob) 444 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 445 | modulating_factor = (1.0 - p_t) ** self.gamma 446 | loss *= alpha_factor * modulating_factor 447 | 448 | if self.reduction == 'mean': 449 | return loss.mean() 450 | elif self.reduction == 'sum': 451 | return loss.sum() 452 | else: # 'none' 453 | return loss 454 | 455 | 456 | def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 457 | # return positive, negative label smoothing BCE targets 458 | return 1.0 - 0.5 * eps, 0.5 * eps 459 | 460 | 461 | class BCEBlurWithLogitsLoss(nn.Module): 462 | # BCEwithLogitLoss() with reduced missing label effects. 463 | def __init__(self, alpha=0.05): 464 | super(BCEBlurWithLogitsLoss, self).__init__() 465 | self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() 466 | self.alpha = alpha 467 | 468 | def forward(self, pred, true): 469 | loss = self.loss_fcn(pred, true) 470 | pred = torch.sigmoid(pred) # prob from logits 471 | dx = pred - true # reduce only missing label effects 472 | # dx = (pred - true).abs() # reduce missing label and false label effects 473 | alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) 474 | loss *= alpha_factor 475 | return loss.mean() 476 | 477 | 478 | def compute_loss(p, targets, model): # predictions, targets, model 479 | device = targets.device 480 | lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) 481 | tcls, tbox, indices, anchors = build_targets(p, targets, model) # targets 482 | h = model.hyp # hyperparameters 483 | 484 | # Define criteria 485 | BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['cls_pw']])).to(device) 486 | BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['obj_pw']])).to(device) 487 | 488 | # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 489 | cp, cn = smooth_BCE(eps=0.0) 490 | 491 | # Focal loss 492 | g = h['fl_gamma'] # focal loss gamma 493 | if g > 0: 494 | BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) 495 | 496 | # Losses 497 | nt = 0 # number of targets 498 | np = len(p) # number of outputs 499 | balance = [4.0, 1.0, 0.4] if np == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6 500 | for i, pi in enumerate(p): # layer index, layer predictions 501 | b, a, gj, gi = indices[i] # image, anchor, gridy, gridx 502 | tobj = torch.zeros_like(pi[..., 0], device=device) # target obj 503 | 504 | n = b.shape[0] # number of targets 505 | if n: 506 | nt += n # cumulative targets 507 | ps = pi[b, a, gj, gi] # prediction subset corresponding to targets 508 | 509 | # Regression 510 | pxy = ps[:, :2].sigmoid() * 2. 
- 0.5 511 | pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] 512 | pbox = torch.cat((pxy, pwh), 1).to(device) # predicted box 513 | iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) 514 | lbox += (1.0 - iou).mean() # iou loss 515 | 516 | # Objectness 517 | tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio 518 | 519 | # Classification 520 | if model.nc > 1: # cls loss (only if multiple classes) 521 | t = torch.full_like(ps[:, 5:], cn, device=device) # targets 522 | t[range(n), tcls[i]] = cp 523 | lcls += BCEcls(ps[:, 5:], t) # BCE 524 | 525 | # Append targets to text file 526 | # with open('targets.txt', 'a') as file: 527 | # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] 528 | 529 | lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss 530 | 531 | s = 3 / np # output count scaling 532 | lbox *= h['box'] * s 533 | lobj *= h['obj'] * s * (1.4 if np == 4 else 1.) 534 | lcls *= h['cls'] * s 535 | bs = tobj.shape[0] # batch size 536 | 537 | loss = lbox + lobj + lcls 538 | return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach() 539 | 540 | 541 | def build_targets(p, targets, model): 542 | # Build targets for compute_loss(), input targets(image,class,x,y,w,h) 543 | det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module 544 | na, nt = det.na, targets.shape[0] # number of anchors, targets 545 | tcls, tbox, indices, anch = [], [], [], [] 546 | gain = torch.ones(7, device=targets.device) # normalized to gridspace gain 547 | ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) 548 | targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices 549 | 550 | g = 0.5 # bias 551 | off = torch.tensor([[0, 0], 552 | [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m 553 | # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm 554 | ], device=targets.device).float() * g # offsets 555 | 556 | for i in range(det.nl): 557 | anchors = det.anchors[i] 558 | gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain 559 | 560 | # Match targets to anchors 561 | t = targets * gain 562 | if nt: 563 | # Matches 564 | r = t[:, :, 4:6] / anchors[:, None] # wh ratio 565 | j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t'] # compare 566 | # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) 567 | t = t[j] # filter 568 | 569 | # Offsets 570 | gxy = t[:, 2:4] # grid xy 571 | gxi = gain[[2, 3]] - gxy # inverse 572 | j, k = ((gxy % 1. < g) & (gxy > 1.)).T 573 | l, m = ((gxi % 1. 
< g) & (gxi > 1.)).T 574 | j = torch.stack((torch.ones_like(j), j, k, l, m)) 575 | t = t.repeat((5, 1, 1))[j] 576 | offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] 577 | else: 578 | t = targets[0] 579 | offsets = 0 580 | 581 | # Define 582 | b, c = t[:, :2].long().T # image, class 583 | gxy = t[:, 2:4] # grid xy 584 | gwh = t[:, 4:6] # grid wh 585 | gij = (gxy - offsets).long() 586 | gi, gj = gij.T # grid xy indices 587 | 588 | # Append 589 | a = t[:, 6].long() # anchor indices 590 | indices.append((b, a, gj, gi)) # image, anchor, grid indices 591 | tbox.append(torch.cat((gxy - gij, gwh), 1)) # box 592 | anch.append(anchors[a]) # anchors 593 | tcls.append(c) # class 594 | 595 | return tcls, tbox, indices, anch 596 | 597 | 598 | def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, merge=False, classes=None, agnostic=False): 599 | """Performs Non-Maximum Suppression (NMS) on inference results 600 | 601 | Returns: 602 | detections with shape: nx6 (x1, y1, x2, y2, conf, cls) 603 | """ 604 | 605 | nc = prediction[0].shape[1] - 5 # number of classes 606 | xc = prediction[..., 4] > conf_thres # candidates 607 | 608 | # Settings 609 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height 610 | max_det = 300 # maximum number of detections per image 611 | time_limit = 10.0 # seconds to quit after 612 | redundant = True # require redundant detections 613 | multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img) 614 | 615 | t = time.time() 616 | output = [None] * prediction.shape[0] 617 | for xi, x in enumerate(prediction): # image index, image inference 618 | # Apply constraints 619 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 620 | x = x[xc[xi]] # confidence 621 | 622 | # If none remain process next image 623 | if not x.shape[0]: 624 | continue 625 | 626 | # Compute conf 627 | x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf 628 | 629 | # Box (center x, center y, width, height) to (x1, y1, x2, y2) 630 | box = xywh2xyxy(x[:, :4]) 631 | 632 | # Detections matrix nx6 (xyxy, conf, cls) 633 | if multi_label: 634 | i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T 635 | x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) 636 | else: # best class only 637 | conf, j = x[:, 5:].max(1, keepdim=True) 638 | x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] 639 | 640 | # Filter by class 641 | if classes: 642 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] 643 | 644 | # Apply finite constraint 645 | # if not torch.isfinite(x).all(): 646 | # x = x[torch.isfinite(x).all(1)] 647 | 648 | # If none remain process next image 649 | n = x.shape[0] # number of boxes 650 | if not n: 651 | continue 652 | 653 | # Sort by confidence 654 | # x = x[x[:, 4].argsort(descending=True)] 655 | 656 | # Batched NMS 657 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes 658 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores 659 | i = torch.ops.torchvision.nms(boxes, scores, iou_thres) 660 | if i.shape[0] > max_det: # limit detections 661 | i = i[:max_det] 662 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 663 | try: # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 664 | iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix 665 | weights = iou * scores[None] # box weights 666 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes 667 | if redundant: 668 | i = i[iou.sum(1) > 
1] # require redundancy 669 | except: # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139 670 | print(x, i, x.shape, i.shape) 671 | pass 672 | 673 | output[xi] = x[i] 674 | if (time.time() - t) > time_limit: 675 | break # time limit exceeded 676 | 677 | return output 678 | 679 | 680 | def strip_optimizer(f='weights/best.pt', s=''): # from utils.general import *; strip_optimizer() 681 | # Strip optimizer from 'f' to finalize training, optionally save as 's' 682 | x = torch.load(f, map_location=torch.device('cpu')) 683 | x['optimizer'] = None 684 | x['training_results'] = None 685 | x['epoch'] = -1 686 | x['model'].half() # to FP16 687 | for p in x['model'].parameters(): 688 | p.requires_grad = False 689 | torch.save(x, s or f) 690 | mb = os.path.getsize(s or f) / 1E6 # filesize 691 | print('Optimizer stripped from %s,%s %.1fMB' % (f, (' saved as %s,' % s) if s else '', mb)) 692 | 693 | 694 | def coco_class_count(path='../coco/labels/train2014/'): 695 | # Histogram of occurrences per class 696 | nc = 80 # number classes 697 | x = np.zeros(nc, dtype='int32') 698 | files = sorted(glob.glob('%s/*.*' % path)) 699 | for i, file in enumerate(files): 700 | labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5) 701 | x += np.bincount(labels[:, 0].astype('int32'), minlength=nc) 702 | print(i, len(files)) 703 | 704 | 705 | def coco_only_people(path='../coco/labels/train2017/'): # from utils.general import *; coco_only_people() 706 | # Find images with only people 707 | files = sorted(glob.glob('%s/*.*' % path)) 708 | for i, file in enumerate(files): 709 | labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5) 710 | if all(labels[:, 0] == 0): 711 | print(labels.shape[0], file) 712 | 713 | 714 | def crop_images_random(path='../images/', scale=0.50): # from utils.general import *; crop_images_random() 715 | # crops images into random squares up to scale fraction 716 | # WARNING: overwrites images! 717 | for file in tqdm(sorted(glob.glob('%s/*.*' % path))): 718 | img = cv2.imread(file) # BGR 719 | if img is not None: 720 | h, w = img.shape[:2] 721 | 722 | # create random mask 723 | a = 30 # minimum size (pixels) 724 | mask_h = random.randint(a, int(max(a, h * scale))) # mask height 725 | mask_w = mask_h # mask width 726 | 727 | # box 728 | xmin = max(0, random.randint(0, w) - mask_w // 2) 729 | ymin = max(0, random.randint(0, h) - mask_h // 2) 730 | xmax = min(w, xmin + mask_w) 731 | ymax = min(h, ymin + mask_h) 732 | 733 | # apply random color mask 734 | cv2.imwrite(file, img[ymin:ymax, xmin:xmax]) 735 | 736 | 737 | def coco_single_class_labels(path='../coco/labels/train2014/', label_class=43): 738 | # Makes single-class coco datasets. 
from utils.general import *; coco_single_class_labels() 739 | if os.path.exists('new/'): 740 | shutil.rmtree('new/') # delete output folder 741 | os.makedirs('new/') # make new output folder 742 | os.makedirs('new/labels/') 743 | os.makedirs('new/images/') 744 | for file in tqdm(sorted(glob.glob('%s/*.*' % path))): 745 | with open(file, 'r') as f: 746 | labels = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) 747 | i = labels[:, 0] == label_class 748 | if any(i): 749 | img_file = file.replace('labels', 'images').replace('txt', 'jpg') 750 | labels[:, 0] = 0 # reset class to 0 751 | with open('new/images.txt', 'a') as f: # add image to dataset list 752 | f.write(img_file + '\n') 753 | with open('new/labels/' + Path(file).name, 'a') as f: # write label 754 | for l in labels[i]: 755 | f.write('%g %.6f %.6f %.6f %.6f\n' % tuple(l)) 756 | shutil.copyfile(src=img_file, dst='new/images/' + Path(file).name.replace('txt', 'jpg')) # copy images 757 | 758 | 759 | def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=1.0, gen=1000, verbose=True): 760 | """ Creates kmeans-evolved anchors from training dataset 761 | 762 | Arguments: 763 | path: path to dataset *.yaml, or a loaded dataset 764 | n: number of anchors 765 | img_size: image size used for training 766 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 767 | gen: generations to evolve anchors using genetic algorithm 768 | 769 | Return: 770 | k: kmeans evolved anchors 771 | 772 | Usage: 773 | from utils.general import *; _ = kmean_anchors() 774 | """ 775 | thr = 1. / thr 776 | 777 | def metric(k, wh): # compute metrics 778 | r = wh[:, None] / k[None] 779 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 780 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 781 | return x, x.max(1)[0] # x, best_x 782 | 783 | def fitness(k): # mutation fitness 784 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 785 | return (best * (best > thr).float()).mean() # fitness 786 | 787 | def print_results(k): 788 | k = k[np.argsort(k.prod(1))] # sort small to large 789 | x, best = metric(k, wh0) 790 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 791 | print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat)) 792 | print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' % 793 | (n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='') 794 | for i, x in enumerate(k): 795 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 796 | return k 797 | 798 | if isinstance(path, str): # *.yaml file 799 | with open(path) as f: 800 | data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict 801 | from utils.datasets import LoadImagesAndLabels 802 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 803 | else: 804 | dataset = path # dataset 805 | 806 | # Get label wh 807 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 808 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 809 | 810 | # Filter 811 | i = (wh0 < 3.0).any(1).sum() 812 | if i: 813 | print('WARNING: Extremely small objects found. ' 814 | '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0))) 815 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 816 | 817 | # Kmeans calculation 818 | print('Running kmeans for %g anchors on %g points...' 
% (n, len(wh))) 819 | s = wh.std(0) # sigmas for whitening 820 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 821 | k *= s 822 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 823 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 824 | k = print_results(k) 825 | 826 | # Plot 827 | # k, d = [None] * 20, [None] * 20 828 | # for i in tqdm(range(1, 21)): 829 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 830 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) 831 | # ax = ax.ravel() 832 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 833 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 834 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 835 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 836 | # fig.tight_layout() 837 | # fig.savefig('wh.png', dpi=200) 838 | 839 | # Evolve 840 | npr = np.random 841 | f, sh, mp, s = fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 842 | pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm') # progress bar 843 | for _ in pbar: 844 | v = np.ones(sh) 845 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 846 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 847 | kg = (k.copy() * v).clip(min=2.0) 848 | fg = fitness(kg) 849 | if fg > f: 850 | f, k = fg, kg.copy() 851 | pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f 852 | if verbose: 853 | print_results(k) 854 | 855 | return print_results(k) 856 | 857 | 858 | def print_mutation(hyp, results, yaml_file='hyp_evolved.yaml', bucket=''): 859 | # Print mutation results to evolve.txt (for use with train.py --evolve) 860 | a = '%10s' * len(hyp) % tuple(hyp.keys()) # hyperparam keys 861 | b = '%10.3g' * len(hyp) % tuple(hyp.values()) # hyperparam values 862 | c = '%10.4g' * len(results) % results # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3) 863 | print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c)) 864 | 865 | if bucket: 866 | url = 'gs://%s/evolve.txt' % bucket 867 | if gsutil_getsize(url) > (os.path.getsize('evolve.txt') if os.path.exists('evolve.txt') else 0): 868 | os.system('gsutil cp %s .' 
% url) # download evolve.txt if larger than local 869 | 870 | with open('evolve.txt', 'a') as f: # append result 871 | f.write(c + b + '\n') 872 | x = np.unique(np.loadtxt('evolve.txt', ndmin=2), axis=0) # load unique rows 873 | x = x[np.argsort(-fitness(x))] # sort 874 | np.savetxt('evolve.txt', x, '%10.3g') # save sort by fitness 875 | 876 | # Save yaml 877 | for i, k in enumerate(hyp.keys()): 878 | hyp[k] = float(x[0, i + 7]) 879 | with open(yaml_file, 'w') as f: 880 | results = tuple(x[0, :7]) 881 | c = '%10.4g' * len(results) % results # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3) 882 | f.write('# Hyperparameter Evolution Results\n# Generations: %g\n# Metrics: ' % len(x) + c + '\n\n') 883 | yaml.dump(hyp, f, sort_keys=False) 884 | 885 | if bucket: 886 | os.system('gsutil cp evolve.txt %s gs://%s' % (yaml_file, bucket)) # upload 887 | 888 | 889 | def apply_classifier(x, model, img, im0): 890 | # applies a second stage classifier to yolo outputs 891 | im0 = [im0] if isinstance(im0, np.ndarray) else im0 892 | for i, d in enumerate(x): # per image 893 | if d is not None and len(d): 894 | d = d.clone() 895 | 896 | # Reshape and pad cutouts 897 | b = xyxy2xywh(d[:, :4]) # boxes 898 | b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square 899 | b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad 900 | d[:, :4] = xywh2xyxy(b).long() 901 | 902 | # Rescale boxes from img_size to im0 size 903 | scale_coords(img.shape[2:], d[:, :4], im0[i].shape) 904 | 905 | # Classes 906 | pred_cls1 = d[:, 5].long() 907 | ims = [] 908 | for j, a in enumerate(d): # per item 909 | cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])] 910 | im = cv2.resize(cutout, (224, 224)) # BGR 911 | # cv2.imwrite('test%i.jpg' % j, cutout) 912 | 913 | im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 914 | im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32 915 | im /= 255.0 # 0 - 255 to 0.0 - 1.0 916 | ims.append(im) 917 | 918 | pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction 919 | x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections 920 | 921 | return x 922 | 923 | 924 | def fitness(x): 925 | # Returns fitness (for use with results.txt or evolve.txt) 926 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] 927 | return (x[:, :4] * w).sum(1) 928 | 929 | 930 | def output_to_target(output, width, height): 931 | # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] 932 | if isinstance(output, torch.Tensor): 933 | output = output.cpu().numpy() 934 | 935 | targets = [] 936 | for i, o in enumerate(output): 937 | if o is not None: 938 | for pred in o: 939 | box = pred[:4] 940 | w = (box[2] - box[0]) / width 941 | h = (box[3] - box[1]) / height 942 | x = box[0] / width + w / 2 943 | y = box[1] / height + h / 2 944 | conf = pred[4] 945 | cls = int(pred[5]) 946 | 947 | targets.append([i, cls, x, y, w, h, conf]) 948 | 949 | return np.array(targets) 950 | 951 | 952 | def increment_dir(dir, comment=''): 953 | # Increments a directory runs/exp1 --> runs/exp2_comment 954 | n = 0 # number 955 | dir = str(Path(dir)) # os-agnostic 956 | dirs = sorted(glob.glob(dir + '*')) # directories 957 | if dirs: 958 | matches = [re.search(r"exp(\d+)", d) for d in dirs] 959 | idxs = [int(m.groups()[0]) for m in matches if m] 960 | if idxs: 961 | n = max(idxs) + 1 # increment 962 | return dir + str(n) + ('_' + comment if comment else '') 963 | 964 | 965 | # Plotting functions 
--------------------------------------------------------------------------------------------------- 966 | def hist2d(x, y, n=100): 967 | # 2d histogram used in labels.png and evolve.png 968 | xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) 969 | hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) 970 | xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) 971 | yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) 972 | return np.log(hist[xidx, yidx]) 973 | 974 | 975 | def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): 976 | # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy 977 | def butter_lowpass(cutoff, fs, order): 978 | nyq = 0.5 * fs 979 | normal_cutoff = cutoff / nyq 980 | b, a = butter(order, normal_cutoff, btype='low', analog=False) 981 | return b, a 982 | 983 | b, a = butter_lowpass(cutoff, fs, order=order) 984 | return filtfilt(b, a, data) # forward-backward filter 985 | 986 | 987 | def plot_one_box(x, img, color=None, label=None, line_thickness=None): 988 | # Plots one bounding box on image img 989 | tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness 990 | color = color or [random.randint(0, 255) for _ in range(3)] 991 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 992 | cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) 993 | if label: 994 | tf = max(tl - 1, 1) # font thickness 995 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] 996 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 997 | cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled 998 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) 999 | 1000 | 1001 | def plot_wh_methods(): # from utils.general import *; plot_wh_methods() 1002 | # Compares the two methods for width-height anchor multiplication 1003 | # https://github.com/ultralytics/yolov3/issues/168 1004 | x = np.arange(-4.0, 4.0, .1) 1005 | ya = np.exp(x) 1006 | yb = torch.sigmoid(torch.from_numpy(x)).numpy() * 2 1007 | 1008 | fig = plt.figure(figsize=(6, 3), dpi=150) 1009 | plt.plot(x, ya, '.-', label='YOLOv3') 1010 | plt.plot(x, yb ** 2, '.-', label='YOLOv5 ^2') 1011 | plt.plot(x, yb ** 1.6, '.-', label='YOLOv5 ^1.6') 1012 | plt.xlim(left=-4, right=4) 1013 | plt.ylim(bottom=0, top=6) 1014 | plt.xlabel('input') 1015 | plt.ylabel('output') 1016 | plt.grid() 1017 | plt.legend() 1018 | fig.tight_layout() 1019 | fig.savefig('comparison.png', dpi=200) 1020 | 1021 | 1022 | def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16): 1023 | tl = 3 # line thickness 1024 | tf = max(tl - 1, 1) # font thickness 1025 | 1026 | if isinstance(images, torch.Tensor): 1027 | images = images.cpu().float().numpy() 1028 | 1029 | if isinstance(targets, torch.Tensor): 1030 | targets = targets.cpu().numpy() 1031 | 1032 | # un-normalise 1033 | if np.max(images[0]) <= 1: 1034 | images *= 255 1035 | 1036 | bs, _, h, w = images.shape # batch size, _, height, width 1037 | bs = min(bs, max_subplots) # limit plot images 1038 | ns = np.ceil(bs ** 0.5) # number of subplots (square) 1039 | 1040 | # Check if we should resize 1041 | scale_factor = max_size / max(h, w) 1042 | if scale_factor < 1: 1043 | h = math.ceil(scale_factor * h) 1044 | w = math.ceil(scale_factor * w) 1045 | 1046 | # Empty array for output 1047 | mosaic = np.full((int(ns * h), int(ns * w), 3), 255, 
dtype=np.uint8) 1048 | 1049 | # Fix class - colour map 1050 | prop_cycle = plt.rcParams['axes.prop_cycle'] 1051 | # https://stackoverflow.com/questions/51350872/python-from-color-name-to-rgb 1052 | hex2rgb = lambda h: tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) 1053 | color_lut = [hex2rgb(h) for h in prop_cycle.by_key()['color']] 1054 | 1055 | for i, img in enumerate(images): 1056 | if i == max_subplots: # if last batch has fewer images than we expect 1057 | break 1058 | 1059 | block_x = int(w * (i // ns)) 1060 | block_y = int(h * (i % ns)) 1061 | 1062 | img = img.transpose(1, 2, 0) 1063 | if scale_factor < 1: 1064 | img = cv2.resize(img, (w, h)) 1065 | 1066 | mosaic[block_y:block_y + h, block_x:block_x + w, :] = img 1067 | if len(targets) > 0: 1068 | image_targets = targets[targets[:, 0] == i] 1069 | boxes = xywh2xyxy(image_targets[:, 2:6]).T 1070 | classes = image_targets[:, 1].astype('int') 1071 | gt = image_targets.shape[1] == 6 # ground truth if no conf column 1072 | conf = None if gt else image_targets[:, 6] # check for confidence presence (gt vs pred) 1073 | 1074 | boxes[[0, 2]] *= w 1075 | boxes[[0, 2]] += block_x 1076 | boxes[[1, 3]] *= h 1077 | boxes[[1, 3]] += block_y 1078 | for j, box in enumerate(boxes.T): 1079 | cls = int(classes[j]) 1080 | color = color_lut[cls % len(color_lut)] 1081 | cls = names[cls] if names else cls 1082 | if gt or conf[j] > 0.3: # 0.3 conf thresh 1083 | label = '%s' % cls if gt else '%s %.1f' % (cls, conf[j]) 1084 | plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) 1085 | 1086 | # Draw image filename labels 1087 | if paths is not None: 1088 | label = os.path.basename(paths[i])[:40] # trim to 40 char 1089 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] 1090 | cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf, 1091 | lineType=cv2.LINE_AA) 1092 | 1093 | # Image border 1094 | cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3) 1095 | 1096 | if fname is not None: 1097 | mosaic = cv2.resize(mosaic, (int(ns * w * 0.5), int(ns * h * 0.5)), interpolation=cv2.INTER_AREA) 1098 | cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) 1099 | 1100 | return mosaic 1101 | 1102 | 1103 | def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''): 1104 | # Plot LR simulating training for full epochs 1105 | optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals 1106 | y = [] 1107 | for _ in range(epochs): 1108 | scheduler.step() 1109 | y.append(optimizer.param_groups[0]['lr']) 1110 | plt.plot(y, '.-', label='LR') 1111 | plt.xlabel('epoch') 1112 | plt.ylabel('LR') 1113 | plt.grid() 1114 | plt.xlim(0, epochs) 1115 | plt.ylim(0) 1116 | plt.tight_layout() 1117 | plt.savefig(Path(save_dir) / 'LR.png', dpi=200) 1118 | 1119 | 1120 | def plot_test_txt(): # from utils.general import *; plot_test() 1121 | # Plot test.txt histograms 1122 | x = np.loadtxt('test.txt', dtype=np.float32) 1123 | box = xyxy2xywh(x[:, :4]) 1124 | cx, cy = box[:, 0], box[:, 1] 1125 | 1126 | fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True) 1127 | ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0) 1128 | ax.set_aspect('equal') 1129 | plt.savefig('hist2d.png', dpi=300) 1130 | 1131 | fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True) 1132 | ax[0].hist(cx, bins=600) 1133 | ax[1].hist(cy, bins=600) 1134 | plt.savefig('hist1d.png', dpi=200) 1135 | 1136 | 1137 | def plot_targets_txt(): # from 
utils.general import *; plot_targets_txt() 1138 | # Plot targets.txt histograms 1139 | x = np.loadtxt('targets.txt', dtype=np.float32).T 1140 | s = ['x targets', 'y targets', 'width targets', 'height targets'] 1141 | fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) 1142 | ax = ax.ravel() 1143 | for i in range(4): 1144 | ax[i].hist(x[i], bins=100, label='%.3g +/- %.3g' % (x[i].mean(), x[i].std())) 1145 | ax[i].legend() 1146 | ax[i].set_title(s[i]) 1147 | plt.savefig('targets.jpg', dpi=200) 1148 | 1149 | 1150 | def plot_study_txt(f='study.txt', x=None): # from utils.general import *; plot_study_txt() 1151 | # Plot study.txt generated by test.py 1152 | fig, ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True) 1153 | ax = ax.ravel() 1154 | 1155 | fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) 1156 | for f in ['study/study_coco_yolov5%s.txt' % x for x in ['s', 'm', 'l', 'x']]: 1157 | y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T 1158 | x = np.arange(y.shape[1]) if x is None else np.array(x) 1159 | s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_inference (ms/img)', 't_NMS (ms/img)', 't_total (ms/img)'] 1160 | for i in range(7): 1161 | ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8) 1162 | ax[i].set_title(s[i]) 1163 | 1164 | j = y[3].argmax() + 1 1165 | ax2.plot(y[6, :j], y[3, :j] * 1E2, '.-', linewidth=2, markersize=8, 1166 | label=Path(f).stem.replace('study_coco_', '').replace('yolo', 'YOLO')) 1167 | 1168 | ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], 1169 | 'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet') 1170 | 1171 | ax2.grid() 1172 | ax2.set_xlim(0, 30) 1173 | ax2.set_ylim(28, 50) 1174 | ax2.set_yticks(np.arange(30, 55, 5)) 1175 | ax2.set_xlabel('GPU Speed (ms/img)') 1176 | ax2.set_ylabel('COCO AP val') 1177 | ax2.legend(loc='lower right') 1178 | plt.savefig('study_mAP_latency.png', dpi=300) 1179 | plt.savefig(f.replace('.txt', '.png'), dpi=300) 1180 | 1181 | 1182 | def plot_labels(labels, save_dir=''): 1183 | # plot dataset labels 1184 | c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes 1185 | nc = int(c.max() + 1) # number of classes 1186 | 1187 | fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) 1188 | ax = ax.ravel() 1189 | ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) 1190 | ax[0].set_xlabel('classes') 1191 | ax[1].scatter(b[0], b[1], c=hist2d(b[0], b[1], 90), cmap='jet') 1192 | ax[1].set_xlabel('x') 1193 | ax[1].set_ylabel('y') 1194 | ax[2].scatter(b[2], b[3], c=hist2d(b[2], b[3], 90), cmap='jet') 1195 | ax[2].set_xlabel('width') 1196 | ax[2].set_ylabel('height') 1197 | plt.savefig(Path(save_dir) / 'labels.png', dpi=200) 1198 | plt.close() 1199 | 1200 | # seaborn correlogram 1201 | try: 1202 | import seaborn as sns 1203 | import pandas as pd 1204 | x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height']) 1205 | sns.pairplot(x, corner=True, diag_kind='hist', kind='scatter', markers='o', 1206 | plot_kws=dict(s=3, edgecolor=None, linewidth=1, alpha=0.02), 1207 | diag_kws=dict(bins=50)) 1208 | plt.savefig(Path(save_dir) / 'labels_correlogram.png', dpi=200) 1209 | plt.close() 1210 | except Exception as e: 1211 | pass 1212 | 1213 | 1214 | def plot_evolution(yaml_file='data/hyp.finetune.yaml'): # from utils.general import *; plot_evolution() 1215 | # Plot hyperparameter evolution results in evolve.txt 1216 | with open(yaml_file) as f: 1217 | hyp = yaml.load(f, Loader=yaml.FullLoader) 1218 | x = 
np.loadtxt('evolve.txt', ndmin=2) 1219 | f = fitness(x) 1220 | # weights = (f - f.min()) ** 2 # for weighted results 1221 | plt.figure(figsize=(10, 12), tight_layout=True) 1222 | matplotlib.rc('font', **{'size': 8}) 1223 | for i, (k, v) in enumerate(hyp.items()): 1224 | y = x[:, i + 7] 1225 | # mu = (y * weights).sum() / weights.sum() # best weighted result 1226 | mu = y[f.argmax()] # best single result 1227 | plt.subplot(6, 5, i + 1) 1228 | plt.scatter(y, f, c=hist2d(y, f, 20), cmap='viridis', alpha=.8, edgecolors='none') 1229 | plt.plot(mu, f.max(), 'k+', markersize=15) 1230 | plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9}) # limit to 40 characters 1231 | if i % 5 != 0: 1232 | plt.yticks([]) 1233 | print('%15s: %.3g' % (k, mu)) 1234 | plt.savefig('evolve.png', dpi=200) 1235 | print('\nPlot saved as evolve.png') 1236 | 1237 | 1238 | def plot_results_overlay(start=0, stop=0): # from utils.general import *; plot_results_overlay() 1239 | # Plot training 'results*.txt', overlaying train and val losses 1240 | s = ['train', 'train', 'train', 'Precision', 'mAP@0.5', 'val', 'val', 'val', 'Recall', 'mAP@0.5:0.95'] # legends 1241 | t = ['Box', 'Objectness', 'Classification', 'P-R', 'mAP-F1'] # titles 1242 | for f in sorted(glob.glob('results*.txt') + glob.glob('../../Downloads/results*.txt')): 1243 | results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T 1244 | n = results.shape[1] # number of rows 1245 | x = range(start, min(stop, n) if stop else n) 1246 | fig, ax = plt.subplots(1, 5, figsize=(14, 3.5), tight_layout=True) 1247 | ax = ax.ravel() 1248 | for i in range(5): 1249 | for j in [i, i + 5]: 1250 | y = results[j, x] 1251 | ax[i].plot(x, y, marker='.', label=s[j]) 1252 | # y_smooth = butter_lowpass_filtfilt(y) 1253 | # ax[i].plot(x, np.gradient(y_smooth), marker='.', label=s[j]) 1254 | 1255 | ax[i].set_title(t[i]) 1256 | ax[i].legend() 1257 | ax[i].set_ylabel(f) if i == 0 else None # add filename 1258 | fig.savefig(f.replace('.txt', '.png'), dpi=200) 1259 | 1260 | 1261 | def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''): 1262 | # from utils.general import *; plot_results() 1263 | # Plot training 'results*.txt' as seen in https://github.com/ultralytics/yolov5#reproduce-our-training 1264 | fig, ax = plt.subplots(2, 5, figsize=(12, 6)) 1265 | ax = ax.ravel() 1266 | s = ['Box', 'Objectness', 'Classification', 'Precision', 'Recall', 1267 | 'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', 'mAP@0.5:0.95'] 1268 | if bucket: 1269 | # os.system('rm -rf storage.googleapis.com') 1270 | # files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id] 1271 | files = ['results%g.txt' % x for x in id] 1272 | c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple('gs://%s/results%g.txt' % (bucket, x) for x in id) 1273 | os.system(c) 1274 | else: 1275 | files = glob.glob(str(Path(save_dir) / 'results*.txt')) + glob.glob('../../Downloads/results*.txt') 1276 | for fi, f in enumerate(files): 1277 | try: 1278 | results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T 1279 | n = results.shape[1] # number of rows 1280 | x = range(start, min(stop, n) if stop else n) 1281 | for i in range(10): 1282 | y = results[i, x] 1283 | if i in [0, 1, 2, 5, 6, 7]: 1284 | y[y == 0] = np.nan # don't show zero loss values 1285 | # y /= y[0] # normalize 1286 | label = labels[fi] if len(labels) else Path(f).stem 1287 | ax[i].plot(x, y, marker='.', label=label, linewidth=1, markersize=6) 1288 | ax[i].set_title(s[i]) 1289 
| # if i in [5, 6, 7]: # share train and val loss y axes 1290 | # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) 1291 | except Exception as e: 1292 | print('Warning: Plotting error for %s; %s' % (f, e)) 1293 | 1294 | fig.tight_layout() 1295 | ax[1].legend() 1296 | fig.savefig(Path(save_dir) / 'results.png', dpi=200) 1297 | -------------------------------------------------------------------------------- /utils/google_app_engine/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/google-appengine/python 2 | 3 | # Create a virtualenv for dependencies. This isolates these packages from 4 | # system-level packages. 5 | # Use -p python3 or -p python3.7 to select python version. Default is version 2. 6 | RUN virtualenv /env -p python3 7 | 8 | # Setting these environment variables are the same as running 9 | # source /env/bin/activate. 10 | ENV VIRTUAL_ENV /env 11 | ENV PATH /env/bin:$PATH 12 | 13 | RUN apt-get update && apt-get install -y python-opencv 14 | 15 | # Copy the application's requirements.txt and run pip to install all 16 | # dependencies into the virtualenv. 17 | ADD requirements.txt /app/requirements.txt 18 | RUN pip install -r /app/requirements.txt 19 | 20 | # Add the application source code. 21 | ADD . /app 22 | 23 | # Run a WSGI server to serve the application. gunicorn must be declared as 24 | # a dependency in requirements.txt. 25 | CMD gunicorn -b :$PORT main:app 26 | -------------------------------------------------------------------------------- /utils/google_app_engine/additional_requirements.txt: -------------------------------------------------------------------------------- 1 | # add these requirements in your app on top of the existing ones 2 | pip==18.1 3 | Flask==1.0.2 4 | gunicorn==19.9.0 5 | -------------------------------------------------------------------------------- /utils/google_app_engine/app.yaml: -------------------------------------------------------------------------------- 1 | runtime: custom 2 | env: flex 3 | 4 | service: yolov5app 5 | 6 | liveness_check: 7 | initial_delay_sec: 600 8 | 9 | manual_scaling: 10 | instances: 1 11 | resources: 12 | cpu: 1 13 | memory_gb: 4 14 | disk_size_gb: 20 -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | # from google.cloud import storage 4 | 5 | import os 6 | import platform 7 | import subprocess 8 | import time 9 | from pathlib import Path 10 | 11 | import torch 12 | 13 | 14 | def gsutil_getsize(url=''): 15 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 16 | s = subprocess.check_output('gsutil du %s' % url, shell=True).decode('utf-8') 17 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 18 | 19 | 20 | def attempt_download(weights): 21 | # Attempt to download pretrained weights if not found locally 22 | weights = weights.strip().replace("'", '') 23 | file = Path(weights).name 24 | 25 | msg = weights + ' missing, try downloading from https://github.com/ultralytics/yolov5/releases/' 26 | models = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt'] # available models 27 | 28 | if file in models and not os.path.isfile(weights): 29 | # Google Drive 30 | # d = {'yolov5s.pt': '1R5T6rIyy3lLwgFXNms8whc-387H0tMQO', 31 | # 'yolov5m.pt': 
'1vobuEExpWQVpXExsJ2w-Mbf3HJjWkQJr', 32 | # 'yolov5l.pt': '1hrlqD1Wdei7UT4OgT785BEk1JwnSvNEV', 33 | # 'yolov5x.pt': '1mM8aZJlWTxOg7BZJvNUMrTnA2AbeCVzS'} 34 | # r = gdrive_download(id=d[file], name=weights) if file in d else 1 35 | # if r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6: # check 36 | # return 37 | 38 | try: # GitHub 39 | url = 'https://github.com/ultralytics/yolov5/releases/download/v3.0/' + file 40 | print('Downloading %s to %s...' % (url, weights)) 41 | torch.hub.download_url_to_file(url, weights) 42 | assert os.path.exists(weights) and os.path.getsize(weights) > 1E6 # check 43 | except Exception as e: # GCP 44 | print('Download error: %s' % e) 45 | url = 'https://storage.googleapis.com/ultralytics/yolov5/ckpt/' + file 46 | print('Downloading %s to %s...' % (url, weights)) 47 | r = os.system('curl -L %s -o %s' % (url, weights)) # torch.hub.download_url_to_file(url, weights) 48 | finally: 49 | if not (os.path.exists(weights) and os.path.getsize(weights) > 1E6): # check 50 | os.remove(weights) if os.path.exists(weights) else None # remove partial downloads 51 | print('ERROR: Download failure: %s' % msg) 52 | print('') 53 | return 54 | 55 | 56 | def gdrive_download(id='1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', name='coco128.zip'): 57 | # Downloads a file from Google Drive. from utils.google_utils import *; gdrive_download() 58 | t = time.time() 59 | 60 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') 61 | os.remove(name) if os.path.exists(name) else None # remove existing 62 | os.remove('cookie') if os.path.exists('cookie') else None 63 | 64 | # Attempt file download 65 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 66 | os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out)) 67 | if os.path.exists('cookie'): # large file 68 | s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name) 69 | else: # small file 70 | s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id) 71 | r = os.system(s) # execute, capture return 72 | os.remove('cookie') if os.path.exists('cookie') else None 73 | 74 | # Error check 75 | if r != 0: 76 | os.remove(name) if os.path.exists(name) else None # remove partial 77 | print('Download error ') # raise Exception('Download error') 78 | return r 79 | 80 | # Unzip if archive 81 | if name.endswith('.zip'): 82 | print('unzipping... 
', end='') 83 | os.system('unzip -q %s' % name) # unzip 84 | os.remove(name) # remove zip to free space 85 | 86 | print('Done (%.1fs)' % (time.time() - t)) 87 | return r 88 | 89 | 90 | def get_token(cookie="./cookie"): 91 | with open(cookie) as f: 92 | for line in f: 93 | if "download" in line: 94 | return line.split()[-1] 95 | return "" 96 | 97 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 98 | # # Uploads a file to a bucket 99 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 100 | # 101 | # storage_client = storage.Client() 102 | # bucket = storage_client.get_bucket(bucket_name) 103 | # blob = bucket.blob(destination_blob_name) 104 | # 105 | # blob.upload_from_filename(source_file_name) 106 | # 107 | # print('File {} uploaded to {}.'.format( 108 | # source_file_name, 109 | # destination_blob_name)) 110 | # 111 | # 112 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 113 | # # Uploads a blob from a bucket 114 | # storage_client = storage.Client() 115 | # bucket = storage_client.get_bucket(bucket_name) 116 | # blob = bucket.blob(source_blob_name) 117 | # 118 | # blob.download_to_filename(destination_file_name) 119 | # 120 | # print('Blob {} downloaded to {}.'.format( 121 | # source_blob_name, 122 | # destination_file_name)) 123 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | import os 4 | import time 5 | from copy import deepcopy 6 | 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import torchvision 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | def init_torch_seeds(seed=0): 17 | torch.manual_seed(seed) 18 | 19 | # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html 20 | if seed == 0: # slower, more reproducible 21 | cudnn.deterministic = True 22 | cudnn.benchmark = False 23 | else: # faster, less reproducible 24 | cudnn.deterministic = False 25 | cudnn.benchmark = True 26 | 27 | 28 | def select_device(device='', batch_size=None): 29 | # device = 'cpu' or '0' or '0,1,2,3' 30 | cpu_request = device.lower() == 'cpu' 31 | if device and not cpu_request: # if device requested other than 'cpu' 32 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 33 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity 34 | 35 | cuda = False if cpu_request else torch.cuda.is_available() 36 | if cuda: 37 | c = 1024 ** 2 # bytes to MB 38 | ng = torch.cuda.device_count() 39 | if ng > 1 and batch_size: # check that batch_size is compatible with device_count 40 | assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng) 41 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 42 | s = 'Using CUDA ' 43 | for i in range(0, ng): 44 | if i == 1: 45 | s = ' ' * len(s) 46 | logger.info("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 47 | (s, i, x[i].name, x[i].total_memory / c)) 48 | else: 49 | logger.info('Using CPU') 50 | 51 | logger.info('') # skip a line 52 | return torch.device('cuda:0' if cuda else 'cpu') 53 | 54 | 55 | def time_synchronized(): 56 | torch.cuda.synchronize() if torch.cuda.is_available() else None 57 | return time.time() 58 | 59 | 60 | def is_parallel(model): 61 | 
return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 62 | 63 | 64 | def intersect_dicts(da, db, exclude=()): 65 | # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values 66 | return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} 67 | 68 | 69 | def initialize_weights(model): 70 | for m in model.modules(): 71 | t = type(m) 72 | if t is nn.Conv2d: 73 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 74 | elif t is nn.BatchNorm2d: 75 | m.eps = 1e-3 76 | m.momentum = 0.03 77 | elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 78 | m.inplace = True 79 | 80 | 81 | def find_modules(model, mclass=nn.Conv2d): 82 | # Finds layer indices matching module class 'mclass' 83 | return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] 84 | 85 | 86 | def sparsity(model): 87 | # Return global model sparsity 88 | a, b = 0., 0. 89 | for p in model.parameters(): 90 | a += p.numel() 91 | b += (p == 0).sum() 92 | return b / a 93 | 94 | 95 | def prune(model, amount=0.3): 96 | # Prune model to requested global sparsity 97 | import torch.nn.utils.prune as prune 98 | print('Pruning model... ', end='') 99 | for name, m in model.named_modules(): 100 | if isinstance(m, nn.Conv2d): 101 | prune.l1_unstructured(m, name='weight', amount=amount) # prune 102 | prune.remove(m, 'weight') # make permanent 103 | print(' %.3g global sparsity' % sparsity(model)) 104 | 105 | 106 | def fuse_conv_and_bn(conv, bn): 107 | # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 108 | 109 | # init 110 | fusedconv = nn.Conv2d(conv.in_channels, 111 | conv.out_channels, 112 | kernel_size=conv.kernel_size, 113 | stride=conv.stride, 114 | padding=conv.padding, 115 | groups=conv.groups, 116 | bias=True).requires_grad_(False).to(conv.weight.device) 117 | 118 | # prepare filters 119 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 120 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 121 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 122 | 123 | # prepare spatial bias 124 | b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias 125 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 126 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 127 | 128 | return fusedconv 129 | 130 | 131 | def model_info(model, verbose=False): 132 | # Plots a line-by-line description of a PyTorch model 133 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 134 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 135 | if verbose: 136 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 137 | for i, (name, p) in enumerate(model.named_parameters()): 138 | name = name.replace('module_list.', '') 139 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 140 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 141 | 142 | try: # FLOPS 143 | from thop import profile 144 | flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, 64, 64),), verbose=False)[0] / 1E9 * 2 145 | fs = ', %.1f GFLOPS' % (flops * 100) # 640x640 FLOPS 146 | except: 147 | fs = '' 148 | 149 | logger.info( 150 | 'Model Summary: %g layers, %g parameters, %g gradients%s' % 
(len(list(model.parameters())), n_p, n_g, fs)) 151 | 152 | 153 | def load_classifier(name='resnet101', n=2): 154 | # Loads a pretrained model reshaped to n-class output 155 | model = torchvision.models.__dict__[name](pretrained=True) 156 | 157 | # ResNet model properties 158 | # input_size = [3, 224, 224] 159 | # input_space = 'RGB' 160 | # input_range = [0, 1] 161 | # mean = [0.485, 0.456, 0.406] 162 | # std = [0.229, 0.224, 0.225] 163 | 164 | # Reshape output to n classes 165 | filters = model.fc.weight.shape[1] 166 | model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) 167 | model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) 168 | model.fc.out_features = n 169 | return model 170 | 171 | 172 | def scale_img(img, ratio=1.0, same_shape=False): # img(16,3,256,416), r=ratio 173 | # scales img(bs,3,y,x) by ratio 174 | if ratio == 1.0: 175 | return img 176 | else: 177 | h, w = img.shape[2:] 178 | s = (int(h * ratio), int(w * ratio)) # new size 179 | img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize 180 | if not same_shape: # pad/crop img 181 | gs = 32 # (pixels) grid size 182 | h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] 183 | return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean 184 | 185 | 186 | def copy_attr(a, b, include=(), exclude=()): 187 | # Copy attributes from b to a, options to only include [...] and to exclude [...] 188 | for k, v in b.__dict__.items(): 189 | if (len(include) and k not in include) or k.startswith('_') or k in exclude: 190 | continue 191 | else: 192 | setattr(a, k, v) 193 | 194 | 195 | class ModelEMA: 196 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 197 | Keep a moving average of everything in the model state_dict (parameters and buffers). 198 | This is intended to allow functionality like 199 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 200 | A smoothed version of the weights is necessary for some training schemes to perform well. 201 | This class is sensitive where it is initialized in the sequence of model init, 202 | GPU assignment and distributed training wrappers. 203 | """ 204 | 205 | def __init__(self, model, decay=0.9999, updates=0): 206 | # Create EMA 207 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA 208 | # if next(model.parameters()).device.type != 'cpu': 209 | # self.ema.half() # FP16 EMA 210 | self.updates = updates # number of EMA updates 211 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) 212 | for p in self.ema.parameters(): 213 | p.requires_grad_(False) 214 | 215 | def update(self, model): 216 | # Update EMA parameters 217 | with torch.no_grad(): 218 | self.updates += 1 219 | d = self.decay(self.updates) 220 | 221 | msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict 222 | for k, v in self.ema.state_dict().items(): 223 | if v.dtype.is_floating_point: 224 | v *= d 225 | v += (1. 
- d) * msd[k].detach() 226 | 227 | def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): 228 | # Update EMA attributes 229 | copy_attr(self.ema, model, include, exclude) 230 | -------------------------------------------------------------------------------- /yolo5_detect.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from grabscreen import grab_screen 3 | import cv2 4 | import time 5 | import directkeys 6 | import torch 7 | from torch.autograd import Variable 8 | from directkeys import PressKey, ReleaseKey, key_down, key_up 9 | from getkeys import key_check 10 | from utils.torch_utils import select_device, load_classifier, time_synchronized 11 | from utils.general import ( 12 | check_img_size, non_max_suppression, apply_classifier, scale_coords, 13 | xyxy2xywh, plot_one_box, strip_optimizer, set_logging) 14 | from models.experimental import attempt_load 15 | import random 16 | 17 | def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=False, scaleFill=False, scaleup=True): 18 | # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232 19 | shape = img.shape[:2] # current shape [height, width] 20 | if isinstance(new_shape, int): 21 | new_shape = (new_shape, new_shape) 22 | 23 | # Scale ratio (new / old) 24 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 25 | if not scaleup: # only scale down, do not scale up (for better test mAP) 26 | r = min(r, 1.0) 27 | 28 | # Compute padding 29 | ratio = r, r # width, height ratios 30 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 31 | dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding 32 | if auto: # minimum rectangle 33 | dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding 34 | elif scaleFill: # stretch 35 | dw, dh = 0.0, 0.0 36 | new_unpad = (new_shape[1], new_shape[0]) 37 | ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios 38 | 39 | dw /= 2 # divide padding into 2 sides 40 | dh /= 2 41 | 42 | if shape[::-1] != new_unpad: # resize 43 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 44 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) 45 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) 46 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border 47 | return img, ratio, (dw, dh) 48 | 49 | weights = r'E:\Computer_vision\yolov5\YOLO5\yolov5-master\runs\exp0\weights\best.pt' 50 | device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") 51 | model = attempt_load(weights, map_location=device) # load FP32 model 52 | window_size = (0,0,1200,750) 53 | last_time = time.time() 54 | for i in list(range(5))[::-1]: 55 | print(i + 1) 56 | time.sleep(1) 57 | img_size = 608 58 | paused = False 59 | half = device.type != 'cpu' 60 | view_img = True 61 | save_txt = False 62 | conf_thres = 0.3 63 | iou_thres = 0.2 64 | classes = None 65 | agnostic_nms = True 66 | names = ['hero', 'small_map', "monster", 'money', 'material', 'door', 'BOSS', 'box', 'options'] 67 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] 68 | if half: 69 | model.half() # to FP16 70 | 71 | while (True): 72 | if not paused: 73 | img0 = grab_screen(window_size) 74 | print('loop took {} seconds'.format(time.time() - last_time)) 75 | last_time = time.time() 76 | img0 = cv2.cvtColor(img0, cv2.COLOR_BGRA2BGR) 77 | # Padded resize 78 | 
img = letterbox(img0, new_shape=img_size)[0] 79 | 80 | # Convert 81 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 82 | img = np.ascontiguousarray(img) 83 | 84 | img = torch.from_numpy(img).to(device).unsqueeze(0) 85 | img = img.half() if half else img.float() # uint8 to fp16/32 86 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 87 | 88 | t1 = time_synchronized() 89 | # print(img.shape) 90 | pred = model(img, augment=False)[0] 91 | 92 | # Apply NMS 93 | det = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms) 94 | t2 = time_synchronized() 95 | print("inference and NMS time: ", t2 - t1) 96 | gn = torch.tensor(img0.shape)[[1, 0, 1, 0]] 97 | det = det[0] 98 | if det is not None and len(det): 99 | # Rescale boxes from img_size to im0 size 100 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round() 101 | 102 | # Print results 103 | for c in det[:, -1].unique(): 104 | n = (det[:, -1] == c).sum() # detections per class 105 | 106 | # Write results 107 | for *xyxy, conf, cls in reversed(det): 108 | # if save_txt: # Write to file 109 | # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh 110 | # with open(txt_path + '.txt', 'a') as f: 111 | # f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format 112 | 113 | if view_img: # Add bbox to image 114 | label = '%s %.2f' % (names[int(cls)], conf) 115 | plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=2) 116 | 117 | img0 = cv2.resize(img0, (600, 375)) 118 | # Stream results 119 | if view_img: 120 | cv2.imshow('window', img0) 121 | if cv2.waitKey(5) & 0xFF == ord('q'): 122 | raise StopIteration 123 | 124 | # Setting pause and unpause 125 | keys = key_check() 126 | if 'P' in keys: 127 | if paused: 128 | paused = False 129 | time.sleep(1) 130 | else: 131 | paused = True 132 | time.sleep(1) -------------------------------------------------------------------------------- /问号模板.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c925777075/yolov5-dnf/a8ec3d885cd8c4aafd4da300e4234f38427ad167/问号模板.npy --------------------------------------------------------------------------------
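Note on utils/torch_utils.py: ModelEMA is defined above, but no training script in this repository shows it in use. Below is a minimal, hypothetical sketch of how such an EMA wrapper is typically driven from a training loop; `model`, `loader`, `optimizer` and `compute_loss` are placeholders and are not objects provided by this project.

```python
from utils.torch_utils import ModelEMA

def train_with_ema(model, loader, optimizer, compute_loss, device, epochs=10):
    # Hypothetical sketch, not part of this repo. Move the model first:
    # ModelEMA deep-copies it, so the copy must live on the same device.
    model = model.to(device).train()
    ema = ModelEMA(model)  # one EMA copy for the whole run, not one per epoch
    for _ in range(epochs):
        for imgs, targets in loader:
            imgs, targets = imgs.to(device), targets.to(device)
            loss = compute_loss(model(imgs), targets)  # placeholder loss function
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            ema.update(model)  # blend the current weights into the moving average
        ema.update_attr(model)  # keep non-tensor attributes (e.g. class names) in sync
    return ema.ema  # the smoothed model is the one to evaluate or export
```

The EMA copy is created once, after the model has been moved to its device (the class docstring warns that it is sensitive to where it is initialized in the setup sequence), it is updated after every optimizer step, and the smoothed `ema.ema` model is what would normally be evaluated or exported.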