├── utils ├── __init__.py ├── parse_config.py ├── datasets.py └── utils.py ├── data ├── RoboCup │ ├── .gitignore │ └── anchors │ │ ├── anchorsFinetune3.txt │ │ ├── anchors3.txt │ │ ├── anchors4.txt │ │ └── anchors6.txt └── robo.names ├── checkpoints_old └── .gitignore ├── config ├── robo.data ├── roboFinetune.data ├── robo-2c.cfg ├── robo.cfg ├── robo-hr.cfg └── robo-bn.cfg ├── .idea ├── encodings.xml ├── vcs.xml ├── modules.xml ├── misc.xml ├── PyTorch-YOLOv3.iml └── workspace.xml ├── .gitignore ├── dataMean.py ├── splitSets.py ├── paramSave.py ├── README.md ├── yoloFolder.py ├── YOLOExtractor.py ├── detect.py ├── compute_anchors.py ├── train.py ├── YOLOLabeller.py ├── models.py └── test.py /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/RoboCup/.gitignore: -------------------------------------------------------------------------------- 1 | *.txt 2 | -------------------------------------------------------------------------------- /checkpoints_old/.gitignore: -------------------------------------------------------------------------------- 1 | *.weights 2 | -------------------------------------------------------------------------------- /data/robo.names: -------------------------------------------------------------------------------- 1 | ball 2 | crossing 3 | goalpost 4 | robot 5 | -------------------------------------------------------------------------------- /data/RoboCup/anchors/anchorsFinetune3.txt: -------------------------------------------------------------------------------- 1 | 42.00,39.00, 29.00,16.00, 31.00,109.00, 79.00,106.00 2 | -------------------------------------------------------------------------------- /data/RoboCup/anchors/anchors3.txt: -------------------------------------------------------------------------------- 1 | 19.00,19.00, 90.00,182.00, 32.00,58.00, 21.00,72.00, 13.00,6.00 2 | -------------------------------------------------------------------------------- /data/RoboCup/anchors/anchors4.txt: -------------------------------------------------------------------------------- 1 | 14.00,9.00, 29.00,54.00, 56.00,121.00, 145.00,277.00 2 | 0.512722 3 | -------------------------------------------------------------------------------- /data/RoboCup/anchors/anchors6.txt: -------------------------------------------------------------------------------- 1 | 7.24,2.81, 13.17,11.35, 16.02,44.09, 32.64,21.50, 36.97,81.54, 96.13,182.02 2 | 0.621273 3 | -------------------------------------------------------------------------------- /config/robo.data: -------------------------------------------------------------------------------- 1 | classes= 4 2 | train=./data/RoboCup/train.txt 3 | valid=./data/RoboCup/test.txt 4 | names=data/robo.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .DS_Store 3 | build 4 | .git 5 | *.egg-info 6 | dist 7 | output 8 | data/coco 9 | backup 10 | weights/*.weights 11 | __pycache__ 12 | checkpoints 13 | -------------------------------------------------------------------------------- /config/roboFinetune.data: 
-------------------------------------------------------------------------------- 1 | classes= 4 2 | train=./data/RoboCup/FinetuneTrain.txt 3 | valid=./data/RoboCup/FinetuneTest.txt 4 | names=data/robo.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | 2 | def parse_data_config(path): 3 | """Parses the data configuration file""" 4 | options = dict() 5 | options['gpus'] = '0,1,2,3' 6 | options['num_workers'] = '10' 7 | with open(path, 'r') as fp: 8 | lines = fp.readlines() 9 | for line in lines: 10 | line = line.strip() 11 | if line == '' or line.startswith('#'): 12 | continue 13 | key, value = line.split('=') 14 | options[key.strip()] = value.strip() 15 | return options 16 | -------------------------------------------------------------------------------- /.idea/PyTorch-YOLOv3.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /dataMean.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import cv2 4 | 5 | if __name__ == '__main__': 6 | #root = "E:/RoboCup/FinetuneHorizon/train/images/" 7 | root = "E:/RoboCup/YOLO/Finetune/train/" 8 | #root = "E:/RoboCup/train/images/" 9 | 10 | mean = np.zeros(3) 11 | std = np.zeros(3) 12 | 13 | imgs = glob.glob1(root,"*.png") 14 | 15 | for i in imgs: 16 | img = cv2.cvtColor(cv2.imread(root+i),cv2.COLOR_BGR2RGB) 17 | m = np.mean(img,axis=(0,1)) 18 | s = np.sqrt(np.var(img,axis=(0,1))) 19 | mean += m 20 | std += s 21 | 22 | mean /= len(imgs)*255 23 | std /= len(imgs)*255 24 | std = np.sqrt(std) 25 | print(mean,std) -------------------------------------------------------------------------------- /splitSets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import glob 4 | import cv2 5 | import random 6 | 7 | 8 | if __name__ == "__main__": 9 | 10 | inPath = "/Users/martonszemenyei/Projects/ROBO/data/YOLO/sydney/" 11 | oPathTrain = "/Users/martonszemenyei/Projects/ROBO/data/YOLO/Finetune/train/" 12 | oPathTest = "/Users/martonszemenyei/Projects/ROBO/data/YOLO/Finetune/test/" 13 | 14 | names = sorted(glob.glob1(inPath,"syd*.png")) 15 | labNames = sorted(glob.glob1(inPath,"*.txt")) 16 | 17 | for img,lab in zip(names,labNames): 18 | 19 | r = random.random() 20 | 21 | if r > 0.8: 22 | os.rename(osp.join(inPath,img),osp.join(oPathTest,img)) 23 | os.rename(osp.join(inPath,lab),osp.join(oPathTest,lab)) 24 | else: 25 | os.rename(osp.join(inPath,img),osp.join(oPathTrain,img)) 26 | os.rename(osp.join(inPath,lab),osp.join(oPathTrain,lab)) 27 | 28 | '''for name in names: 29 | img = 
cv2.imread(oPathTrain+name) 30 | img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB) 31 | cv2.imwrite(oPathTrain+name,img)''' -------------------------------------------------------------------------------- /paramSave.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.environ['KMP_DUPLICATE_LIB_OK']='True' 5 | from utils.datasets import * 6 | from models import * 7 | 8 | def saveParams( path, model, fName="weights.dat" ): 9 | if not os.path.exists(path): 10 | os.makedirs(path) 11 | params = np.empty(0) 12 | Dict = model.state_dict() 13 | for name in Dict: 14 | param = Dict[name].numpy() 15 | if "num_batches" in name: 16 | continue 17 | param = param.reshape(param.size) 18 | params = np.concatenate((params, param)) 19 | params.tofile(path+"/"+fName) 20 | 21 | if __name__ == "__main__": 22 | 23 | path = "checkpoints/bestFinetuneHR93_34.weights" 24 | 25 | model = ROBO(bn=False,inch=3,halfRes=True) 26 | model.load_state_dict(torch.load(path, map_location={'cuda:0': 'cpu'})) 27 | 28 | saveParams("checkpoints/",model,fName="weightsHR.dat") 29 | 30 | path = "checkpoints/bestFinetune2C93_43.weights" 31 | 32 | model = ROBO(bn=False,inch=2,halfRes=False) 33 | model.load_state_dict(torch.load(path, map_location={'cuda:0': 'cpu'})) 34 | 35 | saveParams("checkpoints/",model,fName="weights2C.dat") 36 | 37 | path = "checkpoints/bestFinetune2CHR93_32.weights" 38 | 39 | model = ROBO(bn=False,inch=2,halfRes=True) 40 | model.load_state_dict(torch.load(path, map_location={'cuda:0': 'cpu'})) 41 | 42 | saveParams("checkpoints/",model,fName="weights2CHR.dat") 43 | 44 | path = "checkpoints/bestFinetuneBN97_79.weights" 45 | 46 | model = ROBO(bn=True,inch=3,halfRes=False) 47 | model.load_state_dict(torch.load(path, map_location={'cuda:0': 'cpu'})) 48 | 49 | saveParams("checkpoints/",model,fName="weightsBN.dat") 50 | 51 | path = "checkpoints/bestFinetune93_41.weights" 52 | 53 | model = ROBO(bn=False,inch=3,halfRes=False) 54 | model.load_state_dict(torch.load(path, map_location={'cuda:0': 'cpu'})) 55 | 56 | saveParams("checkpoints/",model,fName="weights.dat") -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ROBO 2 | Robust Real-time Object Detection for the Nao Robots 3 | 4 | ## Introduction 5 | This repo contains the code for our submission to the RoboCup 2019 Symposium. It allows you to train your own models, evaluate them, or use our pre-trained models. Some of the code is based on this excellent repo: https://github.com/eriklindernoren/PyTorch-YOLOv3 6 | 7 | The code and dataset for ROBO-UNet and its variants are found [here](https://github.com/szemenyeim/RoboCupVision). 8 | 9 | ## Requirements 10 | 11 | - PyTorch 1.0 12 | - Progressbar 13 | 14 | ## Dataset and Pretrained models 15 | The datasets contain images in YUV format at 512x384 resolution. For every image, the annotations are found in a .txt file of the same name (YOLO format). You can download the datasets from the following links: 16 | 17 | - [Synthetic train](https://deeplearning.iit.bme.hu/Public/ROBO/ROBO_Train.zip) 18 | - [Synthetic test](https://deeplearning.iit.bme.hu/Public/ROBO/ROBO_Test.zip) 19 | - [Finetune](https://deeplearning.iit.bme.hu/Public/ROBO/ROBO_Finetune.zip) 20 | 21 | The pretrained models are available from [here](https://deeplearning.iit.bme.hu/Public/ROBO/checkpoints.zip) 22 | 23 | ## Train your own models 24 | 1. 
To train your own models, first extract the dataset to a folder of your choice. 25 | 2. Regenerate the train and test image lists by running `python yoloFolder.py --root <path to the extracted dataset>` 26 | 3. Run the training on the synthetic database using `python train.py` 27 | 4. Finetune on the real database with `python train.py --finetune` 28 | 29 | You have several other options: 30 | 31 | `--bn` trains the ROBO-BN model 32 | 33 | `--yu` uses only 2 input channels `(Y and (U+V)/2)` 34 | 35 | `--hr` trains the ROBO-HR model 36 | 37 | `--transfer` trains only the first N layers on the real database and finetunes the rest 38 | 39 | ## Evaluate and detect 40 | Run `python test.py` to evaluate the model, and `python detect.py` to perform detection on the datasets. These scripts take the same input arguments as the training script. 41 | 42 | ## Export your models for RoboDNN 43 | You can run your models on the Nao robot using RoboDNN. You can export weight files using the paramSave.py script. The corresponding config files are in the config subfolder. 44 | -------------------------------------------------------------------------------- /yoloFolder.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import cv2 3 | import argparse 4 | import os.path as osp 5 | 6 | def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA): 7 | # initialize the dimensions of the image to be resized and 8 | # grab the image size 9 | dim = None 10 | (h, w) = image.shape[:2] 11 | 12 | # if both the width and height are None, then return the 13 | # original image 14 | if width is None and height is None: 15 | return image 16 | 17 | # check to see if the width is None 18 | if width is None: 19 | # calculate the ratio of the height and construct the 20 | # dimensions 21 | r = height / float(h) 22 | dim = (int(w * r), height) 23 | 24 | # otherwise, the height is None 25 | else: 26 | # calculate the ratio of the width and construct the 27 | # dimensions 28 | r = width / float(w) 29 | dim = (width, int(h * r)) 30 | 31 | # resize the image 32 | resized = cv2.resize(image, dim, interpolation = inter) 33 | 34 | # return the resized image 35 | return resized 36 | 37 | if __name__ =="__main__": 38 | 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument("--root", help="Path pointing to the YOLO folder", type=str,required=True) 41 | opt = parser.parse_args() 42 | root = opt.root 43 | 44 | trPath = osp.join(root,"YOLO/Train/") 45 | trFile = "./data/RoboCup/train.txt" 46 | 47 | with open(trFile,"w+") as file: 48 | for fName in glob.glob1(trPath,"*.png"): 49 | file.write(trPath+fName + "\n") 50 | file.close() 51 | 52 | tePath = osp.join(root,"YOLO/Test/") 53 | teFile = "./data/RoboCup/test.txt" 54 | 55 | with open(teFile, "w+") as file: 56 | for fName in glob.glob1(tePath, "*.png"): 57 | file.write(tePath + fName + "\n") 58 | file.close() 59 | 60 | trPath = osp.join(root,"YOLO/Finetune/train/") 61 | trFile = "./data/RoboCup/FinetuneTrain.txt" 62 | 63 | with open(trFile,"w+") as file: 64 | for fName in glob.glob1(trPath,"*.png"): 65 | file.write(trPath+fName + "\n") 66 | file.close() 67 | 68 | tePath = osp.join(root,"YOLO/Finetune/test/") 69 | teFile = "./data/RoboCup/FinetuneTest.txt" 70 | 71 | with open(teFile, "w+") as file: 72 | for fName in glob.glob1(tePath, "*.png"): 73 | file.write(tePath + fName + "\n") 74 | file.close() 75 | 76 | -------------------------------------------------------------------------------- /config/robo-2c.cfg: 
-------------------------------------------------------------------------------- 1 | [net] 2 | mean = 0.4637419,0.47166784,0.48316576 3 | std = 0.45211827,0.16890674,0.18645908 4 | width = 512 5 | height = 384 6 | channels = 2 7 | 8 | [convolutional] 9 | filters=4 10 | size=3 11 | stride=2 12 | pad=1 13 | activation=linear 14 | hasBias = false 15 | 16 | [batchnorm] 17 | activation = leaky 18 | 19 | [convolutional] 20 | filters=8 21 | size=3 22 | stride=2 23 | pad=1 24 | activation=linear 25 | hasBias = false 26 | 27 | [batchnorm] 28 | activation = leaky 29 | 30 | [convolutional] 31 | filters=16 32 | size=3 33 | stride=2 34 | pad=1 35 | activation=linear 36 | hasBias = false 37 | 38 | [batchnorm] 39 | activation = leaky 40 | 41 | [convolutional] 42 | filters=16 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=linear 47 | hasBias = false 48 | 49 | [batchnorm] 50 | activation = leaky 51 | 52 | [convolutional] 53 | filters=32 54 | size=3 55 | stride=2 56 | pad=1 57 | activation=linear 58 | hasBias = false 59 | 60 | [batchnorm] 61 | activation = leaky 62 | 63 | [convolutional] 64 | filters=32 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=linear 69 | hasBias = false 70 | 71 | [batchnorm] 72 | activation = leaky 73 | 74 | [convolutional] 75 | filters=64 76 | size=3 77 | stride=2 78 | pad=1 79 | activation=linear 80 | hasBias = false 81 | 82 | [batchnorm] 83 | activation = leaky 84 | 85 | [convolutional] 86 | filters=64 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=linear 91 | hasBias = false 92 | 93 | [batchnorm] 94 | activation = leaky 95 | 96 | [convolutional] 97 | filters=64 98 | size=3 99 | stride=1 100 | pad=1 101 | activation=linear 102 | hasBias = false 103 | 104 | [batchnorm] 105 | activation = leaky 106 | 107 | [convolutional] 108 | filters=64 109 | size=3 110 | stride=1 111 | pad=1 112 | activation=linear 113 | hasBias = false 114 | 115 | [batchnorm] 116 | activation = leaky 117 | 118 | [convolutional] 119 | filters=64 120 | size=3 121 | stride=1 122 | pad=1 123 | activation=linear 124 | hasBias = false 125 | 126 | [batchnorm] 127 | activation = leaky 128 | 129 | [convolutional] 130 | filters=128 131 | size=3 132 | stride=2 133 | pad=1 134 | activation=linear 135 | hasBias = false 136 | 137 | [batchnorm] 138 | activation = leaky 139 | 140 | [convolutional] 141 | filters=64 142 | size=3 143 | stride=1 144 | pad=1 145 | activation=linear 146 | hasBias = false 147 | 148 | [batchnorm] 149 | activation = leaky 150 | 151 | [convolutional] 152 | filters=128 153 | size=3 154 | stride=1 155 | pad=1 156 | activation=linear 157 | hasBias = false 158 | 159 | [batchnorm] 160 | activation = leaky 161 | 162 | [convolutional] 163 | filters=64 164 | size=3 165 | stride=1 166 | pad=1 167 | activation=linear 168 | hasBias = false 169 | 170 | [batchnorm] 171 | activation = leaky 172 | 173 | [convolutional] 174 | filters=128 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=linear 179 | hasBias = false 180 | 181 | [batchnorm] 182 | activation = leaky 183 | 184 | [route] 185 | from = 21 186 | 187 | [convolutional] 188 | size=1 189 | stride=1 190 | pad=0 191 | filters=10 192 | activation=linear 193 | 194 | [route] 195 | from = 31 196 | 197 | [convolutional] 198 | size=1 199 | stride=1 200 | pad=0 201 | filters=10 202 | activation=linear 203 | 204 | [concat] 205 | from = 33 206 | oned = 1 207 | -------------------------------------------------------------------------------- /config/robo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | 
mean = 0.4637419,0.47166784,0.48316576 3 | std = 0.45211827,0.16890674,0.18645908 4 | width = 512 5 | height = 384 6 | channels = 3 7 | 8 | [convolutional] 9 | filters=4 10 | size=3 11 | stride=2 12 | pad=1 13 | activation=linear 14 | hasBias = false 15 | 16 | [batchnorm] 17 | activation = leaky 18 | 19 | [convolutional] 20 | filters=8 21 | size=3 22 | stride=2 23 | pad=1 24 | activation=linear 25 | hasBias = false 26 | 27 | [batchnorm] 28 | activation = leaky 29 | 30 | [convolutional] 31 | filters=16 32 | size=3 33 | stride=2 34 | pad=1 35 | activation=linear 36 | hasBias = false 37 | 38 | [batchnorm] 39 | activation = leaky 40 | 41 | [convolutional] 42 | filters=16 43 | size=3 44 | stride=1 45 | pad=1 46 | activation=linear 47 | hasBias = false 48 | 49 | [batchnorm] 50 | activation = leaky 51 | 52 | [convolutional] 53 | filters=32 54 | size=3 55 | stride=2 56 | pad=1 57 | activation=linear 58 | hasBias = false 59 | 60 | [batchnorm] 61 | activation = leaky 62 | 63 | [convolutional] 64 | filters=32 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=linear 69 | hasBias = false 70 | 71 | [batchnorm] 72 | activation = leaky 73 | 74 | [convolutional] 75 | filters=64 76 | size=3 77 | stride=2 78 | pad=1 79 | activation=linear 80 | hasBias = false 81 | 82 | [batchnorm] 83 | activation = leaky 84 | 85 | [convolutional] 86 | filters=64 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=linear 91 | hasBias = false 92 | 93 | [batchnorm] 94 | activation = leaky 95 | 96 | [convolutional] 97 | filters=64 98 | size=3 99 | stride=1 100 | pad=1 101 | activation=linear 102 | hasBias = false 103 | 104 | [batchnorm] 105 | activation = leaky 106 | 107 | [convolutional] 108 | filters=64 109 | size=3 110 | stride=1 111 | pad=1 112 | activation=linear 113 | hasBias = false 114 | 115 | [batchnorm] 116 | activation = leaky 117 | 118 | [convolutional] 119 | filters=64 120 | size=3 121 | stride=1 122 | pad=1 123 | activation=linear 124 | hasBias = false 125 | 126 | [batchnorm] 127 | activation = leaky 128 | 129 | [convolutional] 130 | filters=128 131 | size=3 132 | stride=2 133 | pad=1 134 | activation=linear 135 | hasBias = false 136 | 137 | [batchnorm] 138 | activation = leaky 139 | 140 | [convolutional] 141 | filters=64 142 | size=3 143 | stride=1 144 | pad=1 145 | activation=linear 146 | hasBias = false 147 | 148 | [batchnorm] 149 | activation = leaky 150 | 151 | [convolutional] 152 | filters=128 153 | size=3 154 | stride=1 155 | pad=1 156 | activation=linear 157 | hasBias = false 158 | 159 | [batchnorm] 160 | activation = leaky 161 | 162 | [convolutional] 163 | filters=64 164 | size=3 165 | stride=1 166 | pad=1 167 | activation=linear 168 | hasBias = false 169 | 170 | [batchnorm] 171 | activation = leaky 172 | 173 | [convolutional] 174 | filters=128 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=linear 179 | hasBias = false 180 | 181 | [batchnorm] 182 | activation = leaky 183 | 184 | [route] 185 | from = 21 186 | 187 | [convolutional] 188 | size=1 189 | stride=1 190 | pad=0 191 | filters=10 192 | activation=linear 193 | 194 | [route] 195 | from = 31 196 | 197 | [convolutional] 198 | size=1 199 | stride=1 200 | pad=0 201 | filters=10 202 | activation=linear 203 | 204 | [concat] 205 | from = 33 206 | oned = 1 207 | -------------------------------------------------------------------------------- /config/robo-hr.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | mean = 0.4637419,0.47166784,0.48316576 3 | std = 0.45211827,0.16890674,0.18645908 4 | width 
= 256 5 | height = 192 6 | channels = 3 7 | 8 | [convolutional] 9 | filters=8 10 | size=3 11 | stride=2 12 | pad=1 13 | activation=linear 14 | hasBias = false 15 | 16 | [batchnorm] 17 | activation = leaky 18 | 19 | [convolutional] 20 | filters=16 21 | size=3 22 | stride=2 23 | pad=1 24 | activation=linear 25 | hasBias = false 26 | 27 | [batchnorm] 28 | activation = leaky 29 | 30 | [convolutional] 31 | filters=16 32 | size=3 33 | stride=1 34 | pad=1 35 | activation=linear 36 | hasBias = false 37 | 38 | [batchnorm] 39 | activation = leaky 40 | 41 | [convolutional] 42 | filters=32 43 | size=3 44 | stride=2 45 | pad=1 46 | activation=linear 47 | hasBias = false 48 | 49 | [batchnorm] 50 | activation = leaky 51 | 52 | [convolutional] 53 | filters=32 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=linear 58 | hasBias = false 59 | 60 | [batchnorm] 61 | activation = leaky 62 | 63 | [convolutional] 64 | filters=64 65 | size=3 66 | stride=2 67 | pad=1 68 | activation=linear 69 | hasBias = false 70 | 71 | [batchnorm] 72 | activation = leaky 73 | 74 | [convolutional] 75 | filters=64 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=linear 80 | hasBias = false 81 | 82 | [batchnorm] 83 | activation = leaky 84 | 85 | [convolutional] 86 | filters=64 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=linear 91 | hasBias = false 92 | 93 | [batchnorm] 94 | activation = leaky 95 | 96 | [convolutional] 97 | filters=64 98 | size=3 99 | stride=1 100 | pad=1 101 | activation=linear 102 | hasBias = false 103 | 104 | [batchnorm] 105 | activation = leaky 106 | 107 | [convolutional] 108 | filters=64 109 | size=3 110 | stride=1 111 | pad=1 112 | activation=linear 113 | hasBias = false 114 | 115 | [batchnorm] 116 | activation = leaky 117 | 118 | [convolutional] 119 | filters=128 120 | size=3 121 | stride=2 122 | pad=1 123 | activation=linear 124 | hasBias = false 125 | 126 | [batchnorm] 127 | activation = leaky 128 | 129 | [convolutional] 130 | filters=64 131 | size=3 132 | stride=1 133 | pad=1 134 | activation=linear 135 | hasBias = false 136 | 137 | [batchnorm] 138 | activation = leaky 139 | 140 | [convolutional] 141 | filters=128 142 | size=3 143 | stride=1 144 | pad=1 145 | activation=linear 146 | hasBias = false 147 | 148 | [batchnorm] 149 | activation = leaky 150 | 151 | [convolutional] 152 | filters=64 153 | size=3 154 | stride=1 155 | pad=1 156 | activation=linear 157 | hasBias = false 158 | 159 | [batchnorm] 160 | activation = leaky 161 | 162 | [convolutional] 163 | filters=128 164 | size=3 165 | stride=1 166 | pad=1 167 | activation=linear 168 | hasBias = false 169 | 170 | [batchnorm] 171 | activation = leaky 172 | 173 | [route] 174 | from = 19 175 | 176 | [convolutional] 177 | size=1 178 | stride=1 179 | pad=0 180 | filters=10 181 | activation=linear 182 | 183 | [route] 184 | from = 29 185 | 186 | [convolutional] 187 | size=1 188 | stride=1 189 | pad=0 190 | filters=10 191 | activation=linear 192 | 193 | [concat] 194 | from = 31 195 | oned = 1 196 | -------------------------------------------------------------------------------- /YOLOExtractor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import cv2 4 | import numpy as np 5 | 6 | imgPath = "E:/RoboCup/YOLO/Train/" 7 | labelPath = "E:/RoboCup/YOLO/Masks/Train/" 8 | 9 | labelDict = {} 10 | legendDict = {} 11 | 12 | 13 | def loadLabelConfig(): 14 | """ 15 | Reads LabelConfig.txt to dictionary 16 | """ 17 | with open(labelPath + "LabelConfig.cfg") as file: 18 | data = 
file.readlines() 19 | data = [x.replace("\n", "") for x in data] 20 | data = [x.split(":") for x in data] 21 | for i in data: 22 | labelDict[i[0]] = i[1] 23 | 24 | 25 | def readLegendFile(): 26 | """ 27 | Loads the legend file generated by UETrainingSetGenerator into a 28 | dictionary structure 29 | """ 30 | with open(labelPath + "segmentationLegend.leg", "r") as currFile: 31 | fileData = currFile.readline().split(" ") 32 | currLegendIndex = 0 33 | for i in fileData: 34 | i = i.split(":") 35 | if (len(i) < 2): # catching occunring whitespaces at file endings 36 | continue 37 | 38 | currLegendIndex += int(i[0]) 39 | legendDict[str(currLegendIndex)] = i[1] 40 | 41 | 42 | def getTag(key): 43 | legendKeyArray = sorted(map(int, legendDict.keys())) 44 | for legendKey in legendKeyArray: 45 | if (key - 1 < legendKey): 46 | return (legendDict[str(legendKey)]) 47 | 48 | 49 | def getLabel(key): 50 | currTag = getTag(key) 51 | return (int(labelDict[currTag])) 52 | 53 | 54 | def processMask(maskName, imageHeight = 480): 55 | """ 56 | Processes given maskFile into 2d-array structure. 57 | """ 58 | maskArray = [] 59 | with open(labelPath + maskName, "r") as currFile: 60 | for i in range(imageHeight): # 480 61 | # read line from segMaskFile 62 | currLineData = currFile.readline() 63 | # gather segNames from File 64 | currLineData = currLineData.split(" ") 65 | maskArray.append(currLineData[:-1]) 66 | return maskArray 67 | 68 | import re 69 | 70 | def sorted_nicely( l ): 71 | """ Sort the given iterable in the way that humans expect.""" 72 | convert = lambda text: int(text) if text.isdigit() else text 73 | alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] 74 | return sorted(l, key = alphanum_key) 75 | 76 | if __name__ == "__main__": 77 | 78 | readLegendFile() 79 | loadLabelConfig() 80 | 81 | labels = sorted_nicely(glob.glob1(labelPath,"*.txt")) 82 | images = sorted_nicely(glob.glob1(imgPath,"*.png")) 83 | 84 | for i,(imageN,labelN) in enumerate(zip(images,labels)): 85 | print(i) 86 | label = np.array(processMask(labelN),'uint8') 87 | file = open(imgPath + imageN.split(".")[0] + ".txt","w+") 88 | for i in range(1,62): 89 | a = np.where(label == i) 90 | if a[0].size == 0 or a[1].size == 0: 91 | continue 92 | bbox = getLabel(i)-1, (np.max(a[1])+np.min(a[1]))/1280.0, (np.max(a[0])+np.min(a[0]))/960.0, \ 93 | (np.max(a[1])-np.min(a[1]))/640.0, (np.max(a[0])-np.min(a[0]))/480.0 94 | if bbox[0] < 0: 95 | continue 96 | if bbox[3] > 0.012 or bbox[4] > 0.015: 97 | for elem in bbox: 98 | file.write(str(elem)) 99 | file.write(" ") 100 | file.write("\n") 101 | file.close() 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /config/robo-bn.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | mean = 0.4637419,0.47166784,0.48316576 3 | std = 0.45211827,0.16890674,0.18645908 4 | width = 512 5 | height = 384 6 | channels = 3 7 | 8 | [convolutional] 9 | filters=8 10 | size=3 11 | stride=2 12 | pad=1 13 | activation=linear 14 | hasBias = false 15 | 16 | [batchnorm] 17 | activation = leaky 18 | 19 | [convolutional] 20 | filters=16 21 | size=3 22 | stride=2 23 | pad=1 24 | activation=linear 25 | hasBias = false 26 | 27 | [batchnorm] 28 | activation = leaky 29 | 30 | [convolutional] 31 | filters=32 32 | size=3 33 | stride=2 34 | pad=1 35 | activation=linear 36 | hasBias = false 37 | 38 | [batchnorm] 39 | activation = leaky 40 | 41 | [convolutional] 42 | filters=16 43 | size=1 44 | stride=1 45 | pad=0 46 
| activation=linear 47 | hasBias = false 48 | 49 | [batchnorm] 50 | activation = leaky 51 | 52 | [convolutional] 53 | filters=32 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=linear 58 | hasBias = false 59 | 60 | [batchnorm] 61 | activation = leaky 62 | 63 | [convolutional] 64 | filters=64 65 | size=3 66 | stride=2 67 | pad=1 68 | activation=linear 69 | hasBias = false 70 | 71 | [batchnorm] 72 | activation = leaky 73 | 74 | [convolutional] 75 | filters=32 76 | size=1 77 | stride=1 78 | pad=0 79 | activation=linear 80 | hasBias = false 81 | 82 | [batchnorm] 83 | activation = leaky 84 | 85 | [convolutional] 86 | filters=64 87 | size=3 88 | stride=1 89 | pad=1 90 | activation=linear 91 | hasBias = false 92 | 93 | [batchnorm] 94 | activation = leaky 95 | 96 | [convolutional] 97 | filters=128 98 | size=3 99 | stride=2 100 | pad=1 101 | activation=linear 102 | hasBias = false 103 | 104 | [batchnorm] 105 | activation = leaky 106 | 107 | [convolutional] 108 | filters=64 109 | size=1 110 | stride=1 111 | pad=0 112 | activation=linear 113 | hasBias = false 114 | 115 | [batchnorm] 116 | activation = leaky 117 | 118 | [convolutional] 119 | filters=128 120 | size=3 121 | stride=1 122 | pad=1 123 | activation=linear 124 | hasBias = false 125 | 126 | [batchnorm] 127 | activation = leaky 128 | 129 | [convolutional] 130 | filters=64 131 | size=1 132 | stride=1 133 | pad=0 134 | activation=linear 135 | hasBias = false 136 | 137 | [batchnorm] 138 | activation = leaky 139 | 140 | [convolutional] 141 | filters=128 142 | size=3 143 | stride=1 144 | pad=1 145 | activation=linear 146 | hasBias = false 147 | 148 | [batchnorm] 149 | activation = leaky 150 | 151 | [convolutional] 152 | filters=256 153 | size=3 154 | stride=2 155 | pad=1 156 | activation=linear 157 | hasBias = false 158 | 159 | [batchnorm] 160 | activation = leaky 161 | 162 | [convolutional] 163 | filters=128 164 | size=1 165 | stride=1 166 | pad=0 167 | activation=linear 168 | hasBias = false 169 | 170 | [batchnorm] 171 | activation = leaky 172 | 173 | [convolutional] 174 | filters=256 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=linear 179 | hasBias = false 180 | 181 | [batchnorm] 182 | activation = leaky 183 | 184 | [convolutional] 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=0 189 | activation=linear 190 | hasBias = false 191 | 192 | [batchnorm] 193 | activation = leaky 194 | 195 | [convolutional] 196 | filters=256 197 | size=3 198 | stride=1 199 | pad=1 200 | activation=linear 201 | hasBias = false 202 | 203 | [batchnorm] 204 | activation = leaky 205 | 206 | [route] 207 | from = 25 208 | 209 | [convolutional] 210 | size=1 211 | stride=1 212 | pad=0 213 | filters=10 214 | activation=linear 215 | 216 | [route] 217 | from = 35 218 | 219 | [convolutional] 220 | size=1 221 | stride=1 222 | pad=0 223 | filters=10 224 | activation=linear 225 | 226 | [concat] 227 | from = 37 228 | oned = 1 229 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from models import * 4 | from utils.utils import * 5 | from utils.datasets import * 6 | 7 | import os 8 | import sys 9 | import time 10 | import datetime 11 | import argparse 12 | import cv2 13 | 14 | import torch 15 | from torch.utils.data import DataLoader 16 | from torchvision import datasets 17 | from torch.autograd import Variable 18 | import progressbar 19 | 20 | 21 | if __name__ == '__main__': 22 | parser = 
argparse.ArgumentParser() 23 | parser.add_argument('--weights_path', type=str, default='checkpoints/DBestFinetunePruned.weights', help='path to weights file') 24 | parser.add_argument('--class_path', type=str, default='data/robo.names', help='path to class label file') 25 | parser.add_argument('--conf_thres', type=float, default=0.5, help='object confidence threshold') 26 | parser.add_argument('--nms_thres', type=float, default=0.4, help='iou thresshold for non-maximum suppression') 27 | parser.add_argument('--batch_size', type=int, default=1, help='size of the batches') 28 | parser.add_argument('--n_cpu', type=int, default=4, help='number of cpu threads to use during batch generation') 29 | parser.add_argument('--img_size', type=int, default=(384,512), help='size of each image dimension') 30 | parser.add_argument("--finetune", help="Finetuning", action="store_true", default=False) 31 | parser.add_argument("--bn", help="Use bottleneck", action="store_true", default=False) 32 | parser.add_argument("--yu", help="Use 2 channels", action="store_true", default=False) 33 | parser.add_argument("--hr", help="Use half res", action="store_true", default=False) 34 | opt = parser.parse_args() 35 | print(opt) 36 | 37 | cuda = torch.cuda.is_available() 38 | 39 | image_folder = "E:/RoboCup/YOLO/Finetune/test/" if opt.finetune else "E:/RoboCup/YOLO/Test/" 40 | 41 | weights_path = "checkpoints/bestFinetune" if opt.finetune else "checkpoints/best" 42 | 43 | if opt.yu: 44 | weights_path += "2C" 45 | if opt.bn: 46 | weights_path += "BN" 47 | if opt.hr: 48 | weights_path += "HR" 49 | 50 | weights_path += ".weights" 51 | 52 | os.makedirs('output', exist_ok=True) 53 | 54 | # Set up model 55 | channels = 2 if opt.yu else 3 56 | model = ROBO(inch=channels,bn=opt.bn,halfRes=opt.hr) 57 | model.load_state_dict(torch.load(weights_path,map_location={'cuda:0': 'cpu'})) 58 | 59 | print(count_zero_weights(model)) 60 | 61 | if cuda: 62 | model.cuda() 63 | 64 | model.eval() # Set in evaluation mode 65 | 66 | dataloader = DataLoader(ImageFolder(image_folder, synth=opt.finetune, type='%s/*.png', yu=opt.yu, hr=opt.hr), 67 | batch_size=opt.batch_size, shuffle=False, num_workers=opt.n_cpu) 68 | 69 | classes = load_classes(opt.class_path) # Extracts class labels from file 70 | 71 | Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor 72 | 73 | imgs = [] # Stores image paths 74 | img_detections = [] # Stores detections for each image index 75 | 76 | print ('\nPerforming object detection:') 77 | bar = progressbar.ProgressBar(0, len(dataloader), redirect_stdout=False) 78 | elapsed_time = 0 79 | for batch_i, (img_paths, input_imgs) in enumerate(dataloader): 80 | # Configure input 81 | input_imgs = input_imgs.type(Tensor) 82 | 83 | # Get detections 84 | with torch.no_grad(): 85 | start_time = time.time() 86 | detections = model(input_imgs) 87 | elapsed_time += time.time() - start_time 88 | detections = non_max_suppression(detections, 80, opt.conf_thres, opt.nms_thres) 89 | 90 | # Log progress 91 | bar.update(batch_i) 92 | 93 | # Save image and detections 94 | imgs.extend(img_paths) 95 | img_detections.extend(detections) 96 | 97 | bar.finish() 98 | print("\nAverage time: %.2f" % (elapsed_time*1000/len(dataloader))) 99 | print ('\nSaving images:') 100 | # Iterate through images and save plot of detections 101 | bar = progressbar.ProgressBar(0, len(imgs), redirect_stdout=False) 102 | for img_i, (path, detections) in enumerate(zip(imgs, img_detections)): 103 | 104 | # Create plot 105 | img = 
np.array(Image.open(path).convert('RGB')) 106 | 107 | # The amount of padding that was added 108 | pad_x = 0 109 | pad_y = 0 110 | # Image height and width after padding is removed 111 | unpad_h = opt.img_size[0] - pad_y 112 | unpad_w = opt.img_size[1] - pad_x 113 | 114 | img = cv2.cvtColor(img,cv2.COLOR_YUV2BGR) 115 | 116 | # Draw bounding boxes and labels of detections 117 | if detections is not None: 118 | unique_labels = detections[:, -1].cpu().unique() 119 | n_cls_preds = len(unique_labels) 120 | bbox_colors = [(0,0,255),(255,0,255),(255,0,0),(0,255,255)] 121 | for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections: 122 | 123 | # Rescale coordinates to original dimensions 124 | box_h = ((y2 - y1) / unpad_h) * img.shape[0] 125 | box_w = ((x2 - x1) / unpad_w) * img.shape[1] 126 | y1 = (y1 - pad_y // 2) * 1 127 | x1 = (x1 - pad_x // 2) * 1 128 | y2 = (y2 - pad_y // 2) * 1 129 | x2 = (x2 - pad_x // 2) * 1 130 | 131 | color = bbox_colors[int(cls_pred)] 132 | # Create a Rectangle patch 133 | cv2.rectangle(img,(x1,y1),(x2,y2),color,2) 134 | 135 | # Save generated image with detections 136 | cv2.imwrite('output/%d.png' % (img_i),img) 137 | bar.update(img_i) 138 | bar.finish() 139 | -------------------------------------------------------------------------------- /compute_anchors.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Feb 20, 2017 3 | @author: jumabek 4 | ''' 5 | from os import listdir 6 | from os.path import isfile, join 7 | import argparse 8 | # import cv2 9 | import numpy as np 10 | import sys 11 | import os 12 | from scipy.cluster.vq import kmeans, whiten 13 | import shutil 14 | import random 15 | import math 16 | 17 | width_in_cfg_file = 416. 18 | height_in_cfg_file = 416. 19 | 20 | 21 | def IOU(x, centroids): 22 | similarities = [] 23 | k = len(centroids) 24 | for centroid in centroids: 25 | c_w, c_h = centroid 26 | w, h = x 27 | if c_w >= w and c_h >= h: 28 | similarity = w * h / (c_w * c_h) 29 | elif c_w >= w and c_h <= h: 30 | similarity = w * c_h / (w * h + (c_w - w) * c_h) 31 | elif c_w <= w and c_h >= h: 32 | similarity = c_w * h / (w * h + c_w * (c_h - h)) 33 | else: # means both w,h are bigger than c_w and c_h respectively 34 | similarity = (c_w * c_h) / (w * h) 35 | similarities.append(similarity) # will become (k,) shape 36 | return np.array(similarities) 37 | 38 | 39 | def avg_IOU(X, centroids): 40 | n, d = X.shape 41 | sum = 0. 
42 | for i in range(X.shape[0]): 43 | # note IOU() will return array which contains IoU for each centroid and X[i] // slightly ineffective, but I am too lazy 44 | sum += max(IOU(X[i], centroids)) 45 | return sum / n 46 | 47 | 48 | def write_anchors_to_file(centroids, X, anchor_file): 49 | f = open(anchor_file, 'w') 50 | 51 | anchors = centroids.copy() 52 | print(anchors.shape) 53 | 54 | for i in range(anchors.shape[0]): 55 | anchors[i][0] = round(anchors[i][0]*512) 56 | anchors[i][1] = round(anchors[i][1]*384) 57 | 58 | 59 | print('Anchors = ', anchors) 60 | 61 | for i in range(anchors.shape[0]-1): 62 | f.write('%0.2f,%0.2f, ' % (anchors[i, 0], anchors[i, 1])) 63 | 64 | # there should not be comma after last anchor, that's why 65 | f.write('%0.2f,%0.2f\n' % (anchors[-1, 0], anchors[-1, 1])) 66 | 67 | if X is not None: 68 | f.write('%f\n' % (avg_IOU(X, centroids))) 69 | print() 70 | 71 | 72 | def kmeans2(X, centroids, eps, anchor_file): 73 | N = X.shape[0] 74 | iterations = 0 75 | k, dim = centroids.shape 76 | prev_assignments = np.ones(N) * (-1) 77 | iter = 0 78 | old_D = np.zeros((N, k)) 79 | 80 | while True: 81 | D = [] 82 | iter += 1 83 | for i in range(N): 84 | d = 1 - IOU(X[i], centroids) 85 | D.append(d) 86 | D = np.array(D) # D.shape = (N,k) 87 | 88 | print("iter {}: dists = {}".format(iter, np.sum(np.abs(old_D - D)))) 89 | 90 | # assign samples to centroids 91 | assignments = np.argmin(D, axis=1) 92 | 93 | if (assignments == prev_assignments).all(): 94 | print("Centroids = ", centroids) 95 | write_anchors_to_file(centroids, X, anchor_file) 96 | return 97 | 98 | # calculate new centroids 99 | centroid_sums = np.zeros((k, dim), np.float) 100 | for i in range(N): 101 | centroid_sums[assignments[i]] += X[i] 102 | for j in range(k): 103 | centroids[j] = centroid_sums[j] / (np.sum(assignments == j)) 104 | 105 | prev_assignments = assignments.copy() 106 | old_D = D.copy() 107 | 108 | 109 | def main(argv): 110 | parser = argparse.ArgumentParser() 111 | parser.add_argument('-filelist', default='./data/RoboCup/FinetuneTrain.txt', 112 | help='path to filelist\n') 113 | parser.add_argument('-output_dir', default='./data/RoboCup/anchors', type=str, 114 | help='Output anchor directory\n') 115 | parser.add_argument('-num_clusters', default=3, type=int, 116 | help='number of clusters\n') 117 | 118 | args = parser.parse_args() 119 | 120 | nclass = 4 121 | 122 | if not os.path.exists(args.output_dir): 123 | os.mkdir(args.output_dir) 124 | 125 | f = open(args.filelist) 126 | 127 | lines = [line.rstrip('\n') for line in f.readlines()] 128 | 129 | annotation_dims = [] 130 | for i in range(nclass): 131 | annotation_dims.append([]) 132 | 133 | size = np.zeros((1, 1, 3)) 134 | for line in lines: 135 | 136 | # line = line.replace('images','labels') 137 | # line = line.replace('img1','labels') 138 | line = line.replace('JPEGImages', 'labels') 139 | 140 | line = line.replace('.jpg', '.txt') 141 | line = line.replace('.png', '.txt') 142 | print(line) 143 | f2 = open(line) 144 | for line in f2.readlines(): 145 | line = line.rstrip('\n') 146 | c, _, _, w, h = line.split(' ') 147 | # print(w,h) 148 | annotation_dims[int(c)].append(tuple(map(float, (w, h)))) 149 | 150 | anchors = np.zeros([nclass,2]) 151 | for i in range(nclass): 152 | dims = np.array(annotation_dims[i]) 153 | anchors[i] = np.mean(dims,0) 154 | anchor_file = join(args.output_dir, 'anchorsFinetune%d.txt' % (args.num_clusters)) 155 | write_anchors_to_file(anchors,None,anchor_file) 156 | 157 | 158 | '''annotation_dims = np.array(annotation_dims) 159 
| 160 | eps = 0.005 161 | 162 | if args.num_clusters == 0: 163 | for num_clusters in range(1, 11): # we make 1 through 10 clusters 164 | anchor_file = join(args.output_dir, 'anchors%d.txt' % (num_clusters)) 165 | 166 | indices = [random.randrange(annotation_dims.shape[0]) for i in range(num_clusters)] 167 | centroids = annotation_dims[indices] 168 | kmeans(annotation_dims, centroids, eps, anchor_file) 169 | print('centroids.shape', centroids.shape) 170 | else: 171 | anchor_file = join(args.output_dir, 'anchors%d.txt' % (args.num_clusters)) 172 | indices = [random.randrange(annotation_dims.shape[0]) for i in range(args.num_clusters)] 173 | centroids = annotation_dims[indices] 174 | kmeans(annotation_dims, centroids, eps, anchor_file) 175 | print('centroids.shape', centroids.shape)''' 176 | 177 | 178 | if __name__ == "__main__": 179 | main(sys.argv) -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from models import * 4 | from utils.utils import * 5 | from utils.datasets import * 6 | from utils.parse_config import * 7 | 8 | import os 9 | import argparse 10 | 11 | import torch 12 | from torch.utils.data import DataLoader 13 | import torch.optim as optim 14 | 15 | import progressbar 16 | 17 | def l1reg(model): 18 | regularization_loss = 0 19 | for param in model.parameters(): 20 | regularization_loss += torch.sum(torch.abs(param)) 21 | return regularization_loss 22 | 23 | def add_dimension_glasso(var, dim=0): 24 | return var.pow(2).sum(dim=dim).add(1e-8).pow(1/2.).sum() 25 | 26 | def gl1reg(model): 27 | reg = 0 28 | for param in model.parameters(): 29 | dim = param.size() 30 | if dim.__len__() > 2: 31 | reg += add_dimension_glasso(param, (1,2,3)) 32 | #reg += add_dimension_glasso(param, (0,2,3)) 33 | return reg 34 | 35 | def train(epoch,epochs,bestLoss,indices = None): 36 | ############# 37 | ####TRAIN#### 38 | ############# 39 | 40 | lossx = 0 41 | lossy = 0 42 | lossw = 0 43 | lossh = 0 44 | lossconf = 0 45 | lossreg = 0 46 | losstotal = 0 47 | recall = 0 48 | prec = 0 49 | 50 | recs = [0,0] 51 | precs = [0,0] 52 | 53 | model.train() 54 | 55 | bar = progressbar.ProgressBar(0, len(trainloader), redirect_stdout=False) 56 | 57 | for batch_i, (_, imgs, targets) in enumerate(trainloader): 58 | imgs = imgs.type(Tensor) 59 | targets = [x.type(Tensor) for x in targets] 60 | 61 | optimizer.zero_grad() 62 | 63 | loss = model(imgs, targets) 64 | reg = Tensor([0.0]) 65 | if indices is None: 66 | reg = decay * regularize(model) 67 | loss += reg 68 | 69 | loss.backward() 70 | 71 | if indices is not None: 72 | pIdx = 0 73 | for param in model.parameters(): 74 | if param.dim() > 1: 75 | if param.grad is not None: 76 | param.grad[indices[pIdx]] = 0 77 | pIdx += 1 78 | 79 | optimizer.step() 80 | bar.update(batch_i) 81 | 82 | lossx += model.losses["x"] 83 | lossy += model.losses["y"] 84 | lossw += model.losses["w"] 85 | lossh += model.losses["h"] 86 | lossconf += model.losses["conf"] 87 | lossreg += reg.item() 88 | losstotal += loss.item() 89 | recall += model.losses["recall"] 90 | prec += model.losses["precision"] 91 | recs[0] += model.recprec[0] 92 | recs[1] += model.recprec[2] 93 | precs[0] += model.recprec[1] 94 | precs[1] += model.recprec[3] 95 | 96 | bar.finish() 97 | prune = count_zero_weights(model,glasso) 98 | print( 99 | "[Epoch Train %d/%d lr: %.4f][Losses: x %f, y %f, w %f, h %f, conf %f, reg %f, pruned %f, total %f, recall: %.5f 
(%.5f / %.5f), precision: %.5f (%.5f / %.5f)]" 100 | % ( 101 | epoch + 1, 102 | epochs, 103 | scheduler.get_lr()[-1]/learning_rate, 104 | lossx / float(len(trainloader)), 105 | lossy / float(len(trainloader)), 106 | lossw / float(len(trainloader)), 107 | lossh / float(len(trainloader)), 108 | lossconf / float(len(trainloader)), 109 | lossreg / float(len(trainloader)), 110 | prune, 111 | losstotal / float(len(trainloader)), 112 | recall / float(len(trainloader)), 113 | recs[0] / float(len(trainloader)), 114 | recs[1] / float(len(trainloader)), 115 | prec / float(len(trainloader)), 116 | precs[0] / float(len(trainloader)), 117 | precs[1] / float(len(trainloader)), 118 | ) 119 | ) 120 | 121 | if indices is None: 122 | scheduler.step() 123 | 124 | name = "bestFinetune" if finetune else "best" 125 | name += "2C" if opt.yu else "" 126 | name += "BN" if opt.bn else "" 127 | name += "HR" if opt.hr else "" 128 | if transfer != 0: 129 | name += "T%d" % transfer 130 | if indices is not None: 131 | pruneP = round(prune * 100) 132 | comp = round(sum(model.get_computations(True))/1000000) 133 | name = name + ("%d_%d" %(pruneP,comp)) 134 | 135 | '''if bestLoss < (recall + prec): 136 | print("Saving best model") 137 | bestLoss = (recall + prec) 138 | torch.save(model.state_dict(), "checkpoints/%s.weights" % name)''' 139 | 140 | return bestLoss 141 | 142 | 143 | def valid(epoch,epochs,bestLoss,pruned): 144 | ############# 145 | ####VALID#### 146 | ############# 147 | 148 | model.eval() 149 | 150 | mAP, APs = computeAP(model,valloader,0.5,0.45,4,(384,512),False,32) 151 | prune = count_zero_weights(model,glasso) 152 | 153 | name = "bestFinetune" if finetune else "best" 154 | name += "2C" if opt.yu else "" 155 | name += "BN" if opt.bn else "" 156 | name += "HR" if opt.hr else "" 157 | if transfer != 0: 158 | name += "T%d" % transfer 159 | if pruned: 160 | pruneP = round(prune * 100) 161 | comp = round(sum(model.get_computations(True))/1000000) 162 | name = name + ("%d_%d" %(pruneP,comp)) 163 | 164 | print("[Epoch Val %d/%d mAP: %.4f][Ball: %.4f Crossing: %.4f Goalpost: %.4f Robot: %.4f]" % (epoch + 1,epochs,mAP,APs[0],APs[1],APs[2],APs[3])) 165 | 166 | if bestLoss < (mAP): 167 | print("Saving best model") 168 | bestLoss = (mAP) 169 | torch.save(model.state_dict(), "checkpoints/%s.weights" % name) 170 | 171 | return bestLoss 172 | 173 | if __name__ == '__main__': 174 | 175 | parser = argparse.ArgumentParser() 176 | parser.add_argument("--finetune", help="Finetuning", action="store_true", default=False) 177 | parser.add_argument("--lr", help="Learning rate", type=float, default=1e-3) 178 | parser.add_argument("--decay", help="Weight decay", type=float, default=1e-4) 179 | parser.add_argument("--transfer", help="Layers to truly train", action="store_true") 180 | parser.add_argument("--bn", help="Use bottleneck", action="store_true") 181 | parser.add_argument("--yu", help="Use 2 channels", action="store_true", default=False) 182 | parser.add_argument("--hr", help="Use half res", action="store_true", default=False) 183 | parser.add_argument("--singleDec", help="Just use a single decay value", action="store_true", default=False) 184 | parser.add_argument("--glasso", help="Use group lasso regularization", action="store_true", default=False) 185 | opt = parser.parse_args() 186 | 187 | finetune = opt.finetune 188 | learning_rate = opt.lr/2 if opt.transfer else opt.lr 189 | dec = opt.decay if finetune else opt.decay/10 190 | transfers = ([3, 5, 8, 11] if opt.bn else [3, 5, 7, 9]) if opt.transfer else [0] 191 | decays = 
[dec*25, dec*10, dec*5, dec*2.5, dec] if (finetune and not opt.transfer) else [dec] 192 | if opt.singleDec: 193 | decays = [decays[0]] 194 | halfRes = opt.hr 195 | glasso = opt.glasso 196 | regularize = gl1reg if glasso else l1reg 197 | if glasso: 198 | decays = [d*100 for d in decays] 199 | 200 | classPath = "data/robo.names" 201 | data_config_path = "config/roboFinetune.data" if finetune else "config/robo.data" 202 | img_size = (192,256) if halfRes else (384,512) 203 | weights_path = "checkpoints/best%s%s%s.weights" % ("2C" if opt.yu else "","BN" if opt.bn else "", "HR" if opt.hr else "") 204 | n_cpu = 8 205 | batch_size = 64 206 | channels = 2 if opt.yu else 3 207 | epochs = 125 if opt.transfer == 0 else 150 208 | 209 | os.makedirs("output", exist_ok=True) 210 | os.makedirs("checkpoints", exist_ok=True) 211 | 212 | classes = load_classes(classPath) 213 | 214 | # Get data configuration 215 | data_config = parse_data_config(data_config_path) 216 | train_path = data_config["train"] 217 | val_path = data_config["valid"] 218 | 219 | cuda = torch.cuda.is_available() 220 | Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor 221 | 222 | # Get dataloader 223 | trainloader = torch.utils.data.DataLoader( 224 | ListDataset(train_path,img_size=img_size, train=True, synth=finetune, yu=opt.yu), batch_size=batch_size, shuffle=True, num_workers=n_cpu 225 | ) 226 | valloader = torch.utils.data.DataLoader( 227 | ListDataset(val_path,img_size=img_size, train=False, synth=finetune, yu=opt.yu), batch_size=batch_size, shuffle=False, num_workers=n_cpu 228 | ) 229 | 230 | for transfer in transfers: 231 | if len(transfers) > 1: 232 | print("######################################################") 233 | print("############# Finetune with transfer: %d #############" % transfer) 234 | print("######################################################") 235 | for decay in decays: 236 | 237 | if len(decays) > 1: 238 | print("######################################################") 239 | print("############ Finetune with decay: %.1E ############" % decay) 240 | print("######################################################") 241 | 242 | torch.random.manual_seed(12345678) 243 | if cuda: 244 | torch.cuda.manual_seed(12345678) 245 | 246 | # Initiate model 247 | model = ROBO(inch=channels,bn=opt.bn,halfRes = halfRes) 248 | comp = model.get_computations() 249 | print(comp) 250 | print(sum(comp)) 251 | 252 | if finetune: 253 | model.load_state_dict(torch.load(weights_path)) 254 | 255 | if cuda: 256 | model = model.cuda() 257 | 258 | bestLoss = 0 259 | 260 | optimizer = torch.optim.Adam([ 261 | {'params': model.downPart[0:transfer].parameters(), 'lr': learning_rate*10}, 262 | {'params': model.downPart[transfer:].parameters()}, 263 | {'params': model.classifiers.parameters()} 264 | ],lr=learning_rate) 265 | eta_min = learning_rate/25 if opt.transfer else learning_rate/10 266 | scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,epochs,eta_min=eta_min) 267 | 268 | for epoch in range(epochs): 269 | #if finetune: 270 | train(epoch,epochs,100) 271 | bestLoss = valid(epoch,epochs,bestLoss,False) 272 | #else: 273 | #bestLoss = train(epoch,epochs,bestLoss) 274 | 275 | if finetune and (transfer == 0): 276 | model.load_state_dict(torch.load("checkpoints/bestFinetune%s%s%s.weights" % ("2C" if opt.yu else "","BN" if opt.bn else "","HR" if opt.hr else ""))) 277 | with torch.no_grad(): 278 | indices = pruneModel(model.parameters(),glasso) 279 | 280 | optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate/40) 
281 | print("Finetuning") 282 | 283 | bestLoss = 0 284 | 285 | for epoch in range(25): 286 | train(epoch, 25, 100, indices=indices) 287 | bestLoss = valid(epoch,25,bestLoss,True) 288 | -------------------------------------------------------------------------------- /YOLOLabeller.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | import os.path as osp 5 | from glob import glob1 6 | import copy 7 | import pickle 8 | import re 9 | 10 | def sorted_nicely( l ): 11 | """ Sort the given iterable in the way that humans expect.""" 12 | convert = lambda text: int(text) if text.isdigit() else text 13 | alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] 14 | return sorted(l, key = alphanum_key) 15 | 16 | global sbox, ebox, img, colors, drawing 17 | colors = [(0,0,255),(255,0,255),(255,0,0),(0,255,255)] 18 | 19 | def on_mouse(event, x, y, flags, params): 20 | global sbox, ebox, img, drawing 21 | if event == cv2.EVENT_LBUTTONDOWN: 22 | sbox = (x, y) 23 | drawing = True 24 | 25 | elif event == cv2.EVENT_MOUSEMOVE: 26 | if drawing: 27 | ebox = (x, y) 28 | img2 = img.copy() 29 | cv2.rectangle(img2,sbox,ebox,colors[classIdx],1) 30 | cv2.imshow("video", img2) 31 | 32 | elif event == cv2.EVENT_LBUTTONUP: 33 | ebox = (x, y) 34 | img2 = img.copy() 35 | cv2.rectangle(img2,sbox,ebox,colors[classIdx],1) 36 | cv2.imshow("video", img2) 37 | drawing = False 38 | 39 | 40 | 41 | if __name__ == '__main__': 42 | 43 | global img, drawing 44 | 45 | drawing = False 46 | 47 | path = "/Users/martonszemenyei/Projects/ROBO/data/YOLO/sydney/" 48 | 49 | names = sorted_nicely(glob1(path, "*.png")) 50 | 51 | cv2.namedWindow("video") 52 | cv2.setMouseCallback("video",on_mouse) 53 | 54 | BBLists = [] 55 | classIdx = 0 56 | 57 | for frameCntr,name in enumerate(names): 58 | 59 | img = cv2.imread(path+name) 60 | if img.shape[0] != 384: 61 | img = cv2.resize(img,(512,384)) 62 | cv2.imwrite(path+name,img) 63 | img = cv2.cvtColor(img,cv2.COLOR_RGB2BGR) 64 | img = cv2.cvtColor(img,cv2.COLOR_YUV2BGR) 65 | 66 | orig = img.copy() 67 | print(frameCntr) 68 | 69 | if len(BBLists) <= frameCntr: 70 | BBLists.append([])#copy.deepcopy(BBLists[-1]) if len(BBLists) else []) 71 | if osp.exists(path + name.split(".")[0] + ".txt"): 72 | file = open(path + name.split(".")[0] + ".txt", "r") 73 | BBLists[frameCntr] = [] 74 | while True: 75 | line = file.readline().split(" ") 76 | if len(line) < 5: 77 | break 78 | BB = [] 79 | xc = int(float(line[1])*img.shape[1]) 80 | yc = int(float(line[2])*img.shape[0]) 81 | w = int(float(line[3])*img.shape[1]) 82 | h = int(float(line[4])*img.shape[0]) 83 | BB.append((xc-w//2,yc-h//2)) 84 | BB.append((xc+w//2,yc+h//2)) 85 | BB.append(int(line[0])) 86 | BBLists[frameCntr].append(BB) 87 | for BB in BBLists[frameCntr]: 88 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1) 89 | 90 | BBSel = -1 91 | 92 | BBNum = len(BBLists[frameCntr]) 93 | 94 | drawing = False 95 | cv2.imshow("video", img) 96 | 97 | while True: 98 | 99 | key = cv2.waitKey(20) 100 | 101 | if key == 27: 102 | exit(0) 103 | elif key == 13: 104 | cv2.rectangle(img, sbox, ebox, colors[classIdx], 1) 105 | cv2.imshow("video", img) 106 | BBLists[frameCntr].append([sbox, ebox, classIdx]) 107 | BBNum = len(BBLists[frameCntr]) 108 | # k = next image 109 | elif key == 107: 110 | classIdx = 0 111 | BBSel = -1 112 | break 113 | # x = del all BBs 114 | elif key == 120: 115 | BBLists[frameCntr] = [] 116 | img = orig.copy() 117 | cv2.imshow("video", img) 118 | 
elif key == 48: 119 | classIdx = 0 120 | elif key == 49: 121 | classIdx = 1 122 | elif key == 50: 123 | classIdx = 2 124 | elif key == 51: 125 | classIdx = 3 126 | # n = next BB 127 | elif key == 110: 128 | if BBNum > 0: 129 | BBSel += 1 130 | if BBSel == BBNum: 131 | BBSel = 0 132 | sbox = BBLists[frameCntr][BBSel][0] 133 | ebox = BBLists[frameCntr][BBSel][1] 134 | img2 = img.copy() 135 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1) 136 | cv2.imshow("video", img2) 137 | # w = widen vertically 138 | elif key == 119: 139 | if BBSel >= 0: 140 | sbox = BBLists[frameCntr][BBSel][0] 141 | ebox = BBLists[frameCntr][BBSel][1] 142 | ebox = (ebox[0], ebox[1]+1) 143 | BBLists[frameCntr][BBSel][1] = ebox 144 | img = orig.copy() 145 | for BB in BBLists[frameCntr]: 146 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1) 147 | img2 = img.copy() 148 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1) 149 | cv2.imshow("video", img2) 150 | # s = compress vertically 151 | elif key == 115: 152 | if BBSel >= 0: 153 | sbox = BBLists[frameCntr][BBSel][0] 154 | ebox = BBLists[frameCntr][BBSel][1] 155 | ebox = (ebox[0], ebox[1]-1) 156 | BBLists[frameCntr][BBSel][1] = ebox 157 | img = orig.copy() 158 | for BB in BBLists[frameCntr]: 159 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1) 160 | img2 = img.copy() 161 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1) 162 | cv2.imshow("video", img2) 163 | # a = widen horizontally 164 | elif key == 97: 165 | if BBSel >= 0: 166 | sbox = BBLists[frameCntr][BBSel][0] 167 | ebox = BBLists[frameCntr][BBSel][1] 168 | ebox = (ebox[0]+1, ebox[1]) 169 | BBLists[frameCntr][BBSel][1] = ebox 170 | img = orig.copy() 171 | for BB in BBLists[frameCntr]: 172 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1) 173 | img2 = img.copy() 174 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1) 175 | cv2.imshow("video", img2) 176 | # d = compress horizontally 177 | elif key == 100: 178 | if BBSel >= 0: 179 | sbox = BBLists[frameCntr][BBSel][0] 180 | ebox = BBLists[frameCntr][BBSel][1] 181 | ebox = (ebox[0]-1, ebox[1]) 182 | BBLists[frameCntr][BBSel][1] = ebox 183 | img = orig.copy() 184 | for BB in BBLists[frameCntr]: 185 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1) 186 | img2 = img.copy() 187 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1) 188 | cv2.imshow("video", img2) 189 | # t = move up 190 | elif key == 116: 191 | if BBSel >= 0: 192 | sbox = BBLists[frameCntr][BBSel][0] 193 | ebox = BBLists[frameCntr][BBSel][1] 194 | sbox = (sbox[0], sbox[1]+1) 195 | BBLists[frameCntr][BBSel][0] = sbox 196 | img = orig.copy() 197 | for BB in BBLists[frameCntr]: 198 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1) 199 | img2 = img.copy() 200 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1) 201 | cv2.imshow("video", img2) 202 | # g = move down 203 | elif key == 103: 204 | if BBSel >= 0: 205 | sbox = BBLists[frameCntr][BBSel][0] 206 | ebox = BBLists[frameCntr][BBSel][1] 207 | sbox = (sbox[0], sbox[1]-1) 208 | BBLists[frameCntr][BBSel][0] = sbox 209 | img = orig.copy() 210 | for BB in BBLists[frameCntr]: 211 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1) 212 | img2 = img.copy() 213 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1) 214 | cv2.imshow("video", img2) 215 | # f = move left 216 | elif key == 102: 217 | if BBSel >= 0: 218 | sbox = BBLists[frameCntr][BBSel][0] 219 | ebox = BBLists[frameCntr][BBSel][1] 220 | sbox = (sbox[0]+1, sbox[1]) 221 | BBLists[frameCntr][BBSel][0] = sbox 222 | img = orig.copy() 223 | for BB in BBLists[frameCntr]: 224 | cv2.rectangle(img, 
BB[0], BB[1], colors[BB[2]], 1) 225 | img2 = img.copy() 226 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1) 227 | cv2.imshow("video", img2) 228 | # h = move right 229 | elif key == 104: 230 | if BBSel >= 0: 231 | sbox = BBLists[frameCntr][BBSel][0] 232 | ebox = BBLists[frameCntr][BBSel][1] 233 | sbox = (sbox[0]-1, sbox[1]) 234 | BBLists[frameCntr][BBSel][0] = sbox 235 | img = orig.copy() 236 | for BB in BBLists[frameCntr]: 237 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1) 238 | img2 = img.copy() 239 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1) 240 | cv2.imshow("video", img2) 241 | # r = remove BB 242 | elif key == 114: 243 | if BBSel >= 0: 244 | BBLists[frameCntr].pop(BBSel) 245 | BBSel = -1 246 | img = orig.copy() 247 | for BB in BBLists[frameCntr]: 248 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1) 249 | cv2.imshow("video", img) 250 | BBNum = len(BBLists[frameCntr]) 251 | 252 | file = open(path + name.split(".")[0] + ".txt","w") 253 | for BB in BBLists[frameCntr]: 254 | center = ((BB[0][0] + BB[1][0])/(2*img.shape[1]), (BB[0][1] + BB[1][1])/(2*img.shape[0])) 255 | size = (abs(BB[1][0] - BB[0][0]) / img.shape[1],abs(BB[1][1] - BB[0][1]) / img.shape[0]) 256 | label = BB[2] 257 | file.write(str(label)) 258 | file.write(" ") 259 | file.write(str(center[0])) 260 | file.write(" ") 261 | file.write(str(center[1])) 262 | file.write(" ") 263 | file.write(str(size[0])) 264 | file.write(" ") 265 | file.write(str(size[1])) 266 | file.write("\n") 267 | file.close() -------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from utils.utils import build_targets 7 | from collections import defaultdict 8 | 9 | class Conv(nn.Module): 10 | def __init__(self,inch,ch,stride=1,size=3,doBN = True): 11 | super(Conv,self).__init__() 12 | self.conv = nn.Conv2d(inch,ch,kernel_size=size,stride=stride,padding=size//2, bias=not doBN) 13 | self.bn = nn.BatchNorm2d(ch) 14 | self.relu = nn.LeakyReLU(0.1) 15 | 16 | self.size = size 17 | self.inch = inch 18 | self.stride = stride 19 | self.ch = ch 20 | self.doBN = doBN 21 | 22 | def forward(self, x): 23 | x = self.conv(x) 24 | if self.doBN: 25 | x = self.bn(x) 26 | return self.relu(x) 27 | 28 | def getComp(self,W,H): 29 | W = W // self.stride 30 | H = H // self.stride 31 | 32 | return self.size*self.size*W*H*self.inch*self.ch*2 + (W*H*self.ch*4 if self.doBN else 0), W, H 33 | 34 | def getParams(self): 35 | return self.ch*(self.inch*self.size*self.size + 4 if self.doBN else 1) 36 | 37 | class YOLOLayer(nn.Module): 38 | """Detection layer""" 39 | 40 | def __init__(self, anchors, num_classes, img_dim): 41 | super(YOLOLayer, self).__init__() 42 | self.anchors = anchors 43 | self.num_anchors = len(anchors) 44 | self.num_classes = num_classes 45 | self.bbox_attrs = 5 #+ num_classes 46 | self.image_dim = img_dim 47 | self.ignore_thres = 0.5 48 | self.lambda_coord = 1 49 | 50 | self.mse_loss = nn.MSELoss(reduction='mean') # Coordinate loss 51 | self.bce_loss = nn.BCELoss(reduction='mean') # Confidence loss 52 | #self.ce_loss = nn.CrossEntropyLoss() # Class loss 53 | 54 | def forward(self, x, targets=None): 55 | nA = self.num_anchors 56 | nB = x.size(0) 57 | nGy = x.size(2) 58 | nGx = x.size(3) 59 | stride = self.image_dim / nGy 60 | 61 | # Tensors for cuda support 62 | FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor 63 | 
LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor 64 | ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor 65 | 66 | prediction = x.view(nB, nA, self.bbox_attrs, nGy, nGx).permute(0, 1, 3, 4, 2).contiguous() 67 | 68 | # Get outputs 69 | x = torch.sigmoid(prediction[..., 0]) # Center x 70 | y = torch.sigmoid(prediction[..., 1]) # Center y 71 | w = prediction[..., 2] # Width 72 | h = prediction[..., 3] # Height 73 | pred_conf = torch.sigmoid(prediction[..., 4]) # Conf 74 | #pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. 75 | 76 | # Calculate offsets for each grid 77 | grid_x = torch.arange(nGx).repeat(nGy, 1).view([1, 1, nGy, nGx]).type(FloatTensor) 78 | grid_y = torch.arange(nGy).repeat(nGx, 1).t().view([1, 1, nGy, nGx]).type(FloatTensor) 79 | scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors]) 80 | anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1)) 81 | anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1)) 82 | 83 | # Add offset and scale with anchors 84 | pred_boxes = FloatTensor(prediction[..., :4].shape) 85 | pred_boxes[..., 0] = x.detach() + grid_x 86 | pred_boxes[..., 1] = y.detach() + grid_y 87 | pred_boxes[..., 2] = torch.exp(w.detach()) * anchor_w 88 | pred_boxes[..., 3] = torch.exp(h.detach()) * anchor_h 89 | 90 | # Training 91 | if targets is not None: 92 | 93 | if x.is_cuda: 94 | self.mse_loss = self.mse_loss.cuda() 95 | self.bce_loss = self.bce_loss.cuda() 96 | #self.ce_loss = self.ce_loss.cuda() 97 | 98 | nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, corr = build_targets( 99 | pred_boxes=pred_boxes.cpu().detach(), 100 | pred_conf=pred_conf.cpu().detach(), 101 | #pred_cls=pred_cls.cpu().detach(), 102 | target=targets.cpu().detach(), 103 | anchors=scaled_anchors.cpu().detach(), 104 | num_anchors=nA, 105 | num_classes=self.num_classes, 106 | grid_size_y=nGy, 107 | grid_size_x=nGx, 108 | ignore_thres=self.ignore_thres, 109 | img_dim=self.image_dim, 110 | ) 111 | 112 | nProposals = int((pred_conf > 0.5).sum().item()) 113 | recall = float(nCorrect / nGT) if nGT else 1 114 | nCorrPrec = int((corr).sum().item()) 115 | precision = float(nCorrPrec / nProposals) if nProposals > 0 else 0 116 | 117 | # Handle masks 118 | mask = mask.type(ByteTensor) 119 | conf_mask = conf_mask.type(ByteTensor) 120 | 121 | # Handle target variables 122 | tx = tx.type(FloatTensor) 123 | ty = ty.type(FloatTensor) 124 | tw = tw.type(FloatTensor) 125 | th = th.type(FloatTensor) 126 | tconf = tconf.type(FloatTensor) 127 | #tcls = tcls.type(LongTensor) 128 | 129 | # Get conf mask where gt and where there is no gt 130 | conf_mask_true = mask 131 | conf_mask_false = conf_mask - mask 132 | 133 | mask = mask.bool() 134 | conf_mask_false = conf_mask_false.bool() 135 | conf_mask_true = conf_mask_true.bool() 136 | 137 | # Mask outputs to ignore non-existing objects 138 | loss_x = self.mse_loss(x[mask], tx[mask]) 139 | loss_y = self.mse_loss(y[mask], ty[mask]) 140 | loss_w = self.mse_loss(w[mask], tw[mask]) 141 | loss_h = self.mse_loss(h[mask], th[mask]) 142 | loss_conf = 30*self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + 1*self.bce_loss( 143 | pred_conf[conf_mask_true], tconf[conf_mask_true] 144 | ) 145 | #loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1)) 146 | loss = loss_x + loss_y + loss_w + loss_h + loss_conf #+ loss_cls 147 | 148 | return ( 149 | loss, 150 | loss_x.item(), 151 | loss_y.item(), 152 | loss_w.item(), 153 | loss_h.item(), 154 | loss_conf.item(), 155 | 0, 
156 | recall, 157 | precision, 158 | ) 159 | 160 | else: 161 | # If not in training phase return predictions 162 | output = torch.cat( 163 | ( 164 | pred_boxes.view(nB, -1, 4) * stride, 165 | pred_conf.view(nB, -1, 1), 166 | #pred_cls.view(nB, -1, self.num_classes), 167 | ), 168 | -1, 169 | ) 170 | return output 171 | 172 | class ROBO(nn.Module): 173 | def __init__(self, inch=3, ch=4, img_shape=(384,512), bn = False, halfRes=False): 174 | super(ROBO,self).__init__() 175 | 176 | self.img_shape = (img_shape[0] // 2,img_shape[1] // 2) if halfRes else img_shape 177 | 178 | self.bn = bn 179 | self.halfRes = halfRes 180 | 181 | self.loss_names = ["x", "y", "w", "h", "conf", "cls", "recall", "precision"] 182 | 183 | self.branchLayers = [ 184 | 10 if halfRes else 11, 185 | -1 186 | ] 187 | 188 | self.anchors = [ 189 | (42,39), 190 | (29,16), 191 | (31,109), 192 | (79,106), 193 | ] 194 | if bn: 195 | ch *= 2 196 | self.downPart = nn.ModuleList([ 197 | None if halfRes else Conv(inch,ch,2), # Stride: 2 198 | Conv(inch if halfRes else ch,ch*2,2), # Stride: 4 199 | Conv(ch*2,ch*4,2), # Stride: 8 200 | Conv(ch*4,ch*2,1,1), 201 | Conv(ch*2,ch*4,1), 202 | Conv(ch*4,ch*8,2), # Stride: 16 203 | Conv(ch*8,ch*4,1,1), 204 | Conv(ch*4,ch*8,1), 205 | Conv(ch*8,ch*16,2), # Stride: 32 206 | Conv(ch*16,ch*8,1,1), 207 | Conv(ch*8,ch*16,1), 208 | Conv(ch*16,ch*8,1,1), 209 | Conv(ch*8,ch*16,1), # First Classifier 210 | Conv(ch*16,ch*32,2), # Stride: 64 211 | Conv(ch*32,ch*16,1,1), 212 | Conv(ch*16,ch*32,1), 213 | Conv(ch*32,ch*16,1,1), 214 | Conv(ch*16,ch*32,1) # Second Classifier 215 | ]) 216 | self.classifiers = nn.ModuleList([ 217 | nn.Conv2d(ch*16,10,1), 218 | nn.Conv2d(ch*32,10,1) 219 | ]) 220 | else: 221 | self.downPart = nn.ModuleList([ 222 | None if halfRes else Conv(inch,ch,2), # Stride: 2 223 | Conv(inch if halfRes else ch,ch*2,2), # Stride: 4 224 | Conv(ch*2,ch*4,2), # Stride: 8 225 | Conv(ch*4,ch*4,1), 226 | Conv(ch*4,ch*8,2), # Stride: 16 227 | Conv(ch*8,ch*8,1), 228 | Conv(ch*8,ch*16,2), # Stride: 32 229 | Conv(ch*16,ch*16,1), 230 | Conv(ch*16,ch*16,1), 231 | Conv(ch*16,ch*16,1), 232 | Conv(ch*16,ch*16,1), # First Classifier 233 | Conv(ch*16,ch*32,2), # Stride: 64 234 | Conv(ch*32,ch*16,1), 235 | Conv(ch*16,ch*32,1), 236 | Conv(ch*32,ch*16,1), 237 | Conv(ch*16,ch*32,1) # Second Classifier 238 | ]) 239 | self.classifiers = nn.ModuleList([ 240 | nn.Conv2d(ch*16,10,1), 241 | nn.Conv2d(ch*32,10,1) 242 | ]) 243 | self.yolo = nn.ModuleList([ 244 | YOLOLayer(self.anchors[0:2], 2, img_shape[0]), 245 | YOLOLayer(self.anchors[2:4], 2, img_shape[0]) 246 | ]) 247 | 248 | def forward(self, x, targets = None): 249 | 250 | is_training = targets is not None 251 | output = [] 252 | self.losses = defaultdict(float) 253 | outNum = 0 254 | self.recprec = [0, 0, 0, 0] 255 | layer_outputs = [x] 256 | 257 | for layer in self.downPart: 258 | if layer is not None: 259 | layer_outputs.append(layer(layer_outputs[-1])) 260 | 261 | for idx, cl, yolo in zip(self.branchLayers,self.classifiers,self.yolo): 262 | out = cl(layer_outputs[idx]) 263 | if is_training: 264 | out, *losses = yolo(out, targets[outNum]) 265 | self.recprec[outNum * 2] += (losses[-2]) 266 | self.recprec[outNum * 2 + 1] += (losses[-1]) 267 | for name, loss in zip(self.loss_names, losses): 268 | self.losses[name] += loss 269 | # Test phase: Get detections 270 | else: 271 | out = yolo(out) 272 | output.append(out) 273 | outNum += 1 274 | 275 | 276 | self.losses["recall"] /= outNum 277 | self.losses["precision"] /= outNum 278 | return sum(output) if is_training else 
torch.cat(output, 1) 279 | 280 | 281 | def get_computations(self,pruned = False): 282 | H, W = self.img_shape 283 | computations = [] 284 | 285 | for module in self.downPart: 286 | if module is not None: 287 | ratio = float(module.conv.weight.nonzero().size(0)) / float(module.conv.weight.numel()) if pruned else 1 288 | if module is not None: 289 | comp, W, H = module.getComp(W,H) 290 | computations.append(comp * ratio) 291 | 292 | H, W = self.img_shape[0] // 32, self.img_shape[1] // 32 293 | computations.append(H*W*64*10*2 * (2 if self.bn else 1)) 294 | computations.append(H*W*128*10//2 * (2 if self.bn else 1)) 295 | 296 | return computations 297 | 298 | def getParams(self): 299 | params = sum([layer.getParams() for layer in self.downPart if layer is not None]) 300 | params += 64*10*2 * (2 if self.bn else 1) 301 | params += 128*10//2 * (2 if self.bn else 1) 302 | return params 303 | 304 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 18 | 19 | 24 | 25 | 26 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 55 | 56 | 57 | 58 | 59 | 78 | 79 | 80 | 99 | 100 | 101 | 120 | 121 | 122 | 141 | 142 | 143 | 162 | 163 | 164 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 1543402315301 208 | 212 | 213 | 214 | 215 | 217 | 218 | 229 | 230 | 231 | 232 | 233 | np.min(img[:,:,0]) 234 | Python 235 | EXPRESSION 236 | 237 | 238 | np.min(img) 239 | Python 240 | EXPRESSION 241 | 242 | 243 | np.max(img) 244 | Python 245 | EXPRESSION 246 | 247 | 248 | target[b,t,:] 249 | Python 250 | EXPRESSION 251 | 252 | 253 | 254 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from models import * 4 | from utils.utils import * 5 | from utils.datasets import * 6 | from utils.parse_config import * 7 | 8 | import sys 9 | 10 | import argparse 11 | import progressbar 12 | 13 | import torch 14 | from torch.utils.data import DataLoader 15 | 16 | def getmAP(all_annotations,all_detections): 17 | mAPs = np.zeros((2, 5)) 18 | APs = np.zeros((2, 4, 5)) 19 | thresholds = np.array([[4, 8, 16, 32, 64], [0.75, 0.5, 0.25, 0.1, 0.05]]) 20 | for useIoU in range(2): 21 | for threshIdx in range(5): 22 | average_precisions = {} 23 | for label in range(num_classes): 24 | true_positives = [] 25 | scores = [] 26 | num_annotations = 0 27 | 28 | for i in range(len(all_annotations)): 29 | detections = all_detections[i][label] 30 | annotations = all_annotations[i][label] 31 | 32 | num_annotations += annotations.shape[0] 33 | detected_annotations = [] 34 | 35 | for *bbox, score in detections: 36 | scores.append(score) 37 | 38 | if annotations.shape[0] == 0: 39 | true_positives.append(0) 40 | continue 41 | 42 | if useIoU > 0: 43 | overlaps = bbox_iou_numpy(np.expand_dims(bbox, axis=0), annotations) 44 | assigned_annotation = np.argmax(overlaps, axis=1) 45 | max_overlap = overlaps[0, assigned_annotation] 46 | 47 | if max_overlap >= thresholds[ 48 | useIoU, threshIdx] and assigned_annotation not in detected_annotations: 49 | true_positives.append(1) 50 | detected_annotations.append(assigned_annotation) 51 | else: 52 | true_positives.append(0) 53 | else: 54 | 
distances = bbox_dist(bbox, annotations) 55 | assigned_annotation = np.argmin(distances) 56 | min_dist = distances[assigned_annotation] 57 | 58 | if min_dist <= thresholds[ 59 | useIoU, threshIdx] and assigned_annotation not in detected_annotations: 60 | true_positives.append(1) 61 | detected_annotations.append(assigned_annotation) 62 | else: 63 | true_positives.append(0) 64 | 65 | # no annotations -> AP for this class is 0 66 | if num_annotations == 0: 67 | average_precisions[label] = 0 68 | continue 69 | 70 | true_positives = np.array(true_positives) 71 | false_positives = np.ones_like(true_positives) - true_positives 72 | # sort by score 73 | indices = np.argsort(-np.array(scores)) 74 | false_positives = false_positives[indices] 75 | true_positives = true_positives[indices] 76 | 77 | # compute false positives and true positives 78 | false_positives = np.cumsum(false_positives) 79 | true_positives = np.cumsum(true_positives) 80 | 81 | # compute recall and precision 82 | recall = true_positives / num_annotations 83 | precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps) 84 | 85 | # compute average precision 86 | average_precision = compute_ap(recall, precision) 87 | average_precisions[label] = average_precision 88 | 89 | for c, ap in average_precisions.items(): 90 | APs[useIoU, c, threshIdx] = ap 91 | 92 | mAP = np.mean(list(average_precisions.values())) 93 | mAPs[useIoU, threshIdx] = mAP 94 | return mAPs, APs 95 | 96 | if __name__ == '__main__': 97 | parser = argparse.ArgumentParser() 98 | parser.add_argument("--model_config_path", type=str, default="config/robo-down-small.cfg", help="path to model config file") 99 | parser.add_argument("--class_path", type=str, default="data/robo.names", help="path to class label file") 100 | parser.add_argument('--batch_size', type=int, default=64, help='size of the batches') 101 | parser.add_argument("--iou_thres", type=float, default=0.5, help="iou threshold required to qualify as detected") 102 | parser.add_argument("--conf_thres", type=float, default=0.5, help="object confidence threshold") 103 | parser.add_argument("--nms_thres", type=float, default=0.45, help="iou thresshold for non-maximum suppression") 104 | parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation") 105 | parser.add_argument("--img_size", type=int, default=(384,512), help="size of each image dimension") 106 | parser.add_argument("--transfer", help="Layers to truly train", action="store_true", default=False) 107 | parser.add_argument("--finetune", help="Finetuning", action="store_true", default=False) 108 | parser.add_argument("--bn", help="Use bottleneck", action="store_true", default=False) 109 | parser.add_argument("--yu", help="Use 2 channels", action="store_true", default=False) 110 | parser.add_argument("--hr", help="Use half res", action="store_true", default=False) 111 | parser.add_argument("--lprop", help="Use half res", action="store_true", default=False) 112 | opt = parser.parse_args() 113 | 114 | cuda = torch.cuda.is_available() 115 | 116 | lprop = opt.lprop 117 | 118 | data_config_path = "config/roboFinetune.data" if opt.finetune else "config/robo.data" 119 | img_size = (192,256) if opt.hr else (384,512) 120 | 121 | name = "checkpoints/bestFinetune" if opt.finetune else "checkpoints/best" 122 | if opt.yu: 123 | name += "2C" 124 | if opt.bn: 125 | name += "BN" 126 | if opt.hr: 127 | name += "HR" 128 | 129 | weights_path = [] 130 | if opt.transfer: 131 | weights_path = 
sorted(glob.glob(name + "T*.weights"),reverse=True) 132 | elif opt.finetune: 133 | weights_path = sorted(glob.glob(name + "*_*.weights"),reverse=True) 134 | weights_path += [name + ".weights"] 135 | if not opt.bn: 136 | weights_path = [path for path in weights_path if "BN" not in path] 137 | if not opt.yu: 138 | weights_path = [path for path in weights_path if "2C" not in path] 139 | if not opt.hr: 140 | weights_path = [path for path in weights_path if "HR" not in path] 141 | if lprop: 142 | weights_path = [weights_path[0]] 143 | 144 | # Get data configuration 145 | data_config = parse_data_config(data_config_path) 146 | test_path = data_config["valid"] 147 | if lprop: 148 | test_path = "../../Data/RoboCup" if sys.platform != 'win32' else "D:/Datasets/RoboCup" 149 | num_classes = int(data_config["classes"]) 150 | channels = 2 if opt.yu else 3 151 | seq_len = 4 152 | 153 | # Initiate model 154 | for path in weights_path: 155 | print(path) 156 | model = ROBO(inch=channels,bn=opt.bn, halfRes=opt.hr) 157 | print(model.getParams()) 158 | model.load_state_dict(torch.load(path,map_location={'cuda:0': 'cpu'})) 159 | 160 | print(count_zero_weights(model)) 161 | 162 | #with torch.no_grad(): 163 | #pruneModel(model.parameters()) 164 | 165 | computations = model.get_computations(True) 166 | 167 | print(computations) 168 | print(sum(computations)) 169 | 170 | if cuda: 171 | model = model.cuda() 172 | 173 | model.eval() 174 | 175 | # Get dataloader 176 | if lprop: 177 | dataset = LPDataSet(test_path, train=False, finetune=opt.finetune, yu=opt.yu, img_size=img_size,len_seq=seq_len) 178 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=opt.n_cpu, collate_fn=my_collate) 179 | else: 180 | dataset = ListDataset(test_path, train=False, synth=opt.finetune, yu=opt.yu, img_size=img_size) 181 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.n_cpu) 182 | 183 | Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor 184 | 185 | print("Compute mAP...") 186 | 187 | all_detections = [] 188 | all_lp_detections = [] 189 | all_annotations = [] 190 | 191 | bar = progressbar.ProgressBar(0, len(dataloader), redirect_stdout=False) 192 | 193 | for batch_i, data in enumerate(dataloader): 194 | 195 | if lprop: 196 | imgs, targets, cvimgs = data 197 | cvimgs = cvimgs[0] 198 | else: 199 | _, imgs, targets = data 200 | imgs = imgs.type(Tensor) 201 | 202 | with torch.no_grad(): 203 | outputs = model(imgs) 204 | outputs = non_max_suppression(outputs, 80, conf_thres=opt.conf_thres, nms_thres=opt.nms_thres) 205 | 206 | for output, annotations in zip(outputs, targets): 207 | 208 | all_detections.append([np.array([]) for _ in range(num_classes)]) 209 | if output is not None: 210 | # Get predicted boxes, confidence scores and labels 211 | pred_boxes = output[:, :5].cpu().numpy() 212 | scores = output[:, 4].cpu().numpy() 213 | pred_labels = output[:, -1].cpu().numpy() 214 | 215 | # Order by confidence 216 | sort_i = np.argsort(scores) 217 | pred_labels = pred_labels[sort_i] 218 | pred_boxes = pred_boxes[sort_i] 219 | 220 | for label in range(num_classes): 221 | all_detections[-1][label] = pred_boxes[pred_labels == label] 222 | 223 | all_annotations.append([np.array([]) for _ in range(num_classes)]) 224 | if any(annotations[:, -1] > 0): 225 | 226 | annotation_labels = annotations[annotations[:, -1] > 0, 0].numpy() 227 | _annotation_boxes = annotations[annotations[:, -1] > 0, 1:] 228 | 229 | # Reformat to x1, y1, x2, y2 and rescale 
to image dimensions 230 | annotation_boxes = np.empty_like(_annotation_boxes) 231 | annotation_boxes[:, 0] = (_annotation_boxes[:, 0] - _annotation_boxes[:, 2] / 2)*opt.img_size[1] 232 | annotation_boxes[:, 1] = (_annotation_boxes[:, 1] - _annotation_boxes[:, 3] / 2)*opt.img_size[0] 233 | annotation_boxes[:, 2] = (_annotation_boxes[:, 0] + _annotation_boxes[:, 2] / 2)*opt.img_size[1] 234 | annotation_boxes[:, 3] = (_annotation_boxes[:, 1] + _annotation_boxes[:, 3] / 2)*opt.img_size[0] 235 | #annotation_boxes *= opt.img_size 236 | 237 | for label in range(num_classes): 238 | all_annotations[-1][label] = annotation_boxes[annotation_labels == label, :] 239 | 240 | if lprop: 241 | for i in range(seq_len): 242 | if i == 0: 243 | all_lp_detections.append(labelProp(cvimgs[i],cvimgs[i+1],all_detections[-seq_len+1])) 244 | else: 245 | all_lp_detections.append(labelProp(cvimgs[i],cvimgs[i-1],all_detections[-seq_len+i-1])) 246 | 247 | bar.update(batch_i) 248 | bar.finish() 249 | 250 | mAPs, APs = getmAP(all_annotations,all_detections) 251 | if not lprop: 252 | for c in range(4): 253 | print("Class %d:" % c) 254 | for i in range(2): 255 | print("Dist: " if i < 1 else "IoU: ",APs[i,c,:]) 256 | print("mAP:") 257 | for i in range(2): 258 | print("Dist: " if i < 1 else "IoU: ",mAPs[i,:]) 259 | 260 | if lprop: 261 | LPmAPs, LPAPs = getmAP(all_annotations, all_lp_detections) 262 | print("LP") 263 | print("mAP:") 264 | for i in range(2): 265 | print("Dist: " if i < 1 else "IoU: ",LPmAPs[i,:]) 266 | -------------------------------------------------------------------------------- /utils/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import random 3 | import os 4 | import os.path as osp 5 | import numpy as np 6 | 7 | import torch 8 | 9 | from torch.utils.data import Dataset 10 | from PIL import Image 11 | import torchvision.transforms as transforms 12 | import torchvision.transforms.functional as F 13 | import numbers 14 | import cv2 15 | import re 16 | 17 | def tryint(s): 18 | try: 19 | return int(s) 20 | except: 21 | return s 22 | 23 | def alphanum_key(s): 24 | """ Turn a string into a list of string and number chunks. 25 | "z23a" -> ["z", 23, "a"] 26 | """ 27 | return [ tryint(c) for c in re.split('([0-9]+)', s) ] 28 | 29 | def my_collate(batch): 30 | imgs,targets,cvimgs = zip(*batch) 31 | return torch.cat(imgs),torch.cat(targets),cvimgs 32 | 33 | def get_immediate_subdirectories(a_dir): 34 | return [name for name in os.listdir(a_dir) 35 | if os.path.isdir(os.path.join(a_dir, name))] 36 | 37 | class RandomAffineCust(object): 38 | """Random affine transformation of the image keeping center invariant 39 | 40 | Args: 41 | degrees (sequence or float or int): Range of degrees to select from. 42 | If degrees is a number instead of sequence like (min, max), the range of degrees 43 | will be (-degrees, +degrees). Set to 0 to deactivate rotations. 44 | translate (tuple, optional): tuple of maximum absolute fraction for horizontal 45 | and vertical translations. For example translate=(a, b), then horizontal shift 46 | is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is 47 | randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default. 48 | scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is 49 | randomly sampled from the range a <= scale <= b. Will keep original scale by default. 
50 | shear (sequence or float or int, optional): Range of degrees to select from. 51 | If degrees is a number instead of sequence like (min, max), the range of degrees 52 | will be (-degrees, +degrees). Will not apply shear by default 53 | resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional): 54 | An optional resampling filter. See `filters`_ for more information. 55 | If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST. 56 | fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0) 57 | 58 | .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters 59 | 60 | """ 61 | 62 | def __init__(self, degrees, translate=None, scale=None, resample=False, fillcolor=0): 63 | if isinstance(degrees, numbers.Number): 64 | if degrees < 0: 65 | raise ValueError("If degrees is a single number, it must be positive.") 66 | self.degrees = (-degrees, degrees) 67 | else: 68 | assert isinstance(degrees, (tuple, list)) and len(degrees) == 2, \ 69 | "degrees should be a list or tuple and it must be of length 2." 70 | self.degrees = degrees 71 | 72 | if translate is not None: 73 | assert isinstance(translate, (tuple, list)) and len(translate) == 2, \ 74 | "translate should be a list or tuple and it must be of length 2." 75 | for t in translate: 76 | if not (0.0 <= t <= 1.0): 77 | raise ValueError("translation values should be between 0 and 1") 78 | self.translate = translate 79 | 80 | if scale is not None: 81 | assert isinstance(scale, (tuple, list)) and len(scale) == 2, \ 82 | "scale should be a list or tuple and it must be of length 2." 83 | for s in scale: 84 | if s <= 0: 85 | raise ValueError("scale values should be positive") 86 | self.scale = scale 87 | 88 | self.resample = resample 89 | self.fillcolor = fillcolor 90 | 91 | @staticmethod 92 | def get_params(degrees, translate, scale_ranges, img_size): 93 | """Get parameters for affine transformation 94 | 95 | Returns: 96 | sequence: params to be passed to the affine transformation 97 | """ 98 | angle = random.uniform(degrees[0], degrees[1]) 99 | if translate is not None: 100 | max_dx = translate[0] * img_size[0] 101 | max_dy = translate[1] * img_size[1] 102 | translations = (np.round(random.uniform(-max_dx, max_dx)), 103 | np.round(random.uniform(-max_dy, max_dy))) 104 | else: 105 | translations = (0, 0) 106 | 107 | if scale_ranges is not None: 108 | scale = random.uniform(scale_ranges[0], scale_ranges[1]) 109 | else: 110 | scale = 1.0 111 | 112 | shear = 0.0 113 | 114 | return angle, translations, scale, shear 115 | 116 | def __call__(self, img, label): 117 | """ 118 | img (PIL Image): Image to be transformed. 119 | 120 | Returns: 121 | PIL Image: Affine transformed image. 
122 | """ 123 | ret = self.get_params(self.degrees, self.translate, self.scale, img.size) 124 | 125 | angle = np.deg2rad(ret[0]) 126 | translations = (ret[1][0]/img.size[0],ret[1][1]/img.size[1]) 127 | scale = ret[2] 128 | imgRatio = img.size[0]/img.size[1] 129 | x = (label[:,1]-0.5)*imgRatio 130 | y = label[:,2]-0.5 131 | label[:,1] = (x*np.cos(angle) - y*np.sin(angle))*scale/imgRatio + 0.5 + translations[0] 132 | label[:,2] = (x*np.sin(angle) + y*np.cos(angle))*scale + 0.5 + translations[1] 133 | label[:, 3] *= scale 134 | label[:, 4] *= scale 135 | 136 | o_img = F.affine(img, *ret, resample=self.resample, fillcolor=self.fillcolor) 137 | return o_img, label 138 | 139 | class ImageFolder(Dataset): 140 | def __init__(self, folder_path, type = '%s/*.*', synth = False, yu = False, hr = False): 141 | self.files = sorted(glob.glob(type % folder_path)) 142 | self.yu = yu 143 | self.hr = hr 144 | self.resize = transforms.Resize((192,256)) 145 | self.mean = [0.4637419, 0.47166784, 0.48316576] if synth else [0.36224657, 0.41139355, 0.28278301] 146 | self.std = [0.45211827, 0.16890674, 0.18645908] if synth else [0.3132638, 0.21061972, 0.34144647] 147 | self.transform = transforms.Compose([ 148 | transforms.ToTensor(), 149 | transforms.Normalize(mean=self.mean,std=self.std) 150 | ]) 151 | 152 | def __getitem__(self, index): 153 | img_path = self.files[index % len(self.files)].rstrip() 154 | img = Image.open(img_path) 155 | 156 | if self.hr: 157 | img = self.resize(img) 158 | 159 | input_img = self.transform(img) 160 | 161 | if self.yu: 162 | input_img[1] = input_img[2]*0.5 + input_img[1]*0.5 163 | input_img = input_img[0:2] 164 | 165 | return img_path, input_img 166 | 167 | def __len__(self): 168 | return len(self.files) 169 | 170 | 171 | class ListDataset(Dataset): 172 | def __init__(self, list_path, img_size=(384,512), train=True, synth = False, yu=False): 173 | with open(list_path, 'r') as file: 174 | self.img_files = file.readlines() 175 | self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') for path in self.img_files] 176 | self.img_shape = img_size 177 | self.max_objects = 50 178 | self.train = train 179 | self.synth = synth 180 | self.img_size = img_size 181 | self.yu = yu 182 | self.jitter = ColorJitter(0.3,0.3,0.3,3.1415/6,0.05) 183 | self.resize = transforms.Resize(img_size) 184 | self.affine = RandomAffineCust(5,(0.025,0.025),(0.9,1.1),fillcolor=0) 185 | self.mean = [0.36269532, 0.41144562, 0.282713] if synth else [0.40513613, 0.48072927, 0.48718367] 186 | self.std = [0.31111388, 0.21010718, 0.34060917] if synth else [0.44540985, 0.15460468, 0.18062305] 187 | self.normalize = transforms.Normalize(mean=self.mean,std=self.std) 188 | 189 | def __getitem__(self, index): 190 | 191 | #--------- 192 | # Image 193 | #--------- 194 | img_path = self.img_files[index % len(self.img_files)].rstrip() 195 | img = Image.open(img_path) 196 | 197 | if self.img_size[0] != img.size[1] and self.img_size[1] != img.size[0]: 198 | img = self.resize(img) 199 | 200 | w, h = img.size 201 | 202 | # --------- 203 | # Label 204 | # --------- 205 | label_path = self.label_files[index % len(self.img_files)].rstrip() 206 | labels = np.loadtxt(label_path).reshape(-1, 5) 207 | 208 | if self.train: 209 | img,labels = self.affine(img,labels) 210 | 211 | p = 0 212 | input_img = transforms.functional.to_tensor(img) 213 | input_img = self.normalize(input_img) 214 | if self.train: 215 | p = torch.rand(1).item() 216 | if p > 0.5: 217 | input_img = input_img.flip(2) 218 | input_img 
= self.jitter(input_img) 219 | 220 | if self.yu: 221 | input_img[1] = input_img[2]*0.5 + input_img[1]*0.5 222 | input_img = input_img[0:2] 223 | 224 | if p > 0.5: 225 | labels[:,1] = 1 - labels[:,1] 226 | 227 | # Squeeze centers inside image 228 | labels[:, 1] = np.clip(labels[:, 1], a_min=0, a_max = 0.999) 229 | labels[:, 2] = np.clip(labels[:, 2], a_min=0, a_max = 0.999) 230 | 231 | smallLabels = np.array([lab for lab in labels if lab[0] < 2]) 232 | bigLabels = np.array([lab for lab in labels if lab[0] >= 2]) 233 | 234 | if self.train: 235 | # Fill matrix 236 | filled_labels_small = np.zeros((self.max_objects//2, 5)) 237 | filled_labels_big = np.zeros((self.max_objects//2, 5)) 238 | if smallLabels is not None and smallLabels.shape[0] > 0: 239 | filled_labels_small[range(len(smallLabels))[:self.max_objects]] = smallLabels[:self.max_objects] 240 | filled_labels_small = torch.from_numpy(filled_labels_small) 241 | if bigLabels is not None and bigLabels.shape[0] > 0: 242 | bigLabels[:,0] -= 2 243 | filled_labels_big[range(len(bigLabels))[:self.max_objects]] = bigLabels[:self.max_objects] 244 | filled_labels_big = torch.from_numpy(filled_labels_big) 245 | 246 | return img_path, input_img, (filled_labels_small,filled_labels_big) 247 | else: 248 | filled_labels = np.zeros((self.max_objects, 5)) 249 | if labels is not None: 250 | filled_labels[range(len(labels))[:self.max_objects]] = labels[:self.max_objects] 251 | filled_labels = torch.from_numpy(filled_labels) 252 | 253 | return img_path, input_img, filled_labels 254 | 255 | 256 | def __len__(self): 257 | return len(self.img_files) 258 | 259 | def myRGB2YUV(img): 260 | mtx = torch.FloatTensor([[0.299,0.587,0.114],[-0.14713,-0.28886,0.436],[0.615,-0.51499,-0.10001]]) 261 | return torch.einsum('nm,mbc->nbc',mtx,img) 262 | 263 | class ColorJitter(object): 264 | def __init__(self,b=0.3,c=0.3,s=0.3,h=3.1415/6,var=0.05): 265 | super(ColorJitter,self).__init__() 266 | self.b = b 267 | self.c = c 268 | self.s = s 269 | self.h = h 270 | self.var = var 271 | 272 | def __call__(self, img): 273 | b_val = random.uniform(-self.b,self.b) 274 | c_val = random.uniform(1-self.c,1+self.c) 275 | s_val = random.uniform(1-self.s,1+self.s) 276 | h_val = random.uniform(-self.h,self.h) 277 | 278 | mtx = torch.FloatTensor([[s_val*np.cos(h_val),-np.sin(h_val)],[np.sin(h_val),s_val*np.cos(h_val)]]) 279 | 280 | img += torch.randn_like(img)*self.var 281 | img[0] = (img[0]+b_val)*c_val 282 | if self.s > 0 and self.h > 0: 283 | img[1:] = torch.einsum('nm,mbc->nbc',mtx,img[1:]) 284 | 285 | return img 286 | 287 | class LPDataSet(Dataset): 288 | def __init__(self, root, img_size=(384,512), train=True, finetune = False, yu=False, len_seq = 2): 289 | self.finetune = finetune 290 | self.img_size = img_size 291 | self.yu = yu 292 | self.len_seq = len_seq 293 | self.max_objects = 50 294 | self.root = osp.join(root,"LabelProp") 295 | self.split = "train" if train else "val" 296 | self.resize = transforms.Resize(img_size) 297 | self.mean = [0.34190056, 0.4833289, 0.48565758] if finetune else [0.36269532, 0.41144562, 0.282713] 298 | self.std = [0.47421749, 0.13846053, 0.1714848] if finetune else [0.31111388, 0.21010718, 0.34060917] 299 | self.normalize = transforms.Normalize(mean=self.mean,std=self.std) 300 | self.images = [] 301 | self.labels = [] 302 | self.predictions = [] 303 | 304 | 305 | data_dir = osp.join(self.root,"Real" if finetune else "Synthetic") 306 | data_dir = osp.join(data_dir, self.split) 307 | 308 | for dir in get_immediate_subdirectories(data_dir): 309 | currDir = 
osp.join(data_dir,dir) 310 | img_dir = osp.join(currDir,"images") 311 | images = [] 312 | for file in sorted(glob.glob1(img_dir, "*.png"), key=alphanum_key): 313 | images.append(osp.join(img_dir, file)) 314 | self.images.append(images) 315 | self.labels.append([path.replace('.png', '.txt').replace('.jpg', '.txt') for path in images]) 316 | 317 | def __len__(self): 318 | length = 0 319 | for imgs in self.images: 320 | length += len(imgs) - self.len_seq + 1 321 | return length 322 | 323 | def __getitem__(self, index): 324 | dirindex = 0 325 | itemindex = index 326 | 327 | #print index 328 | 329 | for imgs in self.images: 330 | #print(dirindex, itemindex, len(imgs)) 331 | if itemindex >= len(imgs) - self.len_seq + 1: 332 | dirindex += 1 333 | itemindex -= (len(imgs) - self.len_seq + 1) 334 | else: 335 | break 336 | 337 | #print(dirindex, itemindex) 338 | labels = [] 339 | imgs = [] 340 | cvimgs = [] 341 | for i in range(self.len_seq): 342 | img_file = self.images[dirindex][itemindex+i] 343 | label_file = self.labels[dirindex][itemindex+i].rstrip() 344 | 345 | img = Image.open(img_file).convert('RGB') 346 | label = np.loadtxt(label_file).reshape(-1, 5) 347 | # Squeeze centers inside image 348 | label[:, 1] = np.clip(label[:, 1], a_min=0, a_max = 0.999) 349 | label[:, 2] = np.clip(label[:, 2], a_min=0, a_max = 0.999) 350 | 351 | if self.img_size[0] != img.size[1] and self.img_size[1] != img.size[0]: 352 | img = self.resize(img) 353 | 354 | img_ten = cv2.cvtColor(np.array(img),cv2.COLOR_RGB2YUV) 355 | img_ten = transforms.functional.to_tensor(img_ten).float() 356 | img_ten = self.normalize(img_ten) 357 | if self.yu: 358 | img_ten[1] = img_ten[2] * 0.5 + img_ten[1] * 0.5 359 | img_ten = img_ten[0:2] 360 | img_ten = img_ten.unsqueeze(0) 361 | 362 | filled_label = np.zeros((self.max_objects, 5)) 363 | if label is not None: 364 | filled_label[range(len(label))[:self.max_objects]] = label[:self.max_objects] 365 | filled_label = torch.from_numpy(filled_label).unsqueeze(0) 366 | 367 | labels.append(filled_label) 368 | imgs.append(img_ten) 369 | cvimgs.append(cv2.resize(cv2.cvtColor(np.array(img),cv2.COLOR_RGB2GRAY),(160,120))) 370 | 371 | imgs = torch.cat(imgs) 372 | labels = torch.cat(labels) 373 | return imgs, labels, cvimgs -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import math 3 | import time 4 | import torch 5 | import torch.nn as nn 6 | import numpy as np 7 | import glob 8 | from PIL import Image 9 | import progressbar 10 | import cv2 11 | import os 12 | 13 | 14 | def load_classes(path): 15 | """ 16 | Loads class labels at 'path' 17 | """ 18 | fp = open(path, "r") 19 | names = fp.read().split("\n")[:-1] 20 | return names 21 | 22 | 23 | def compute_ap(recall, precision): 24 | """ Compute the average precision, given the recall and precision curves. 25 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 26 | 27 | # Arguments 28 | recall: The recall curve (list). 29 | precision: The precision curve (list). 30 | # Returns 31 | The average precision as computed in py-faster-rcnn. 
32 | """ 33 | # correct AP calculation 34 | # first append sentinel values at the end 35 | mrec = np.concatenate(([0.0], recall, [1.0])) 36 | mpre = np.concatenate(([0.0], precision, [0.0])) 37 | 38 | # compute the precision envelope 39 | for i in range(mpre.size - 1, 0, -1): 40 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 41 | 42 | # to calculate area under PR curve, look for points 43 | # where X axis (recall) changes value 44 | i = np.where(mrec[1:] != mrec[:-1])[0] 45 | 46 | # and sum (\Delta recall) * prec 47 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 48 | return ap 49 | 50 | 51 | def bbox_iou(box1, box2, x1y1x2y2=True): 52 | """ 53 | Returns the IoU of two bounding boxes 54 | """ 55 | if not x1y1x2y2: 56 | # Transform from center and width to exact coordinates 57 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 58 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 59 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 60 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 61 | else: 62 | # Get the coordinates of bounding boxes 63 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] 64 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] 65 | 66 | # get the corrdinates of the intersection rectangle 67 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 68 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 69 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 70 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 71 | # Intersection area 72 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( 73 | inter_rect_y2 - inter_rect_y1 + 1, min=0 74 | ) 75 | # Union Area 76 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) 77 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) 78 | 79 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) 80 | 81 | return iou 82 | 83 | 84 | def bbox_iou_numpy(box1, box2): 85 | """Computes IoU between bounding boxes. 86 | Parameters 87 | ---------- 88 | box1 : ndarray 89 | (N, 4) shaped array with bboxes 90 | box2 : ndarray 91 | (M, 4) shaped array with bboxes 92 | Returns 93 | ------- 94 | : ndarray 95 | (N, M) shaped array with IoUs 96 | """ 97 | area = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1]) 98 | 99 | iw = np.minimum(np.expand_dims(box1[:, 2], axis=1), box2[:, 2]) - np.maximum( 100 | np.expand_dims(box1[:, 0], 1), box2[:, 0] 101 | ) 102 | ih = np.minimum(np.expand_dims(box1[:, 3], axis=1), box2[:, 3]) - np.maximum( 103 | np.expand_dims(box1[:, 1], 1), box2[:, 1] 104 | ) 105 | 106 | iw = np.maximum(iw, 0) 107 | ih = np.maximum(ih, 0) 108 | 109 | ua = np.expand_dims((box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1]), axis=1) + area - iw * ih 110 | 111 | ua = np.maximum(ua, np.finfo(float).eps) 112 | 113 | intersection = iw * ih 114 | 115 | return intersection / ua 116 | 117 | 118 | def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4): 119 | """ 120 | Removes detections with lower object confidence score than 'conf_thres' and performs 121 | Non-Maximum Suppression to further filter detections. 
122 | Returns detections with shape: 123 | (x1, y1, x2, y2, object_conf, class_score, class_pred) 124 | """ 125 | 126 | # From (center x, center y, width, height) to (x1, y1, x2, y2) 127 | box_corner = prediction.new(prediction.shape) 128 | box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 129 | box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 130 | box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 131 | box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 132 | prediction[:, :, :4] = box_corner[:, :, :4] 133 | 134 | output = [None for _ in range(len(prediction))] 135 | for image_i, image_pred in enumerate(prediction): 136 | # Filter out confidence scores below threshold 137 | conf_mask = (image_pred[:, 4] >= conf_thres).squeeze() 138 | classPred = torch.cat((torch.zeros(192),torch.ones(192),2*torch.ones(48),3*torch.ones(48))).unsqueeze(1) 139 | if torch.cuda.is_available(): 140 | classPred = classPred.cuda() 141 | classPred = classPred[conf_mask] 142 | image_pred = image_pred[conf_mask] 143 | # If none are remaining => process next image 144 | if not image_pred.size(0): 145 | continue 146 | # Get score and class with highest confidence 147 | class_conf = image_pred[:, 4].unsqueeze(1) 148 | # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) 149 | detections = torch.cat((image_pred[:, :5], class_conf.float(), classPred.float()), 1) 150 | # Iterate through all predicted classes 151 | unique_labels = detections[:, -1].cpu().unique() 152 | if prediction.is_cuda: 153 | unique_labels = unique_labels.cuda() 154 | for c in unique_labels: 155 | # Get the detections with the particular class 156 | detections_class = detections[detections[:, -1] == c] 157 | # Sort the detections by maximum objectness confidence 158 | _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True) 159 | detections_class = detections_class[conf_sort_index] 160 | # Perform non-maximum suppression 161 | max_detections = [] 162 | while detections_class.size(0): 163 | # Get detection with highest confidence and save as max detection 164 | max_detections.append(detections_class[0].unsqueeze(0)) 165 | # Stop if we're at the last detection 166 | if len(detections_class) == 1: 167 | break 168 | # Get the IOUs for all boxes with lower confidence 169 | ious = bbox_iou(max_detections[-1], detections_class[1:]) 170 | # Remove detections with IoU >= NMS threshold 171 | detections_class = detections_class[1:][ious < nms_thres] 172 | 173 | max_detections = torch.cat(max_detections).data 174 | # Add max detections to outputs 175 | output[image_i] = ( 176 | max_detections if output[image_i] is None else torch.cat((output[image_i], max_detections)) 177 | ) 178 | 179 | return output 180 | 181 | def get_immediate_subdirectories(a_dir): 182 | return [name for name in os.listdir(a_dir) 183 | if os.path.isdir(os.path.join(a_dir, name))] 184 | 185 | def labelProp(img_gr,prevImg,BBS): 186 | of = cv2.calcOpticalFlowFarneback(prevImg, img_gr, None, pyr_scale=0.5,levels=2,winsize=15,iterations=2,poly_n=7,poly_sigma=1.5,flags=0) 187 | scale = 4.0 188 | ret = [] 189 | for classBB in BBS: 190 | newClassBB = [] 191 | for BB in classBB: 192 | xMin = max(0, int(BB[0]/scale)) 193 | yMin = max(0, int(BB[1]/scale)) 194 | xMax = min(img_gr.shape[1] - 1, int(math.ceil(BB[2]/scale))) 195 | yMax = min(img_gr.shape[0] - 1, int(math.ceil(BB[3]/scale))) 196 | patch = of[yMin:yMax, xMin:xMax] 197 | meanX = np.mean(patch[:, :, 0])*scale 198 | meanY = 
np.mean(patch[:, :, 1])*scale 199 | newBB = [] 200 | newBB.append(max(0, int(round(BB[0] + meanX)))) 201 | newBB.append(max(0, int(round(BB[1] + meanY)))) 202 | newBB.append(min(img_gr.shape[1]*scale - 1, int(round(BB[2] + meanX)))) 203 | newBB.append(min(img_gr.shape[0]*scale - 1, int(round(BB[3] + meanY)))) 204 | newBB.append(BB[4]) 205 | newClassBB.append(newBB) 206 | ret.append(newClassBB) 207 | return ret 208 | 209 | def pruneModel(params, ratio = 0.01, glasso=False): 210 | i = 0 211 | indices = [] 212 | for param in params: 213 | if param.dim() > 1: 214 | if glasso: 215 | dim = param.size() 216 | if dim.__len__() > 2: 217 | ind = torch.zeros_like(param) 218 | filtCnt = 0 219 | vals = param.pow(2).sum(dim=(1,2,3)).add(1e-8).pow(1 / 2.) 220 | thresh = torch.max(vals) * ratio 221 | for i,v in enumerate(vals): 222 | if v < thresh: 223 | filtCnt += 1 224 | param[i,:] = torch.zeros_like(param[i]) 225 | ind[i,:] = torch.ones_like(ind[i]) 226 | print("Pruned %f%% of the filters" % (filtCnt/vals.numel()*100)) 227 | indices.append(ind.bool()) 228 | else: 229 | indices.append(torch.zeros_like(param).bool()) 230 | else: 231 | thresh = torch.max(torch.abs(param)) * ratio 232 | print("Pruned %f%% of the weights" % ( 233 | float(torch.sum(torch.abs(param) < thresh)) / float(torch.sum(param != 0)) * 100)) 234 | param[torch.abs(param) < thresh] = 0 235 | indices.append(torch.abs(param) < thresh) 236 | i += 1 237 | 238 | return indices 239 | 240 | def count_zero_weights(model,glasso=False): 241 | nonzeroWeights = 0 242 | totalWeights = 0 243 | if glasso: 244 | for param in model.parameters(): 245 | dim = param.size() 246 | if dim.__len__() > 2: 247 | vals = param.pow(2).sum(dim=(1,2,3)).add(1e-8).pow(1/2.) 248 | max = torch.max(vals) 249 | nonzeroWeights += (vals < max * 0.01).sum().float() 250 | totalWeights += vals.numel() 251 | else: 252 | for param in model.parameters(): 253 | max = torch.max(torch.abs(param)) 254 | nonzeroWeights += (torch.abs(param) < max*0.01).sum().float() 255 | totalWeights += param.numel() 256 | return float(nonzeroWeights/totalWeights) 257 | 258 | def build_targets( 259 | pred_boxes, pred_conf, target, anchors, num_anchors, num_classes, grid_size_y, grid_size_x, ignore_thres, img_dim 260 | ): 261 | nB = target.size(0) 262 | nA = num_anchors 263 | #nC = num_classes 264 | nGx = grid_size_x 265 | nGy = grid_size_y 266 | mask = torch.zeros(nB, nA, nGy, nGx) 267 | conf_mask = torch.ones(nB, nA, nGy, nGx) 268 | tx = torch.zeros(nB, nA, nGy, nGx) 269 | ty = torch.zeros(nB, nA, nGy, nGx) 270 | tw = torch.zeros(nB, nA, nGy, nGx) 271 | th = torch.zeros(nB, nA, nGy, nGx) 272 | tconf = torch.ByteTensor(nB, nA, nGy, nGx).fill_(0) 273 | corr = torch.ByteTensor(nB, nA, nGy, nGx).fill_(0) 274 | 275 | nGT = 0 276 | nCorrect = 0 277 | for b in range(nB): 278 | for t in range(target.shape[1]): 279 | if target[b, t].sum() == 0: 280 | continue 281 | nGT += 1 282 | # Convert to position relative to box 283 | # One-hot encoding of label 284 | target_label = int(target[b, t, 0]) 285 | gx = target[b, t, 1] * nGx 286 | gy = target[b, t, 2] * nGy 287 | gw = target[b, t, 3] * nGx 288 | gh = target[b, t, 4] * nGy 289 | # Get grid box indices 290 | gi = int(gx) 291 | gj = int(gy) 292 | 293 | best_n = target_label 294 | # Get ground truth box 295 | gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0) 296 | # Get the best prediction 297 | pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0) 298 | # Masks 299 | mask[b, best_n, gj, gi] = 1 300 | conf_mask[b, best_n, gj, gi] = 1 301 | # 
Coordinates 302 | tx[b, best_n, gj, gi] = gx - gi 303 | ty[b, best_n, gj, gi] = gy - gj 304 | # Width and height 305 | tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16) 306 | th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16) 307 | #tcls[b, best_n, gj, gi, target_label] = 1 308 | tconf[b, best_n, gj, gi] = 1 309 | 310 | # Calculate iou between ground truth and best matching prediction 311 | iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) 312 | score = pred_conf[b, best_n, gj, gi] 313 | if (target_label != 3 or iou > 0.5) and score > 0.5: 314 | nCorrect += 1 315 | corr[b, best_n, gj, gi] = 1 316 | 317 | return nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, corr 318 | 319 | 320 | def to_categorical(y, num_classes): 321 | """ 1-hot encodes a tensor """ 322 | return torch.from_numpy(np.eye(num_classes, dtype="uint8")[y]) 323 | 324 | def bbox_dist(box1,boxes): 325 | distances = np.array([]) 326 | for box2 in boxes: 327 | cent1x = (box1[0] + box1[2]) / 2 328 | cent1y = (box1[1] + box1[3]) / 2 329 | cent2x = (box2[0] + box2[2]) / 2 330 | cent2y = (box2[1] + box2[3]) / 2 331 | distances = np.append(distances,np.sqrt(pow(cent1x-cent2x,2) + pow(cent1y-cent2y,2))) 332 | return distances 333 | 334 | def computeAP(model,dataloader,conf_thres,nms_thres,num_classes,img_size,useIoU,thresh): 335 | Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor 336 | 337 | all_detections = [] 338 | all_annotations = [] 339 | 340 | bar = progressbar.ProgressBar(0, len(dataloader), redirect_stdout=False) 341 | 342 | for batch_i, (_, imgs, targets) in enumerate(dataloader): 343 | 344 | if torch.cuda.is_available(): 345 | imgs = imgs.cuda() 346 | 347 | with torch.no_grad(): 348 | outputs = model(imgs) 349 | outputs = non_max_suppression(outputs, 80, conf_thres=conf_thres, nms_thres=nms_thres) 350 | 351 | for output, annotations in zip(outputs, targets): 352 | 353 | all_detections.append([np.array([]) for _ in range(num_classes)]) 354 | if output is not None: 355 | # Get predicted boxes, confidence scores and labels 356 | pred_boxes = output[:, :5].cpu().numpy() 357 | scores = output[:, 4].cpu().numpy() 358 | pred_labels = output[:, -1].cpu().numpy() 359 | 360 | # Order by confidence 361 | sort_i = np.argsort(scores) 362 | pred_labels = pred_labels[sort_i] 363 | pred_boxes = pred_boxes[sort_i] 364 | 365 | for label in range(num_classes): 366 | all_detections[-1][label] = pred_boxes[pred_labels == label] 367 | 368 | all_annotations.append([np.array([]) for _ in range(num_classes)]) 369 | if any(annotations[:, -1] > 0): 370 | 371 | annotation_labels = annotations[annotations[:, -1] > 0, 0].numpy() 372 | _annotation_boxes = annotations[annotations[:, -1] > 0, 1:] 373 | 374 | # Reformat to x1, y1, x2, y2 and rescale to image dimensions 375 | annotation_boxes = np.empty_like(_annotation_boxes) 376 | annotation_boxes[:, 0] = (_annotation_boxes[:, 0] - _annotation_boxes[:, 2] / 2) * img_size[1] 377 | annotation_boxes[:, 1] = (_annotation_boxes[:, 1] - _annotation_boxes[:, 3] / 2) * img_size[0] 378 | annotation_boxes[:, 2] = (_annotation_boxes[:, 0] + _annotation_boxes[:, 2] / 2) * img_size[1] 379 | annotation_boxes[:, 3] = (_annotation_boxes[:, 1] + _annotation_boxes[:, 3] / 2) * img_size[0] 380 | # annotation_boxes *= opt.img_size 381 | 382 | for label in range(num_classes): 383 | all_annotations[-1][label] = annotation_boxes[annotation_labels == label, :] 384 | 385 | bar.update(batch_i) 386 | bar.finish() 387 | average_precisions = {} 388 | for label in 
range(num_classes): 389 | true_positives = [] 390 | scores = [] 391 | num_annotations = 0 392 | 393 | for i in range(len(all_annotations)): 394 | detections = all_detections[i][label] 395 | annotations = all_annotations[i][label] 396 | 397 | num_annotations += annotations.shape[0] 398 | detected_annotations = [] 399 | 400 | for *bbox, score in detections: 401 | scores.append(score) 402 | 403 | if annotations.shape[0] == 0: 404 | true_positives.append(0) 405 | continue 406 | 407 | if useIoU: 408 | overlaps = bbox_iou_numpy(np.expand_dims(bbox, axis=0), annotations) 409 | assigned_annotation = np.argmax(overlaps, axis=1) 410 | max_overlap = overlaps[0, assigned_annotation] 411 | 412 | if max_overlap >= thresh and assigned_annotation not in detected_annotations: 413 | true_positives.append(1) 414 | detected_annotations.append(assigned_annotation) 415 | else: 416 | true_positives.append(0) 417 | else: 418 | distances = bbox_dist(bbox, annotations) 419 | assigned_annotation = np.argmin(distances) 420 | min_dist = distances[assigned_annotation] 421 | 422 | if min_dist <= thresh and assigned_annotation not in detected_annotations: 423 | true_positives.append(1) 424 | detected_annotations.append(assigned_annotation) 425 | else: 426 | true_positives.append(0) 427 | 428 | # no annotations -> AP for this class is 0 429 | if num_annotations == 0: 430 | average_precisions[label] = 0 431 | continue 432 | 433 | true_positives = np.array(true_positives) 434 | false_positives = np.ones_like(true_positives) - true_positives 435 | # sort by score 436 | indices = np.argsort(-np.array(scores)) 437 | false_positives = false_positives[indices] 438 | true_positives = true_positives[indices] 439 | 440 | # compute false positives and true positives 441 | false_positives = np.cumsum(false_positives) 442 | true_positives = np.cumsum(true_positives) 443 | 444 | # compute recall and precision 445 | recall = true_positives / num_annotations 446 | precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps) 447 | 448 | # compute average precision 449 | average_precision = compute_ap(recall, precision) 450 | average_precisions[label] = average_precision 451 | 452 | mAP = np.mean(list(average_precisions.values())) 453 | 454 | return mAP,list(average_precisions.values()) 455 | --------------------------------------------------------------------------------
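The files above leave a few conventions implicit, so the short standalone sketches below illustrate them; they are not files from the repository, and every helper name they introduce is hypothetical. First, the label format: YOLOLabeller.py writes one object per line as "class x_center y_center width height" with all four coordinates normalized to [0, 1], and test.py converts those centers back into pixel-space corner boxes before matching detections. A minimal round trip, assuming the 512x384 resolution the labeller resizes to:

import numpy as np

def yolo_row_to_corners(row, img_w, img_h):
    # row = (label, xc, yc, w, h), coordinates normalized to [0, 1]
    label, xc, yc, w, h = row
    x1 = (xc - w / 2) * img_w
    y1 = (yc - h / 2) * img_h
    x2 = (xc + w / 2) * img_w
    y2 = (yc + h / 2) * img_h
    return int(label), np.array([x1, y1, x2, y2])

def corners_to_yolo_row(label, box, img_w, img_h):
    # box = (x1, y1, x2, y2) in pixels -> normalized center/size, as YOLOLabeller.py writes it
    x1, y1, x2, y2 = box
    xc = (x1 + x2) / (2 * img_w)
    yc = (y1 + y2) / (2 * img_h)
    w = abs(x2 - x1) / img_w
    h = abs(y2 - y1) / img_h
    return (label, xc, yc, w, h)

if __name__ == "__main__":
    img_w, img_h = 512, 384                 # resolution the labeller resizes to
    row = (0, 0.5, 0.5, 0.1, 0.2)           # a ball centered in the image
    label, box = yolo_row_to_corners(row, img_w, img_h)
    print(label, box)                                       # 0 [230.4 153.6 281.6 230.4]
    print(corners_to_yolo_row(label, box, img_w, img_h))    # (0, 0.5, 0.5, 0.1, 0.2)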
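Second, the box parameterization shared by YOLOLayer.forward in models.py and build_targets in utils/utils.py: the target for a ground-truth box is its fractional offset inside the responsible grid cell plus the log of its size relative to the anchor, and predictions are decoded with the inverse transform (offset added to the cell index, exponential times the anchor). A sketch with plain floats; the anchor value is the repository's first anchor divided by an assumed stride of 32 at the first detection head:

import math

def encode(gx, gy, gw, gh, anchor_w, anchor_h):
    # grid-relative ground truth -> regression targets, as in build_targets
    gi, gj = int(gx), int(gy)
    tx, ty = gx - gi, gy - gj
    tw = math.log(gw / anchor_w + 1e-16)
    th = math.log(gh / anchor_h + 1e-16)
    return gi, gj, tx, ty, tw, th

def decode(gi, gj, tx, ty, tw, th, anchor_w, anchor_h):
    # regression targets -> grid-relative box, as pred_boxes is built in YOLOLayer.forward
    bx = tx + gi
    by = ty + gj
    bw = math.exp(tw) * anchor_w
    bh = math.exp(th) * anchor_h
    return bx, by, bw, bh

if __name__ == "__main__":
    anchor_w, anchor_h = 42 / 32, 39 / 32   # anchor (42, 39) scaled by the assumed stride 32
    gt = (3.7, 5.2, 1.5, 1.1)               # ground-truth box in grid units
    enc = encode(*gt, anchor_w, anchor_h)
    print(enc)
    print(decode(*enc, anchor_w, anchor_h)) # recovers (3.7, 5.2, 1.5, 1.1) up to floating-point error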
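Third, the evaluation: getmAP in test.py (and computeAP in utils/utils.py) mark each detection as a true positive if it matches a previously unclaimed annotation under a distance or IoU threshold, accumulate detections in descending score order, and hand the resulting recall/precision curves to compute_ap. A toy single-class run, assuming the repository root is on the Python path so utils.utils is importable:

import numpy as np
from utils.utils import compute_ap     # defined in utils/utils.py above

# Five detections for one class, three annotated objects.
# 1 = detection matched an unclaimed ground truth, 0 = false positive.
scores          = np.array([0.9, 0.8, 0.7, 0.6, 0.5])
true_positives  = np.array([1,   1,   0,   1,   0])
num_annotations = 3

order = np.argsort(-scores)
tp = np.cumsum(true_positives[order])
fp = np.cumsum(1 - true_positives[order])

recall    = tp / num_annotations
precision = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)

print(recall)                          # rounded: 0.333, 0.667, 0.667, 1.0, 1.0
print(precision)                       # rounded: 1.0, 1.0, 0.667, 0.75, 0.6
print(compute_ap(recall, precision))   # ~0.917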
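Finally, label propagation: labelProp in utils/utils.py moves each of the previous frame's boxes by the mean Farnebäck optical flow inside that box, sampling the flow on the downscaled 160x120 grayscale frames prepared by LPDataSet and multiplying by scale = 4.0 to return to image coordinates. A sketch of just that arithmetic, using a synthetic constant flow field instead of a real cv2.calcOpticalFlowFarneback result:

import math
import numpy as np

scale = 4.0
flow = np.zeros((120, 160, 2), dtype=np.float32)   # stand-in for the Farnebäck flow field
flow[..., 0] = 2.0                                 # every pixel moved 2 px right (in flow coordinates)
flow[..., 1] = -1.0                                # and 1 px up

box = [100, 80, 180, 160, 0.9]                     # x1, y1, x2, y2, confidence in image coordinates

x_min, y_min = int(box[0] / scale), int(box[1] / scale)
x_max = min(flow.shape[1] - 1, int(math.ceil(box[2] / scale)))
y_max = min(flow.shape[0] - 1, int(math.ceil(box[3] / scale)))
patch = flow[y_min:y_max, x_min:x_max]

mean_x = float(np.mean(patch[:, :, 0])) * scale
mean_y = float(np.mean(patch[:, :, 1])) * scale

new_box = [box[0] + mean_x, box[1] + mean_y,
           box[2] + mean_x, box[3] + mean_y, box[4]]
print(new_box)                                     # [108.0, 76.0, 188.0, 156.0, 0.9]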