├── utils
│   ├── __init__.py
│   ├── parse_config.py
│   ├── datasets.py
│   └── utils.py
├── data
│   ├── RoboCup
│   │   ├── .gitignore
│   │   └── anchors
│   │       ├── anchorsFinetune3.txt
│   │       ├── anchors3.txt
│   │       ├── anchors4.txt
│   │       └── anchors6.txt
│   └── robo.names
├── checkpoints_old
│   └── .gitignore
├── config
│   ├── robo.data
│   ├── roboFinetune.data
│   ├── robo-2c.cfg
│   ├── robo.cfg
│   ├── robo-hr.cfg
│   └── robo-bn.cfg
├── .idea
│   ├── encodings.xml
│   ├── vcs.xml
│   ├── modules.xml
│   ├── misc.xml
│   ├── PyTorch-YOLOv3.iml
│   └── workspace.xml
├── .gitignore
├── dataMean.py
├── splitSets.py
├── paramSave.py
├── README.md
├── yoloFolder.py
├── YOLOExtractor.py
├── detect.py
├── compute_anchors.py
├── train.py
├── YOLOLabeller.py
├── models.py
└── test.py
/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/data/RoboCup/.gitignore:
--------------------------------------------------------------------------------
1 | *.txt
2 |
--------------------------------------------------------------------------------
/checkpoints_old/.gitignore:
--------------------------------------------------------------------------------
1 | *.weights
2 |
--------------------------------------------------------------------------------
/data/robo.names:
--------------------------------------------------------------------------------
1 | ball
2 | crossing
3 | goalpost
4 | robot
5 |
--------------------------------------------------------------------------------
/data/RoboCup/anchors/anchorsFinetune3.txt:
--------------------------------------------------------------------------------
1 | 42.00,39.00, 29.00,16.00, 31.00,109.00, 79.00,106.00
2 |
--------------------------------------------------------------------------------
/data/RoboCup/anchors/anchors3.txt:
--------------------------------------------------------------------------------
1 | 19.00,19.00, 90.00,182.00, 32.00,58.00, 21.00,72.00, 13.00,6.00
2 |
--------------------------------------------------------------------------------
/data/RoboCup/anchors/anchors4.txt:
--------------------------------------------------------------------------------
1 | 14.00,9.00, 29.00,54.00, 56.00,121.00, 145.00,277.00
2 | 0.512722
3 |
--------------------------------------------------------------------------------
/data/RoboCup/anchors/anchors6.txt:
--------------------------------------------------------------------------------
1 | 7.24,2.81, 13.17,11.35, 16.02,44.09, 32.64,21.50, 36.97,81.54, 96.13,182.02
2 | 0.621273
3 |
--------------------------------------------------------------------------------
/config/robo.data:
--------------------------------------------------------------------------------
1 | classes= 4
2 | train=./data/RoboCup/train.txt
3 | valid=./data/RoboCup/test.txt
4 | names=data/robo.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | .DS_Store
3 | build
4 | .git
5 | *.egg-info
6 | dist
7 | output
8 | data/coco
9 | backup
10 | weights/*.weights
11 | __pycache__
12 | checkpoints
13 |
--------------------------------------------------------------------------------
/config/roboFinetune.data:
--------------------------------------------------------------------------------
1 | classes= 4
2 | train=./data/RoboCup/FinetuneTrain.txt
3 | valid=./data/RoboCup/FinetuneTest.txt
4 | names=data/robo.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/utils/parse_config.py:
--------------------------------------------------------------------------------
1 |
2 | def parse_data_config(path):
3 | """Parses the data configuration file"""
4 | options = dict()
5 | options['gpus'] = '0,1,2,3'
6 | options['num_workers'] = '10'
7 | with open(path, 'r') as fp:
8 | lines = fp.readlines()
9 | for line in lines:
10 | line = line.strip()
11 | if line == '' or line.startswith('#'):
12 | continue
13 | key, value = line.split('=')
14 | options[key.strip()] = value.strip()
15 | return options
16 |
--------------------------------------------------------------------------------
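A minimal usage sketch for the parser above, assuming it is run from the repository root so that `config/robo.data` (shown earlier in this dump) is on disk; the expected values are taken directly from that file and from the defaults set in `parse_data_config`:

```python
# Sketch: read config/robo.data with the parser defined in utils/parse_config.py.
from utils.parse_config import parse_data_config

options = parse_data_config("config/robo.data")

# Values come from config/robo.data plus the parser's built-in defaults:
#   options["classes"] == "4"                         (all values stay strings)
#   options["train"]   == "./data/RoboCup/train.txt"
#   options["valid"]   == "./data/RoboCup/test.txt"
#   options["gpus"]    == "0,1,2,3"                   (default added by the parser)
print(options["train"], options["valid"])
```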
/.idea/PyTorch-YOLOv3.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/dataMean.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import numpy as np
3 | import cv2
4 |
5 | if __name__ == '__main__':
6 | #root = "E:/RoboCup/FinetuneHorizon/train/images/"
7 | root = "E:/RoboCup/YOLO/Finetune/train/"
8 | #root = "E:/RoboCup/train/images/"
9 |
10 | mean = np.zeros(3)
11 | std = np.zeros(3)
12 |
13 | imgs = glob.glob1(root,"*.png")
14 |
15 | for i in imgs:
16 | img = cv2.cvtColor(cv2.imread(root+i),cv2.COLOR_BGR2RGB)
17 | m = np.mean(img,axis=(0,1))
18 | s = np.sqrt(np.var(img,axis=(0,1)))
19 | mean += m
20 | std += s
21 |
22 | mean /= len(imgs)*255
23 | std /= len(imgs)*255
24 | std = np.sqrt(std)
25 | print(mean,std)
--------------------------------------------------------------------------------
/splitSets.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 | import glob
4 | import cv2
5 | import random
6 |
7 |
8 | if __name__ == "__main__":
9 |
10 | inPath = "/Users/martonszemenyei/Projects/ROBO/data/YOLO/sydney/"
11 | oPathTrain = "/Users/martonszemenyei/Projects/ROBO/data/YOLO/Finetune/train/"
12 | oPathTest = "/Users/martonszemenyei/Projects/ROBO/data/YOLO/Finetune/test/"
13 |
14 | names = sorted(glob.glob1(inPath,"syd*.png"))
15 | labNames = sorted(glob.glob1(inPath,"*.txt"))
16 |
17 | for img,lab in zip(names,labNames):
18 |
19 | r = random.random()
20 |
21 | if r > 0.8:
22 | os.rename(osp.join(inPath,img),osp.join(oPathTest,img))
23 | os.rename(osp.join(inPath,lab),osp.join(oPathTest,lab))
24 | else:
25 | os.rename(osp.join(inPath,img),osp.join(oPathTrain,img))
26 | os.rename(osp.join(inPath,lab),osp.join(oPathTrain,lab))
27 |
28 | '''for name in names:
29 | img = cv2.imread(oPathTrain+name)
30 | img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
31 | cv2.imwrite(oPathTrain+name,img)'''
--------------------------------------------------------------------------------
/paramSave.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 |
4 | os.environ['KMP_DUPLICATE_LIB_OK']='True'
5 | from utils.datasets import *
6 | from models import *
7 |
8 | def saveParams( path, model, fName="weights.dat" ):
9 | if not os.path.exists(path):
10 | os.makedirs(path)
11 | params = np.empty(0)
12 | Dict = model.state_dict()
13 | for name in Dict:
14 | param = Dict[name].numpy()
15 | if "num_batches" in name:
16 | continue
17 | param = param.reshape(param.size)
18 | params = np.concatenate((params, param))
19 | params.tofile(path+"/"+fName)
20 |
21 | if __name__ == "__main__":
22 |
23 | path = "checkpoints/bestFinetuneHR93_34.weights"
24 |
25 | model = ROBO(bn=False,inch=3,halfRes=True)
26 | model.load_state_dict(torch.load(path, map_location={'cuda:0': 'cpu'}))
27 |
28 | saveParams("checkpoints/",model,fName="weightsHR.dat")
29 |
30 | path = "checkpoints/bestFinetune2C93_43.weights"
31 |
32 | model = ROBO(bn=False,inch=2,halfRes=False)
33 | model.load_state_dict(torch.load(path, map_location={'cuda:0': 'cpu'}))
34 |
35 | saveParams("checkpoints/",model,fName="weights2C.dat")
36 |
37 | path = "checkpoints/bestFinetune2CHR93_32.weights"
38 |
39 | model = ROBO(bn=False,inch=2,halfRes=True)
40 | model.load_state_dict(torch.load(path, map_location={'cuda:0': 'cpu'}))
41 |
42 | saveParams("checkpoints/",model,fName="weights2CHR.dat")
43 |
44 | path = "checkpoints/bestFinetuneBN97_79.weights"
45 |
46 | model = ROBO(bn=True,inch=3,halfRes=False)
47 | model.load_state_dict(torch.load(path, map_location={'cuda:0': 'cpu'}))
48 |
49 | saveParams("checkpoints/",model,fName="weightsBN.dat")
50 |
51 | path = "checkpoints/bestFinetune93_41.weights"
52 |
53 | model = ROBO(bn=False,inch=3,halfRes=False)
54 | model.load_state_dict(torch.load(path, map_location={'cuda:0': 'cpu'}))
55 |
56 | saveParams("checkpoints/",model,fName="weights.dat")
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ROBO
2 | Robust Real-time Object Detection for the Nao Robots
3 |
4 | ## Introduction
5 | This repo contains the code for our submission to the RoboCup 2019 Symposium. It allows you to train your own models, evaluate them, or use our pre-trained models. Some of the code is based on this excellent repo: https://github.com/eriklindernoren/PyTorch-YOLOv3
6 |
7 | The code and dataset for ROBO-UNet and its variants are found [here](https://github.com/szemenyeim/RoboCupVision).
8 |
9 | ## Requirements
10 |
11 | - PyTorch 1.0
12 | - Progressbar
13 |
14 | ## Dataset and Pretrained models
15 | The datasets contain images in YUV format at 512x384 resolution. For every image, the annotations are found in a .txt file of the same name (YOLO format). You can download the datasets from the following links:
16 |
17 | - [Synthetic train](https://deeplearning.iit.bme.hu/Public/ROBO/ROBO_Train.zip)
18 | - [Synthetic test](https://deeplearning.iit.bme.hu/Public/ROBO/ROBO_Test.zip)
19 | - [Finetune](https://deeplearning.iit.bme.hu/Public/ROBO/ROBO_Finetune.zip)
20 |
21 | The pretrained models are available from [here](https://deeplearning.iit.bme.hu/Public/ROBO/checkpoints.zip)
22 |
23 | ## Train your own models
24 | 1. To train your own models, first extract the dataset to a folder of your choice.
25 | 2. Regenerate the train and test image lists by running `python yoloFolder.py --root <dataset folder>`
26 | 3. Run the training on the synthetic database using `python train.py`
27 | 4. Finetune on the real database with `python train.py --finetune`
28 |
29 | You have several other options to use:
30 |
31 | `--bn` trains the ROBO-BN model
32 |
33 | `--yu` uses only 2 input channels `(Y and (U+V)/2)`
34 |
35 | `--hr` trains the ROBO-HR model
36 |
37 | `--transfer` trains only the first N layers on the real database and finetunes the rest
38 |
39 | ## Evaluate and detect
40 | Run `python test.py` to evaluate the model, and `python detect.py` to perform detection on the datasets. These scripts have the same input arguments as the train script.
41 |
42 | ## Export your models for RoboDNN
43 | You can run your models on the Nao robot using RoboDNN. You can export weights files using the paramSave.py script. The corresponding config files are in the config subfolder.
44 |
--------------------------------------------------------------------------------
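As an illustration of the YOLO-format annotation files the README above refers to, each line holds `class x_center y_center width height` with the last four values normalized to [0, 1]; the label line below is invented for demonstration only, and the 512x384 size comes from the README:

```python
# Illustrative only: convert one YOLO-format label line (invented values)
# into pixel corner coordinates for a 512x384 image.
line = "0 0.5031 0.6458 0.0625 0.0833"  # class x_center y_center width height
fields = line.split()
cls = int(fields[0])
xc, yc, w, h = (float(v) for v in fields[1:])

img_w, img_h = 512, 384  # dataset resolution stated in the README
x1, y1 = int((xc - w / 2) * img_w), int((yc - h / 2) * img_h)
x2, y2 = int((xc + w / 2) * img_w), int((yc + h / 2) * img_h)
print(cls, (x1, y1), (x2, y2))  # class id plus top-left / bottom-right pixel corners
```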
/yoloFolder.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import cv2
3 | import argparse
4 | import os.path as osp
5 |
6 | def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
7 | # initialize the dimensions of the image to be resized and
8 | # grab the image size
9 | dim = None
10 | (h, w) = image.shape[:2]
11 |
12 | # if both the width and height are None, then return the
13 | # original image
14 | if width is None and height is None:
15 | return image
16 |
17 | # check to see if the width is None
18 | if width is None:
19 | # calculate the ratio of the height and construct the
20 | # dimensions
21 | r = height / float(h)
22 | dim = (int(w * r), height)
23 |
24 | # otherwise, the height is None
25 | else:
26 | # calculate the ratio of the width and construct the
27 | # dimensions
28 | r = width / float(w)
29 | dim = (width, int(h * r))
30 |
31 | # resize the image
32 | resized = cv2.resize(image, dim, interpolation = inter)
33 |
34 | # return the resized image
35 | return resized
36 |
37 | if __name__ =="__main__":
38 |
39 | parser = argparse.ArgumentParser()
40 | parser.add_argument("--root", help="Path pointing to the YOLO folder", type=str,required=True)
41 | opt = parser.parse_args()
42 | root = opt.root
43 |
44 | trPath = osp.join(root,"YOLO/Train/")
45 | trFile = "./data/RoboCup/train.txt"
46 |
47 | with open(trFile,"w+") as file:
48 | for fName in glob.glob1(trPath,"*.png"):
49 | file.write(trPath+fName + "\n")
50 | file.close()
51 |
52 | tePath = osp.join(root,"YOLO/Test/")
53 | teFile = "./data/RoboCup/test.txt"
54 |
55 | with open(teFile, "w+") as file:
56 | for fName in glob.glob1(tePath, "*.png"):
57 | file.write(tePath + fName + "\n")
58 | file.close()
59 |
60 | trPath = osp.join(root,"YOLO/Finetune/train/")
61 | trFile = "./data/RoboCup/FinetuneTrain.txt"
62 |
63 | with open(trFile,"w+") as file:
64 | for fName in glob.glob1(trPath,"*.png"):
65 | file.write(trPath+fName + "\n")
66 | file.close()
67 |
68 | tePath = osp.join(root,"YOLO/Finetune/test/")
69 | teFile = "./data/RoboCup/FinetuneTest.txt"
70 |
71 | with open(teFile, "w+") as file:
72 | for fName in glob.glob1(tePath, "*.png"):
73 | file.write(tePath + fName + "\n")
74 | file.close()
75 |
76 |
--------------------------------------------------------------------------------
/config/robo-2c.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | mean = 0.4637419,0.47166784,0.48316576
3 | std = 0.45211827,0.16890674,0.18645908
4 | width = 512
5 | height = 384
6 | channels = 2
7 |
8 | [convolutional]
9 | filters=4
10 | size=3
11 | stride=2
12 | pad=1
13 | activation=linear
14 | hasBias = false
15 |
16 | [batchnorm]
17 | activation = leaky
18 |
19 | [convolutional]
20 | filters=8
21 | size=3
22 | stride=2
23 | pad=1
24 | activation=linear
25 | hasBias = false
26 |
27 | [batchnorm]
28 | activation = leaky
29 |
30 | [convolutional]
31 | filters=16
32 | size=3
33 | stride=2
34 | pad=1
35 | activation=linear
36 | hasBias = false
37 |
38 | [batchnorm]
39 | activation = leaky
40 |
41 | [convolutional]
42 | filters=16
43 | size=3
44 | stride=1
45 | pad=1
46 | activation=linear
47 | hasBias = false
48 |
49 | [batchnorm]
50 | activation = leaky
51 |
52 | [convolutional]
53 | filters=32
54 | size=3
55 | stride=2
56 | pad=1
57 | activation=linear
58 | hasBias = false
59 |
60 | [batchnorm]
61 | activation = leaky
62 |
63 | [convolutional]
64 | filters=32
65 | size=3
66 | stride=1
67 | pad=1
68 | activation=linear
69 | hasBias = false
70 |
71 | [batchnorm]
72 | activation = leaky
73 |
74 | [convolutional]
75 | filters=64
76 | size=3
77 | stride=2
78 | pad=1
79 | activation=linear
80 | hasBias = false
81 |
82 | [batchnorm]
83 | activation = leaky
84 |
85 | [convolutional]
86 | filters=64
87 | size=3
88 | stride=1
89 | pad=1
90 | activation=linear
91 | hasBias = false
92 |
93 | [batchnorm]
94 | activation = leaky
95 |
96 | [convolutional]
97 | filters=64
98 | size=3
99 | stride=1
100 | pad=1
101 | activation=linear
102 | hasBias = false
103 |
104 | [batchnorm]
105 | activation = leaky
106 |
107 | [convolutional]
108 | filters=64
109 | size=3
110 | stride=1
111 | pad=1
112 | activation=linear
113 | hasBias = false
114 |
115 | [batchnorm]
116 | activation = leaky
117 |
118 | [convolutional]
119 | filters=64
120 | size=3
121 | stride=1
122 | pad=1
123 | activation=linear
124 | hasBias = false
125 |
126 | [batchnorm]
127 | activation = leaky
128 |
129 | [convolutional]
130 | filters=128
131 | size=3
132 | stride=2
133 | pad=1
134 | activation=linear
135 | hasBias = false
136 |
137 | [batchnorm]
138 | activation = leaky
139 |
140 | [convolutional]
141 | filters=64
142 | size=3
143 | stride=1
144 | pad=1
145 | activation=linear
146 | hasBias = false
147 |
148 | [batchnorm]
149 | activation = leaky
150 |
151 | [convolutional]
152 | filters=128
153 | size=3
154 | stride=1
155 | pad=1
156 | activation=linear
157 | hasBias = false
158 |
159 | [batchnorm]
160 | activation = leaky
161 |
162 | [convolutional]
163 | filters=64
164 | size=3
165 | stride=1
166 | pad=1
167 | activation=linear
168 | hasBias = false
169 |
170 | [batchnorm]
171 | activation = leaky
172 |
173 | [convolutional]
174 | filters=128
175 | size=3
176 | stride=1
177 | pad=1
178 | activation=linear
179 | hasBias = false
180 |
181 | [batchnorm]
182 | activation = leaky
183 |
184 | [route]
185 | from = 21
186 |
187 | [convolutional]
188 | size=1
189 | stride=1
190 | pad=0
191 | filters=10
192 | activation=linear
193 |
194 | [route]
195 | from = 31
196 |
197 | [convolutional]
198 | size=1
199 | stride=1
200 | pad=0
201 | filters=10
202 | activation=linear
203 |
204 | [concat]
205 | from = 33
206 | oned = 1
207 |
--------------------------------------------------------------------------------
/config/robo.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | mean = 0.4637419,0.47166784,0.48316576
3 | std = 0.45211827,0.16890674,0.18645908
4 | width = 512
5 | height = 384
6 | channels = 3
7 |
8 | [convolutional]
9 | filters=4
10 | size=3
11 | stride=2
12 | pad=1
13 | activation=linear
14 | hasBias = false
15 |
16 | [batchnorm]
17 | activation = leaky
18 |
19 | [convolutional]
20 | filters=8
21 | size=3
22 | stride=2
23 | pad=1
24 | activation=linear
25 | hasBias = false
26 |
27 | [batchnorm]
28 | activation = leaky
29 |
30 | [convolutional]
31 | filters=16
32 | size=3
33 | stride=2
34 | pad=1
35 | activation=linear
36 | hasBias = false
37 |
38 | [batchnorm]
39 | activation = leaky
40 |
41 | [convolutional]
42 | filters=16
43 | size=3
44 | stride=1
45 | pad=1
46 | activation=linear
47 | hasBias = false
48 |
49 | [batchnorm]
50 | activation = leaky
51 |
52 | [convolutional]
53 | filters=32
54 | size=3
55 | stride=2
56 | pad=1
57 | activation=linear
58 | hasBias = false
59 |
60 | [batchnorm]
61 | activation = leaky
62 |
63 | [convolutional]
64 | filters=32
65 | size=3
66 | stride=1
67 | pad=1
68 | activation=linear
69 | hasBias = false
70 |
71 | [batchnorm]
72 | activation = leaky
73 |
74 | [convolutional]
75 | filters=64
76 | size=3
77 | stride=2
78 | pad=1
79 | activation=linear
80 | hasBias = false
81 |
82 | [batchnorm]
83 | activation = leaky
84 |
85 | [convolutional]
86 | filters=64
87 | size=3
88 | stride=1
89 | pad=1
90 | activation=linear
91 | hasBias = false
92 |
93 | [batchnorm]
94 | activation = leaky
95 |
96 | [convolutional]
97 | filters=64
98 | size=3
99 | stride=1
100 | pad=1
101 | activation=linear
102 | hasBias = false
103 |
104 | [batchnorm]
105 | activation = leaky
106 |
107 | [convolutional]
108 | filters=64
109 | size=3
110 | stride=1
111 | pad=1
112 | activation=linear
113 | hasBias = false
114 |
115 | [batchnorm]
116 | activation = leaky
117 |
118 | [convolutional]
119 | filters=64
120 | size=3
121 | stride=1
122 | pad=1
123 | activation=linear
124 | hasBias = false
125 |
126 | [batchnorm]
127 | activation = leaky
128 |
129 | [convolutional]
130 | filters=128
131 | size=3
132 | stride=2
133 | pad=1
134 | activation=linear
135 | hasBias = false
136 |
137 | [batchnorm]
138 | activation = leaky
139 |
140 | [convolutional]
141 | filters=64
142 | size=3
143 | stride=1
144 | pad=1
145 | activation=linear
146 | hasBias = false
147 |
148 | [batchnorm]
149 | activation = leaky
150 |
151 | [convolutional]
152 | filters=128
153 | size=3
154 | stride=1
155 | pad=1
156 | activation=linear
157 | hasBias = false
158 |
159 | [batchnorm]
160 | activation = leaky
161 |
162 | [convolutional]
163 | filters=64
164 | size=3
165 | stride=1
166 | pad=1
167 | activation=linear
168 | hasBias = false
169 |
170 | [batchnorm]
171 | activation = leaky
172 |
173 | [convolutional]
174 | filters=128
175 | size=3
176 | stride=1
177 | pad=1
178 | activation=linear
179 | hasBias = false
180 |
181 | [batchnorm]
182 | activation = leaky
183 |
184 | [route]
185 | from = 21
186 |
187 | [convolutional]
188 | size=1
189 | stride=1
190 | pad=0
191 | filters=10
192 | activation=linear
193 |
194 | [route]
195 | from = 31
196 |
197 | [convolutional]
198 | size=1
199 | stride=1
200 | pad=0
201 | filters=10
202 | activation=linear
203 |
204 | [concat]
205 | from = 33
206 | oned = 1
207 |
--------------------------------------------------------------------------------
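For readers without models.py (it is not reproduced in this section), the repeated `[convolutional]` (hasBias=false) + `[batchnorm]` (activation=leaky) pairs in these configs describe a standard conv-BN-LeakyReLU block. The PyTorch snippet below is only an assumed sketch of that mapping (including the 0.1 negative slope), not the repository's actual implementation:

```python
import torch.nn as nn

def conv_bn_leaky(in_ch, out_ch, stride):
    """Assumed PyTorch equivalent of one [convolutional] + [batchnorm] pair
    from the cfg files above: bias-free 3x3 conv, batch norm, leaky ReLU."""
    return nn.Sequential(
        nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=stride, padding=1, bias=False),
        nn.BatchNorm2d(out_ch),
        nn.LeakyReLU(0.1, inplace=True),
    )

# First two downsampling blocks of robo.cfg: 3 -> 4 channels, then 4 -> 8, both stride 2.
stem = nn.Sequential(conv_bn_leaky(3, 4, 2), conv_bn_leaky(4, 8, 2))
```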
/config/robo-hr.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | mean = 0.4637419,0.47166784,0.48316576
3 | std = 0.45211827,0.16890674,0.18645908
4 | width = 256
5 | height = 192
6 | channels = 3
7 |
8 | [convolutional]
9 | filters=8
10 | size=3
11 | stride=2
12 | pad=1
13 | activation=linear
14 | hasBias = false
15 |
16 | [batchnorm]
17 | activation = leaky
18 |
19 | [convolutional]
20 | filters=16
21 | size=3
22 | stride=2
23 | pad=1
24 | activation=linear
25 | hasBias = false
26 |
27 | [batchnorm]
28 | activation = leaky
29 |
30 | [convolutional]
31 | filters=16
32 | size=3
33 | stride=1
34 | pad=1
35 | activation=linear
36 | hasBias = false
37 |
38 | [batchnorm]
39 | activation = leaky
40 |
41 | [convolutional]
42 | filters=32
43 | size=3
44 | stride=2
45 | pad=1
46 | activation=linear
47 | hasBias = false
48 |
49 | [batchnorm]
50 | activation = leaky
51 |
52 | [convolutional]
53 | filters=32
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=linear
58 | hasBias = false
59 |
60 | [batchnorm]
61 | activation = leaky
62 |
63 | [convolutional]
64 | filters=64
65 | size=3
66 | stride=2
67 | pad=1
68 | activation=linear
69 | hasBias = false
70 |
71 | [batchnorm]
72 | activation = leaky
73 |
74 | [convolutional]
75 | filters=64
76 | size=3
77 | stride=1
78 | pad=1
79 | activation=linear
80 | hasBias = false
81 |
82 | [batchnorm]
83 | activation = leaky
84 |
85 | [convolutional]
86 | filters=64
87 | size=3
88 | stride=1
89 | pad=1
90 | activation=linear
91 | hasBias = false
92 |
93 | [batchnorm]
94 | activation = leaky
95 |
96 | [convolutional]
97 | filters=64
98 | size=3
99 | stride=1
100 | pad=1
101 | activation=linear
102 | hasBias = false
103 |
104 | [batchnorm]
105 | activation = leaky
106 |
107 | [convolutional]
108 | filters=64
109 | size=3
110 | stride=1
111 | pad=1
112 | activation=linear
113 | hasBias = false
114 |
115 | [batchnorm]
116 | activation = leaky
117 |
118 | [convolutional]
119 | filters=128
120 | size=3
121 | stride=2
122 | pad=1
123 | activation=linear
124 | hasBias = false
125 |
126 | [batchnorm]
127 | activation = leaky
128 |
129 | [convolutional]
130 | filters=64
131 | size=3
132 | stride=1
133 | pad=1
134 | activation=linear
135 | hasBias = false
136 |
137 | [batchnorm]
138 | activation = leaky
139 |
140 | [convolutional]
141 | filters=128
142 | size=3
143 | stride=1
144 | pad=1
145 | activation=linear
146 | hasBias = false
147 |
148 | [batchnorm]
149 | activation = leaky
150 |
151 | [convolutional]
152 | filters=64
153 | size=3
154 | stride=1
155 | pad=1
156 | activation=linear
157 | hasBias = false
158 |
159 | [batchnorm]
160 | activation = leaky
161 |
162 | [convolutional]
163 | filters=128
164 | size=3
165 | stride=1
166 | pad=1
167 | activation=linear
168 | hasBias = false
169 |
170 | [batchnorm]
171 | activation = leaky
172 |
173 | [route]
174 | from = 19
175 |
176 | [convolutional]
177 | size=1
178 | stride=1
179 | pad=0
180 | filters=10
181 | activation=linear
182 |
183 | [route]
184 | from = 29
185 |
186 | [convolutional]
187 | size=1
188 | stride=1
189 | pad=0
190 | filters=10
191 | activation=linear
192 |
193 | [concat]
194 | from = 31
195 | oned = 1
196 |
--------------------------------------------------------------------------------
/YOLOExtractor.py:
--------------------------------------------------------------------------------
1 | import os
2 | import glob
3 | import cv2
4 | import numpy as np
5 |
6 | imgPath = "E:/RoboCup/YOLO/Train/"
7 | labelPath = "E:/RoboCup/YOLO/Masks/Train/"
8 |
9 | labelDict = {}
10 | legendDict = {}
11 |
12 |
13 | def loadLabelConfig():
14 | """
15 | Reads LabelConfig.txt to dictionary
16 | """
17 | with open(labelPath + "LabelConfig.cfg") as file:
18 | data = file.readlines()
19 | data = [x.replace("\n", "") for x in data]
20 | data = [x.split(":") for x in data]
21 | for i in data:
22 | labelDict[i[0]] = i[1]
23 |
24 |
25 | def readLegendFile():
26 | """
27 | Loads the legend file generated by UETrainingSetGenerator into a
28 | dictionary structure
29 | """
30 | with open(labelPath + "segmentationLegend.leg", "r") as currFile:
31 | fileData = currFile.readline().split(" ")
32 | currLegendIndex = 0
33 | for i in fileData:
34 | i = i.split(":")
35 | if (len(i) < 2): # catching whitespace occurring at the end of the file
36 | continue
37 |
38 | currLegendIndex += int(i[0])
39 | legendDict[str(currLegendIndex)] = i[1]
40 |
41 |
42 | def getTag(key):
43 | legendKeyArray = sorted(map(int, legendDict.keys()))
44 | for legendKey in legendKeyArray:
45 | if (key - 1 < legendKey):
46 | return (legendDict[str(legendKey)])
47 |
48 |
49 | def getLabel(key):
50 | currTag = getTag(key)
51 | return (int(labelDict[currTag]))
52 |
53 |
54 | def processMask(maskName, imageHeight = 480):
55 | """
56 | Processes given maskFile into 2d-array structure.
57 | """
58 | maskArray = []
59 | with open(labelPath + maskName, "r") as currFile:
60 | for i in range(imageHeight): # 480
61 | # read line from segMaskFile
62 | currLineData = currFile.readline()
63 | # gather segNames from File
64 | currLineData = currLineData.split(" ")
65 | maskArray.append(currLineData[:-1])
66 | return maskArray
67 |
68 | import re
69 |
70 | def sorted_nicely( l ):
71 | """ Sort the given iterable in the way that humans expect."""
72 | convert = lambda text: int(text) if text.isdigit() else text
73 | alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
74 | return sorted(l, key = alphanum_key)
75 |
76 | if __name__ == "__main__":
77 |
78 | readLegendFile()
79 | loadLabelConfig()
80 |
81 | labels = sorted_nicely(glob.glob1(labelPath,"*.txt"))
82 | images = sorted_nicely(glob.glob1(imgPath,"*.png"))
83 |
84 | for i,(imageN,labelN) in enumerate(zip(images,labels)):
85 | print(i)
86 | label = np.array(processMask(labelN),'uint8')
87 | file = open(imgPath + imageN.split(".")[0] + ".txt","w+")
88 | for i in range(1,62):
89 | a = np.where(label == i)
90 | if a[0].size == 0 or a[1].size == 0:
91 | continue
92 | bbox = getLabel(i)-1, (np.max(a[1])+np.min(a[1]))/1280.0, (np.max(a[0])+np.min(a[0]))/960.0, \
93 | (np.max(a[1])-np.min(a[1]))/640.0, (np.max(a[0])-np.min(a[0]))/480.0
94 | if bbox[0] < 0:
95 | continue
96 | if bbox[3] > 0.012 or bbox[4] > 0.015:
97 | for elem in bbox:
98 | file.write(str(elem))
99 | file.write(" ")
100 | file.write("\n")
101 | file.close()
102 |
103 |
104 |
105 |
--------------------------------------------------------------------------------
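YOLOExtractor.py relies on two auxiliary files whose contents are not included in this dump: `LabelConfig.cfg` (one `tag:classIndex` pair per line) and `segmentationLegend.leg` (a single line of space-separated `count:tag` entries that the reader turns into cumulative mask-value ranges). The sketch below uses invented tags, counts, and class indices purely to illustrate those formats and the lookup helpers:

```python
# Hypothetical example: the tag names, counts, and class indices are invented;
# only the file formats follow the parsing code in YOLOExtractor.py.
import tempfile
import YOLOExtractor as yx

yx.labelPath = tempfile.mkdtemp() + "/"

# LabelConfig.cfg: one "tag:classIndex" pair per line.
with open(yx.labelPath + "LabelConfig.cfg", "w") as f:
    f.write("Ball:1\nRobot:4\n")

# segmentationLegend.leg: one line of "count:tag" entries; counts accumulate
# into the upper bound of each tag's range of mask values.
with open(yx.labelPath + "segmentationLegend.leg", "w") as f:
    f.write("10:Ball 20:Robot ")

yx.readLegendFile()
yx.loadLabelConfig()
print(yx.getLabel(5))   # mask value 5 falls in the first range  -> 1 (Ball)
print(yx.getLabel(15))  # mask value 15 falls in the second range -> 4 (Robot)
```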
/config/robo-bn.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | mean = 0.4637419,0.47166784,0.48316576
3 | std = 0.45211827,0.16890674,0.18645908
4 | width = 512
5 | height = 384
6 | channels = 3
7 |
8 | [convolutional]
9 | filters=8
10 | size=3
11 | stride=2
12 | pad=1
13 | activation=linear
14 | hasBias = false
15 |
16 | [batchnorm]
17 | activation = leaky
18 |
19 | [convolutional]
20 | filters=16
21 | size=3
22 | stride=2
23 | pad=1
24 | activation=linear
25 | hasBias = false
26 |
27 | [batchnorm]
28 | activation = leaky
29 |
30 | [convolutional]
31 | filters=32
32 | size=3
33 | stride=2
34 | pad=1
35 | activation=linear
36 | hasBias = false
37 |
38 | [batchnorm]
39 | activation = leaky
40 |
41 | [convolutional]
42 | filters=16
43 | size=1
44 | stride=1
45 | pad=0
46 | activation=linear
47 | hasBias = false
48 |
49 | [batchnorm]
50 | activation = leaky
51 |
52 | [convolutional]
53 | filters=32
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=linear
58 | hasBias = false
59 |
60 | [batchnorm]
61 | activation = leaky
62 |
63 | [convolutional]
64 | filters=64
65 | size=3
66 | stride=2
67 | pad=1
68 | activation=linear
69 | hasBias = false
70 |
71 | [batchnorm]
72 | activation = leaky
73 |
74 | [convolutional]
75 | filters=32
76 | size=1
77 | stride=1
78 | pad=0
79 | activation=linear
80 | hasBias = false
81 |
82 | [batchnorm]
83 | activation = leaky
84 |
85 | [convolutional]
86 | filters=64
87 | size=3
88 | stride=1
89 | pad=1
90 | activation=linear
91 | hasBias = false
92 |
93 | [batchnorm]
94 | activation = leaky
95 |
96 | [convolutional]
97 | filters=128
98 | size=3
99 | stride=2
100 | pad=1
101 | activation=linear
102 | hasBias = false
103 |
104 | [batchnorm]
105 | activation = leaky
106 |
107 | [convolutional]
108 | filters=64
109 | size=1
110 | stride=1
111 | pad=0
112 | activation=linear
113 | hasBias = false
114 |
115 | [batchnorm]
116 | activation = leaky
117 |
118 | [convolutional]
119 | filters=128
120 | size=3
121 | stride=1
122 | pad=1
123 | activation=linear
124 | hasBias = false
125 |
126 | [batchnorm]
127 | activation = leaky
128 |
129 | [convolutional]
130 | filters=64
131 | size=1
132 | stride=1
133 | pad=0
134 | activation=linear
135 | hasBias = false
136 |
137 | [batchnorm]
138 | activation = leaky
139 |
140 | [convolutional]
141 | filters=128
142 | size=3
143 | stride=1
144 | pad=1
145 | activation=linear
146 | hasBias = false
147 |
148 | [batchnorm]
149 | activation = leaky
150 |
151 | [convolutional]
152 | filters=256
153 | size=3
154 | stride=2
155 | pad=1
156 | activation=linear
157 | hasBias = false
158 |
159 | [batchnorm]
160 | activation = leaky
161 |
162 | [convolutional]
163 | filters=128
164 | size=1
165 | stride=1
166 | pad=0
167 | activation=linear
168 | hasBias = false
169 |
170 | [batchnorm]
171 | activation = leaky
172 |
173 | [convolutional]
174 | filters=256
175 | size=3
176 | stride=1
177 | pad=1
178 | activation=linear
179 | hasBias = false
180 |
181 | [batchnorm]
182 | activation = leaky
183 |
184 | [convolutional]
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=0
189 | activation=linear
190 | hasBias = false
191 |
192 | [batchnorm]
193 | activation = leaky
194 |
195 | [convolutional]
196 | filters=256
197 | size=3
198 | stride=1
199 | pad=1
200 | activation=linear
201 | hasBias = false
202 |
203 | [batchnorm]
204 | activation = leaky
205 |
206 | [route]
207 | from = 25
208 |
209 | [convolutional]
210 | size=1
211 | stride=1
212 | pad=0
213 | filters=10
214 | activation=linear
215 |
216 | [route]
217 | from = 35
218 |
219 | [convolutional]
220 | size=1
221 | stride=1
222 | pad=0
223 | filters=10
224 | activation=linear
225 |
226 | [concat]
227 | from = 37
228 | oned = 1
229 |
--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | from models import *
4 | from utils.utils import *
5 | from utils.datasets import *
6 |
7 | import os
8 | import sys
9 | import time
10 | import datetime
11 | import argparse
12 | import cv2
13 |
14 | import torch
15 | from torch.utils.data import DataLoader
16 | from torchvision import datasets
17 | from torch.autograd import Variable
18 | import progressbar
19 |
20 |
21 | if __name__ == '__main__':
22 | parser = argparse.ArgumentParser()
23 | parser.add_argument('--weights_path', type=str, default='checkpoints/DBestFinetunePruned.weights', help='path to weights file')
24 | parser.add_argument('--class_path', type=str, default='data/robo.names', help='path to class label file')
25 | parser.add_argument('--conf_thres', type=float, default=0.5, help='object confidence threshold')
26 | parser.add_argument('--nms_thres', type=float, default=0.4, help='iou threshold for non-maximum suppression')
27 | parser.add_argument('--batch_size', type=int, default=1, help='size of the batches')
28 | parser.add_argument('--n_cpu', type=int, default=4, help='number of cpu threads to use during batch generation')
29 | parser.add_argument('--img_size', type=int, default=(384,512), help='size of each image dimension')
30 | parser.add_argument("--finetune", help="Finetuning", action="store_true", default=False)
31 | parser.add_argument("--bn", help="Use bottleneck", action="store_true", default=False)
32 | parser.add_argument("--yu", help="Use 2 channels", action="store_true", default=False)
33 | parser.add_argument("--hr", help="Use half res", action="store_true", default=False)
34 | opt = parser.parse_args()
35 | print(opt)
36 |
37 | cuda = torch.cuda.is_available()
38 |
39 | image_folder = "E:/RoboCup/YOLO/Finetune/test/" if opt.finetune else "E:/RoboCup/YOLO/Test/"
40 |
41 | weights_path = "checkpoints/bestFinetune" if opt.finetune else "checkpoints/best"
42 |
43 | if opt.yu:
44 | weights_path += "2C"
45 | if opt.bn:
46 | weights_path += "BN"
47 | if opt.hr:
48 | weights_path += "HR"
49 |
50 | weights_path += ".weights"
51 |
52 | os.makedirs('output', exist_ok=True)
53 |
54 | # Set up model
55 | channels = 2 if opt.yu else 3
56 | model = ROBO(inch=channels,bn=opt.bn,halfRes=opt.hr)
57 | model.load_state_dict(torch.load(weights_path,map_location={'cuda:0': 'cpu'}))
58 |
59 | print(count_zero_weights(model))
60 |
61 | if cuda:
62 | model.cuda()
63 |
64 | model.eval() # Set in evaluation mode
65 |
66 | dataloader = DataLoader(ImageFolder(image_folder, synth=opt.finetune, type='%s/*.png', yu=opt.yu, hr=opt.hr),
67 | batch_size=opt.batch_size, shuffle=False, num_workers=opt.n_cpu)
68 |
69 | classes = load_classes(opt.class_path) # Extracts class labels from file
70 |
71 | Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
72 |
73 | imgs = [] # Stores image paths
74 | img_detections = [] # Stores detections for each image index
75 |
76 | print ('\nPerforming object detection:')
77 | bar = progressbar.ProgressBar(0, len(dataloader), redirect_stdout=False)
78 | elapsed_time = 0
79 | for batch_i, (img_paths, input_imgs) in enumerate(dataloader):
80 | # Configure input
81 | input_imgs = input_imgs.type(Tensor)
82 |
83 | # Get detections
84 | with torch.no_grad():
85 | start_time = time.time()
86 | detections = model(input_imgs)
87 | elapsed_time += time.time() - start_time
88 | detections = non_max_suppression(detections, 80, opt.conf_thres, opt.nms_thres)
89 |
90 | # Log progress
91 | bar.update(batch_i)
92 |
93 | # Save image and detections
94 | imgs.extend(img_paths)
95 | img_detections.extend(detections)
96 |
97 | bar.finish()
98 | print("\nAverage time: %.2f" % (elapsed_time*1000/len(dataloader)))
99 | print ('\nSaving images:')
100 | # Iterate through images and save plot of detections
101 | bar = progressbar.ProgressBar(0, len(imgs), redirect_stdout=False)
102 | for img_i, (path, detections) in enumerate(zip(imgs, img_detections)):
103 |
104 | # Create plot
105 | img = np.array(Image.open(path).convert('RGB'))
106 |
107 | # The amount of padding that was added
108 | pad_x = 0
109 | pad_y = 0
110 | # Image height and width after padding is removed
111 | unpad_h = opt.img_size[0] - pad_y
112 | unpad_w = opt.img_size[1] - pad_x
113 |
114 | img = cv2.cvtColor(img,cv2.COLOR_YUV2BGR)
115 |
116 | # Draw bounding boxes and labels of detections
117 | if detections is not None:
118 | unique_labels = detections[:, -1].cpu().unique()
119 | n_cls_preds = len(unique_labels)
120 | bbox_colors = [(0,0,255),(255,0,255),(255,0,0),(0,255,255)]
121 | for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
122 |
123 | # Rescale coordinates to original dimensions
124 | box_h = ((y2 - y1) / unpad_h) * img.shape[0]
125 | box_w = ((x2 - x1) / unpad_w) * img.shape[1]
126 | y1 = (y1 - pad_y // 2) * 1
127 | x1 = (x1 - pad_x // 2) * 1
128 | y2 = (y2 - pad_y // 2) * 1
129 | x2 = (x2 - pad_x // 2) * 1
130 |
131 | color = bbox_colors[int(cls_pred)]
132 | # Create a Rectangle patch
133 | cv2.rectangle(img,(int(x1),int(y1)),(int(x2),int(y2)),color,2)
134 |
135 | # Save generated image with detections
136 | cv2.imwrite('output/%d.png' % (img_i),img)
137 | bar.update(img_i)
138 | bar.finish()
139 |
--------------------------------------------------------------------------------
/compute_anchors.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Feb 20, 2017
3 | @author: jumabek
4 | '''
5 | from os import listdir
6 | from os.path import isfile, join
7 | import argparse
8 | # import cv2
9 | import numpy as np
10 | import sys
11 | import os
12 | from scipy.cluster.vq import kmeans, whiten
13 | import shutil
14 | import random
15 | import math
16 |
17 | width_in_cfg_file = 416.
18 | height_in_cfg_file = 416.
19 |
20 |
21 | def IOU(x, centroids):
22 | similarities = []
23 | k = len(centroids)
24 | for centroid in centroids:
25 | c_w, c_h = centroid
26 | w, h = x
27 | if c_w >= w and c_h >= h:
28 | similarity = w * h / (c_w * c_h)
29 | elif c_w >= w and c_h <= h:
30 | similarity = w * c_h / (w * h + (c_w - w) * c_h)
31 | elif c_w <= w and c_h >= h:
32 | similarity = c_w * h / (w * h + c_w * (c_h - h))
33 | else: # means both w,h are bigger than c_w and c_h respectively
34 | similarity = (c_w * c_h) / (w * h)
35 | similarities.append(similarity) # will become (k,) shape
36 | return np.array(similarities)
37 |
38 |
39 | def avg_IOU(X, centroids):
40 | n, d = X.shape
41 | sum = 0.
42 | for i in range(X.shape[0]):
43 | # note IOU() will return array which contains IoU for each centroid and X[i] // slightly ineffective, but I am too lazy
44 | sum += max(IOU(X[i], centroids))
45 | return sum / n
46 |
47 |
48 | def write_anchors_to_file(centroids, X, anchor_file):
49 | f = open(anchor_file, 'w')
50 |
51 | anchors = centroids.copy()
52 | print(anchors.shape)
53 |
54 | for i in range(anchors.shape[0]):
55 | anchors[i][0] = round(anchors[i][0]*512)
56 | anchors[i][1] = round(anchors[i][1]*384)
57 |
58 |
59 | print('Anchors = ', anchors)
60 |
61 | for i in range(anchors.shape[0]-1):
62 | f.write('%0.2f,%0.2f, ' % (anchors[i, 0], anchors[i, 1]))
63 |
64 | # there should not be comma after last anchor, that's why
65 | f.write('%0.2f,%0.2f\n' % (anchors[-1, 0], anchors[-1, 1]))
66 |
67 | if X is not None:
68 | f.write('%f\n' % (avg_IOU(X, centroids)))
69 | print()
70 |
71 |
72 | def kmeans2(X, centroids, eps, anchor_file):
73 | N = X.shape[0]
74 | iterations = 0
75 | k, dim = centroids.shape
76 | prev_assignments = np.ones(N) * (-1)
77 | iter = 0
78 | old_D = np.zeros((N, k))
79 |
80 | while True:
81 | D = []
82 | iter += 1
83 | for i in range(N):
84 | d = 1 - IOU(X[i], centroids)
85 | D.append(d)
86 | D = np.array(D) # D.shape = (N,k)
87 |
88 | print("iter {}: dists = {}".format(iter, np.sum(np.abs(old_D - D))))
89 |
90 | # assign samples to centroids
91 | assignments = np.argmin(D, axis=1)
92 |
93 | if (assignments == prev_assignments).all():
94 | print("Centroids = ", centroids)
95 | write_anchors_to_file(centroids, X, anchor_file)
96 | return
97 |
98 | # calculate new centroids
99 | centroid_sums = np.zeros((k, dim), float)
100 | for i in range(N):
101 | centroid_sums[assignments[i]] += X[i]
102 | for j in range(k):
103 | centroids[j] = centroid_sums[j] / (np.sum(assignments == j))
104 |
105 | prev_assignments = assignments.copy()
106 | old_D = D.copy()
107 |
108 |
109 | def main(argv):
110 | parser = argparse.ArgumentParser()
111 | parser.add_argument('-filelist', default='./data/RoboCup/FinetuneTrain.txt',
112 | help='path to filelist\n')
113 | parser.add_argument('-output_dir', default='./data/RoboCup/anchors', type=str,
114 | help='Output anchor directory\n')
115 | parser.add_argument('-num_clusters', default=3, type=int,
116 | help='number of clusters\n')
117 |
118 | args = parser.parse_args()
119 |
120 | nclass = 4
121 |
122 | if not os.path.exists(args.output_dir):
123 | os.mkdir(args.output_dir)
124 |
125 | f = open(args.filelist)
126 |
127 | lines = [line.rstrip('\n') for line in f.readlines()]
128 |
129 | annotation_dims = []
130 | for i in range(nclass):
131 | annotation_dims.append([])
132 |
133 | size = np.zeros((1, 1, 3))
134 | for line in lines:
135 |
136 | # line = line.replace('images','labels')
137 | # line = line.replace('img1','labels')
138 | line = line.replace('JPEGImages', 'labels')
139 |
140 | line = line.replace('.jpg', '.txt')
141 | line = line.replace('.png', '.txt')
142 | print(line)
143 | f2 = open(line)
144 | for line in f2.readlines():
145 | line = line.rstrip('\n')
146 | c, _, _, w, h = line.split(' ')
147 | # print(w,h)
148 | annotation_dims[int(c)].append(tuple(map(float, (w, h))))
149 |
150 | anchors = np.zeros([nclass,2])
151 | for i in range(nclass):
152 | dims = np.array(annotation_dims[i])
153 | anchors[i] = np.mean(dims,0)
154 | anchor_file = join(args.output_dir, 'anchorsFinetune%d.txt' % (args.num_clusters))
155 | write_anchors_to_file(anchors,None,anchor_file)
156 |
157 |
158 | '''annotation_dims = np.array(annotation_dims)
159 |
160 | eps = 0.005
161 |
162 | if args.num_clusters == 0:
163 | for num_clusters in range(1, 11): # we make 1 through 10 clusters
164 | anchor_file = join(args.output_dir, 'anchors%d.txt' % (num_clusters))
165 |
166 | indices = [random.randrange(annotation_dims.shape[0]) for i in range(num_clusters)]
167 | centroids = annotation_dims[indices]
168 | kmeans(annotation_dims, centroids, eps, anchor_file)
169 | print('centroids.shape', centroids.shape)
170 | else:
171 | anchor_file = join(args.output_dir, 'anchors%d.txt' % (args.num_clusters))
172 | indices = [random.randrange(annotation_dims.shape[0]) for i in range(args.num_clusters)]
173 | centroids = annotation_dims[indices]
174 | kmeans(annotation_dims, centroids, eps, anchor_file)
175 | print('centroids.shape', centroids.shape)'''
176 |
177 |
178 | if __name__ == "__main__":
179 | main(sys.argv)
--------------------------------------------------------------------------------
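The files under `data/RoboCup/anchors/` shown near the top of this dump are the output of `write_anchors_to_file`: a single line of comma-separated `width,height` pairs in pixels at the 512x384 input resolution, optionally followed by the average IoU on a second line. A small sketch for reading such a file back, assuming it is run from the repository root:

```python
# Read an anchor file produced by write_anchors_to_file back into (w, h) pairs.
def read_anchors(path):
    with open(path) as f:
        values = [float(v) for v in f.readline().split(",")]
    return list(zip(values[0::2], values[1::2]))

# e.g. data/RoboCup/anchors/anchors6.txt from this repository:
# [(7.24, 2.81), (13.17, 11.35), (16.02, 44.09), (32.64, 21.5), (36.97, 81.54), (96.13, 182.02)]
print(read_anchors("data/RoboCup/anchors/anchors6.txt"))
```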
/train.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | from models import *
4 | from utils.utils import *
5 | from utils.datasets import *
6 | from utils.parse_config import *
7 |
8 | import os
9 | import argparse
10 |
11 | import torch
12 | from torch.utils.data import DataLoader
13 | import torch.optim as optim
14 |
15 | import progressbar
16 |
17 | def l1reg(model):
18 | regularization_loss = 0
19 | for param in model.parameters():
20 | regularization_loss += torch.sum(torch.abs(param))
21 | return regularization_loss
22 |
23 | def add_dimension_glasso(var, dim=0):
24 | return var.pow(2).sum(dim=dim).add(1e-8).pow(1/2.).sum()
25 |
26 | def gl1reg(model):
27 | reg = 0
28 | for param in model.parameters():
29 | dim = param.size()
30 | if dim.__len__() > 2:
31 | reg += add_dimension_glasso(param, (1,2,3))
32 | #reg += add_dimension_glasso(param, (0,2,3))
33 | return reg
34 |
35 | def train(epoch,epochs,bestLoss,indices = None):
36 | #############
37 | ####TRAIN####
38 | #############
39 |
40 | lossx = 0
41 | lossy = 0
42 | lossw = 0
43 | lossh = 0
44 | lossconf = 0
45 | lossreg = 0
46 | losstotal = 0
47 | recall = 0
48 | prec = 0
49 |
50 | recs = [0,0]
51 | precs = [0,0]
52 |
53 | model.train()
54 |
55 | bar = progressbar.ProgressBar(0, len(trainloader), redirect_stdout=False)
56 |
57 | for batch_i, (_, imgs, targets) in enumerate(trainloader):
58 | imgs = imgs.type(Tensor)
59 | targets = [x.type(Tensor) for x in targets]
60 |
61 | optimizer.zero_grad()
62 |
63 | loss = model(imgs, targets)
64 | reg = Tensor([0.0])
65 | if indices is None:
66 | reg = decay * regularize(model)
67 | loss += reg
68 |
69 | loss.backward()
70 |
71 | if indices is not None:
72 | pIdx = 0
73 | for param in model.parameters():
74 | if param.dim() > 1:
75 | if param.grad is not None:
76 | param.grad[indices[pIdx]] = 0
77 | pIdx += 1
78 |
79 | optimizer.step()
80 | bar.update(batch_i)
81 |
82 | lossx += model.losses["x"]
83 | lossy += model.losses["y"]
84 | lossw += model.losses["w"]
85 | lossh += model.losses["h"]
86 | lossconf += model.losses["conf"]
87 | lossreg += reg.item()
88 | losstotal += loss.item()
89 | recall += model.losses["recall"]
90 | prec += model.losses["precision"]
91 | recs[0] += model.recprec[0]
92 | recs[1] += model.recprec[2]
93 | precs[0] += model.recprec[1]
94 | precs[1] += model.recprec[3]
95 |
96 | bar.finish()
97 | prune = count_zero_weights(model,glasso)
98 | print(
99 | "[Epoch Train %d/%d lr: %.4f][Losses: x %f, y %f, w %f, h %f, conf %f, reg %f, pruned %f, total %f, recall: %.5f (%.5f / %.5f), precision: %.5f (%.5f / %.5f)]"
100 | % (
101 | epoch + 1,
102 | epochs,
103 | scheduler.get_lr()[-1]/learning_rate,
104 | lossx / float(len(trainloader)),
105 | lossy / float(len(trainloader)),
106 | lossw / float(len(trainloader)),
107 | lossh / float(len(trainloader)),
108 | lossconf / float(len(trainloader)),
109 | lossreg / float(len(trainloader)),
110 | prune,
111 | losstotal / float(len(trainloader)),
112 | recall / float(len(trainloader)),
113 | recs[0] / float(len(trainloader)),
114 | recs[1] / float(len(trainloader)),
115 | prec / float(len(trainloader)),
116 | precs[0] / float(len(trainloader)),
117 | precs[1] / float(len(trainloader)),
118 | )
119 | )
120 |
121 | if indices is None:
122 | scheduler.step()
123 |
124 | name = "bestFinetune" if finetune else "best"
125 | name += "2C" if opt.yu else ""
126 | name += "BN" if opt.bn else ""
127 | name += "HR" if opt.hr else ""
128 | if transfer != 0:
129 | name += "T%d" % transfer
130 | if indices is not None:
131 | pruneP = round(prune * 100)
132 | comp = round(sum(model.get_computations(True))/1000000)
133 | name = name + ("%d_%d" %(pruneP,comp))
134 |
135 | '''if bestLoss < (recall + prec):
136 | print("Saving best model")
137 | bestLoss = (recall + prec)
138 | torch.save(model.state_dict(), "checkpoints/%s.weights" % name)'''
139 |
140 | return bestLoss
141 |
142 |
143 | def valid(epoch,epochs,bestLoss,pruned):
144 | #############
145 | ####VALID####
146 | #############
147 |
148 | model.eval()
149 |
150 | mAP, APs = computeAP(model,valloader,0.5,0.45,4,(384,512),False,32)
151 | prune = count_zero_weights(model,glasso)
152 |
153 | name = "bestFinetune" if finetune else "best"
154 | name += "2C" if opt.yu else ""
155 | name += "BN" if opt.bn else ""
156 | name += "HR" if opt.hr else ""
157 | if transfer != 0:
158 | name += "T%d" % transfer
159 | if pruned:
160 | pruneP = round(prune * 100)
161 | comp = round(sum(model.get_computations(True))/1000000)
162 | name = name + ("%d_%d" %(pruneP,comp))
163 |
164 | print("[Epoch Val %d/%d mAP: %.4f][Ball: %.4f Crossing: %.4f Goalpost: %.4f Robot: %.4f]" % (epoch + 1,epochs,mAP,APs[0],APs[1],APs[2],APs[3]))
165 |
166 | if bestLoss < (mAP):
167 | print("Saving best model")
168 | bestLoss = (mAP)
169 | torch.save(model.state_dict(), "checkpoints/%s.weights" % name)
170 |
171 | return bestLoss
172 |
173 | if __name__ == '__main__':
174 |
175 | parser = argparse.ArgumentParser()
176 | parser.add_argument("--finetune", help="Finetuning", action="store_true", default=False)
177 | parser.add_argument("--lr", help="Learning rate", type=float, default=1e-3)
178 | parser.add_argument("--decay", help="Weight decay", type=float, default=1e-4)
179 | parser.add_argument("--transfer", help="Layers to truly train", action="store_true")
180 | parser.add_argument("--bn", help="Use bottleneck", action="store_true")
181 | parser.add_argument("--yu", help="Use 2 channels", action="store_true", default=False)
182 | parser.add_argument("--hr", help="Use half res", action="store_true", default=False)
183 | parser.add_argument("--singleDec", help="Just use a single decay value", action="store_true", default=False)
184 | parser.add_argument("--glasso", help="Use group lasso regularization", action="store_true", default=False)
185 | opt = parser.parse_args()
186 |
187 | finetune = opt.finetune
188 | learning_rate = opt.lr/2 if opt.transfer else opt.lr
189 | dec = opt.decay if finetune else opt.decay/10
190 | transfers = ([3, 5, 8, 11] if opt.bn else [3, 5, 7, 9]) if opt.transfer else [0]
191 | decays = [dec*25, dec*10, dec*5, dec*2.5, dec] if (finetune and not opt.transfer) else [dec]
192 | if opt.singleDec:
193 | decays = [decays[0]]
194 | halfRes = opt.hr
195 | glasso = opt.glasso
196 | regularize = gl1reg if glasso else l1reg
197 | if glasso:
198 | decays = [d*100 for d in decays]
199 |
200 | classPath = "data/robo.names"
201 | data_config_path = "config/roboFinetune.data" if finetune else "config/robo.data"
202 | img_size = (192,256) if halfRes else (384,512)
203 | weights_path = "checkpoints/best%s%s%s.weights" % ("2C" if opt.yu else "","BN" if opt.bn else "", "HR" if opt.hr else "")
204 | n_cpu = 8
205 | batch_size = 64
206 | channels = 2 if opt.yu else 3
207 | epochs = 125 if opt.transfer == 0 else 150
208 |
209 | os.makedirs("output", exist_ok=True)
210 | os.makedirs("checkpoints", exist_ok=True)
211 |
212 | classes = load_classes(classPath)
213 |
214 | # Get data configuration
215 | data_config = parse_data_config(data_config_path)
216 | train_path = data_config["train"]
217 | val_path = data_config["valid"]
218 |
219 | cuda = torch.cuda.is_available()
220 | Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
221 |
222 | # Get dataloader
223 | trainloader = torch.utils.data.DataLoader(
224 | ListDataset(train_path,img_size=img_size, train=True, synth=finetune, yu=opt.yu), batch_size=batch_size, shuffle=True, num_workers=n_cpu
225 | )
226 | valloader = torch.utils.data.DataLoader(
227 | ListDataset(val_path,img_size=img_size, train=False, synth=finetune, yu=opt.yu), batch_size=batch_size, shuffle=False, num_workers=n_cpu
228 | )
229 |
230 | for transfer in transfers:
231 | if len(transfers) > 1:
232 | print("######################################################")
233 | print("############# Finetune with transfer: %d #############" % transfer)
234 | print("######################################################")
235 | for decay in decays:
236 |
237 | if len(decays) > 1:
238 | print("######################################################")
239 | print("############ Finetune with decay: %.1E ############" % decay)
240 | print("######################################################")
241 |
242 | torch.random.manual_seed(12345678)
243 | if cuda:
244 | torch.cuda.manual_seed(12345678)
245 |
246 | # Initiate model
247 | model = ROBO(inch=channels,bn=opt.bn,halfRes = halfRes)
248 | comp = model.get_computations()
249 | print(comp)
250 | print(sum(comp))
251 |
252 | if finetune:
253 | model.load_state_dict(torch.load(weights_path))
254 |
255 | if cuda:
256 | model = model.cuda()
257 |
258 | bestLoss = 0
259 |
260 | optimizer = torch.optim.Adam([
261 | {'params': model.downPart[0:transfer].parameters(), 'lr': learning_rate*10},
262 | {'params': model.downPart[transfer:].parameters()},
263 | {'params': model.classifiers.parameters()}
264 | ],lr=learning_rate)
265 | eta_min = learning_rate/25 if opt.transfer else learning_rate/10
266 | scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,epochs,eta_min=eta_min)
267 |
268 | for epoch in range(epochs):
269 | #if finetune:
270 | train(epoch,epochs,100)
271 | bestLoss = valid(epoch,epochs,bestLoss,False)
272 | #else:
273 | #bestLoss = train(epoch,epochs,bestLoss)
274 |
275 | if finetune and (transfer == 0):
276 | model.load_state_dict(torch.load("checkpoints/bestFinetune%s%s%s.weights" % ("2C" if opt.yu else "","BN" if opt.bn else "","HR" if opt.hr else "")))
277 | with torch.no_grad():
278 | indices = pruneModel(model.parameters(),glasso)
279 |
280 | optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate/40)
281 | print("Finetuning")
282 |
283 | bestLoss = 0
284 |
285 | for epoch in range(25):
286 | train(epoch, 25, 100, indices=indices)
287 | bestLoss = valid(epoch,25,bestLoss,True)
288 |
--------------------------------------------------------------------------------
/YOLOLabeller.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import os
4 | import os.path as osp
5 | from glob import glob1
6 | import copy
7 | import pickle
8 | import re
9 |
10 | def sorted_nicely( l ):
11 | """ Sort the given iterable in the way that humans expect."""
12 | convert = lambda text: int(text) if text.isdigit() else text
13 | alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
14 | return sorted(l, key = alphanum_key)
15 |
16 | global sbox, ebox, img, colors, drawing
17 | colors = [(0,0,255),(255,0,255),(255,0,0),(0,255,255)]
18 |
19 | def on_mouse(event, x, y, flags, params):
20 | global sbox, ebox, img, drawing
21 | if event == cv2.EVENT_LBUTTONDOWN:
22 | sbox = (x, y)
23 | drawing = True
24 |
25 | elif event == cv2.EVENT_MOUSEMOVE:
26 | if drawing:
27 | ebox = (x, y)
28 | img2 = img.copy()
29 | cv2.rectangle(img2,sbox,ebox,colors[classIdx],1)
30 | cv2.imshow("video", img2)
31 |
32 | elif event == cv2.EVENT_LBUTTONUP:
33 | ebox = (x, y)
34 | img2 = img.copy()
35 | cv2.rectangle(img2,sbox,ebox,colors[classIdx],1)
36 | cv2.imshow("video", img2)
37 | drawing = False
38 |
39 |
40 |
41 | if __name__ == '__main__':
42 |
43 | global img, drawing
44 |
45 | drawing = False
46 |
47 | path = "/Users/martonszemenyei/Projects/ROBO/data/YOLO/sydney/"
48 |
49 | names = sorted_nicely(glob1(path, "*.png"))
50 |
51 | cv2.namedWindow("video")
52 | cv2.setMouseCallback("video",on_mouse)
53 |
54 | BBLists = []
55 | classIdx = 0
56 |
57 | for frameCntr,name in enumerate(names):
58 |
59 | img = cv2.imread(path+name)
60 | if img.shape[0] != 384:
61 | img = cv2.resize(img,(512,384))
62 | cv2.imwrite(path+name,img)
63 | img = cv2.cvtColor(img,cv2.COLOR_RGB2BGR)
64 | img = cv2.cvtColor(img,cv2.COLOR_YUV2BGR)
65 |
66 | orig = img.copy()
67 | print(frameCntr)
68 |
69 | if len(BBLists) <= frameCntr:
70 | BBLists.append([])#copy.deepcopy(BBLists[-1]) if len(BBLists) else [])
71 | if osp.exists(path + name.split(".")[0] + ".txt"):
72 | file = open(path + name.split(".")[0] + ".txt", "r")
73 | BBLists[frameCntr] = []
74 | while True:
75 | line = file.readline().split(" ")
76 | if len(line) < 5:
77 | break
78 | BB = []
79 | xc = int(float(line[1])*img.shape[1])
80 | yc = int(float(line[2])*img.shape[0])
81 | w = int(float(line[3])*img.shape[1])
82 | h = int(float(line[4])*img.shape[0])
83 | BB.append((xc-w//2,yc-h//2))
84 | BB.append((xc+w//2,yc+h//2))
85 | BB.append(int(line[0]))
86 | BBLists[frameCntr].append(BB)
87 | for BB in BBLists[frameCntr]:
88 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1)
89 |
90 | BBSel = -1
91 |
92 | BBNum = len(BBLists[frameCntr])
93 |
94 | drawing = False
95 | cv2.imshow("video", img)
96 |
97 | while True:
98 |
99 | key = cv2.waitKey(20)
100 |
101 | if key == 27:
102 | exit(0)
103 | elif key == 13:
104 | cv2.rectangle(img, sbox, ebox, colors[classIdx], 1)
105 | cv2.imshow("video", img)
106 | BBLists[frameCntr].append([sbox, ebox, classIdx])
107 | BBNum = len(BBLists[frameCntr])
108 | # k = next image
109 | elif key == 107:
110 | classIdx = 0
111 | BBSel = -1
112 | break
113 | # x = del all BBs
114 | elif key == 120:
115 | BBLists[frameCntr] = []
116 | img = orig.copy()
117 | cv2.imshow("video", img)
118 | elif key == 48:
119 | classIdx = 0
120 | elif key == 49:
121 | classIdx = 1
122 | elif key == 50:
123 | classIdx = 2
124 | elif key == 51:
125 | classIdx = 3
126 | # n = next BB
127 | elif key == 110:
128 | if BBNum > 0:
129 | BBSel += 1
130 | if BBSel == BBNum:
131 | BBSel = 0
132 | sbox = BBLists[frameCntr][BBSel][0]
133 | ebox = BBLists[frameCntr][BBSel][1]
134 | img2 = img.copy()
135 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1)
136 | cv2.imshow("video", img2)
137 | # w = widen vertically
138 | elif key == 119:
139 | if BBSel >= 0:
140 | sbox = BBLists[frameCntr][BBSel][0]
141 | ebox = BBLists[frameCntr][BBSel][1]
142 | ebox = (ebox[0], ebox[1]+1)
143 | BBLists[frameCntr][BBSel][1] = ebox
144 | img = orig.copy()
145 | for BB in BBLists[frameCntr]:
146 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1)
147 | img2 = img.copy()
148 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1)
149 | cv2.imshow("video", img2)
150 | # s = compress vertically
151 | elif key == 115:
152 | if BBSel >= 0:
153 | sbox = BBLists[frameCntr][BBSel][0]
154 | ebox = BBLists[frameCntr][BBSel][1]
155 | ebox = (ebox[0], ebox[1]-1)
156 | BBLists[frameCntr][BBSel][1] = ebox
157 | img = orig.copy()
158 | for BB in BBLists[frameCntr]:
159 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1)
160 | img2 = img.copy()
161 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1)
162 | cv2.imshow("video", img2)
163 | # a = widen horizontally
164 | elif key == 97:
165 | if BBSel >= 0:
166 | sbox = BBLists[frameCntr][BBSel][0]
167 | ebox = BBLists[frameCntr][BBSel][1]
168 | ebox = (ebox[0]+1, ebox[1])
169 | BBLists[frameCntr][BBSel][1] = ebox
170 | img = orig.copy()
171 | for BB in BBLists[frameCntr]:
172 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1)
173 | img2 = img.copy()
174 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1)
175 | cv2.imshow("video", img2)
176 | # d = compress horizontally
177 | elif key == 100:
178 | if BBSel >= 0:
179 | sbox = BBLists[frameCntr][BBSel][0]
180 | ebox = BBLists[frameCntr][BBSel][1]
181 | ebox = (ebox[0]-1, ebox[1])
182 | BBLists[frameCntr][BBSel][1] = ebox
183 | img = orig.copy()
184 | for BB in BBLists[frameCntr]:
185 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1)
186 | img2 = img.copy()
187 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1)
188 | cv2.imshow("video", img2)
189 |                 # t = move top-left corner down
190 | elif key == 116:
191 | if BBSel >= 0:
192 | sbox = BBLists[frameCntr][BBSel][0]
193 | ebox = BBLists[frameCntr][BBSel][1]
194 | sbox = (sbox[0], sbox[1]+1)
195 | BBLists[frameCntr][BBSel][0] = sbox
196 | img = orig.copy()
197 | for BB in BBLists[frameCntr]:
198 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1)
199 | img2 = img.copy()
200 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1)
201 | cv2.imshow("video", img2)
202 |             # g = move top-left corner up
203 | elif key == 103:
204 | if BBSel >= 0:
205 | sbox = BBLists[frameCntr][BBSel][0]
206 | ebox = BBLists[frameCntr][BBSel][1]
207 | sbox = (sbox[0], sbox[1]-1)
208 | BBLists[frameCntr][BBSel][0] = sbox
209 | img = orig.copy()
210 | for BB in BBLists[frameCntr]:
211 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1)
212 | img2 = img.copy()
213 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1)
214 | cv2.imshow("video", img2)
215 |             # f = move top-left corner right
216 | elif key == 102:
217 | if BBSel >= 0:
218 | sbox = BBLists[frameCntr][BBSel][0]
219 | ebox = BBLists[frameCntr][BBSel][1]
220 | sbox = (sbox[0]+1, sbox[1])
221 | BBLists[frameCntr][BBSel][0] = sbox
222 | img = orig.copy()
223 | for BB in BBLists[frameCntr]:
224 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1)
225 | img2 = img.copy()
226 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1)
227 | cv2.imshow("video", img2)
228 |             # h = move top-left corner left
229 | elif key == 104:
230 | if BBSel >= 0:
231 | sbox = BBLists[frameCntr][BBSel][0]
232 | ebox = BBLists[frameCntr][BBSel][1]
233 | sbox = (sbox[0]-1, sbox[1])
234 | BBLists[frameCntr][BBSel][0] = sbox
235 | img = orig.copy()
236 | for BB in BBLists[frameCntr]:
237 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1)
238 | img2 = img.copy()
239 | cv2.rectangle(img2, sbox, ebox, (255,255,255), 1)
240 | cv2.imshow("video", img2)
241 | # r = remove BB
242 | elif key == 114:
243 | if BBSel >= 0:
244 | BBLists[frameCntr].pop(BBSel)
245 | BBSel = -1
246 | img = orig.copy()
247 | for BB in BBLists[frameCntr]:
248 | cv2.rectangle(img, BB[0], BB[1], colors[BB[2]], 1)
249 | cv2.imshow("video", img)
250 | BBNum = len(BBLists[frameCntr])
251 |
252 | file = open(path + name.split(".")[0] + ".txt","w")
253 | for BB in BBLists[frameCntr]:
254 | center = ((BB[0][0] + BB[1][0])/(2*img.shape[1]), (BB[0][1] + BB[1][1])/(2*img.shape[0]))
255 | size = (abs(BB[1][0] - BB[0][0]) / img.shape[1],abs(BB[1][1] - BB[0][1]) / img.shape[0])
256 | label = BB[2]
257 | file.write(str(label))
258 | file.write(" ")
259 | file.write(str(center[0]))
260 | file.write(" ")
261 | file.write(str(center[1]))
262 | file.write(" ")
263 | file.write(str(size[0]))
264 | file.write(" ")
265 | file.write(str(size[1]))
266 | file.write("\n")
267 | file.close()
--------------------------------------------------------------------------------
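The labeller above writes one text file per frame in the normalized YOLO format (class, x-center, y-center, width, height, all relative to the image size), and the per-frame loop parses the same format back into corner points. A minimal round-trip sketch, assuming the 512x384 frames used in the script; the helper name yolo_line_to_corners is illustrative and not part of the repository:

def yolo_line_to_corners(line, img_w=512, img_h=384):
    # "cls xc yc w h" with normalized coordinates -> pixel corner points,
    # mirroring the parsing done at the top of the per-frame loop above
    cls, xc, yc, w, h = line.split(" ")[:5]
    xc, yc = float(xc) * img_w, float(yc) * img_h
    w, h = float(w) * img_w, float(h) * img_h
    sbox = (int(xc - w // 2), int(yc - h // 2))
    ebox = (int(xc + w // 2), int(yc + h // 2))
    return sbox, ebox, int(cls)

# Example: a ball (class 0) centred in a 512x384 frame, roughly 40x40 px large
print(yolo_line_to_corners("0 0.5 0.5 0.078125 0.104167"))   # ((236, 172), (276, 212), 0)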
/models.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 | from utils.utils import build_targets
7 | from collections import defaultdict
8 |
9 | class Conv(nn.Module):
10 | def __init__(self,inch,ch,stride=1,size=3,doBN = True):
11 | super(Conv,self).__init__()
12 | self.conv = nn.Conv2d(inch,ch,kernel_size=size,stride=stride,padding=size//2, bias=not doBN)
13 | self.bn = nn.BatchNorm2d(ch)
14 | self.relu = nn.LeakyReLU(0.1)
15 |
16 | self.size = size
17 | self.inch = inch
18 | self.stride = stride
19 | self.ch = ch
20 | self.doBN = doBN
21 |
22 | def forward(self, x):
23 | x = self.conv(x)
24 | if self.doBN:
25 | x = self.bn(x)
26 | return self.relu(x)
27 |
28 | def getComp(self,W,H):
29 | W = W // self.stride
30 | H = H // self.stride
31 |
32 | return self.size*self.size*W*H*self.inch*self.ch*2 + (W*H*self.ch*4 if self.doBN else 0), W, H
33 |
34 | def getParams(self):
35 |         return self.ch*(self.inch*self.size*self.size + (4 if self.doBN else 1))
36 |
37 | class YOLOLayer(nn.Module):
38 | """Detection layer"""
39 |
40 | def __init__(self, anchors, num_classes, img_dim):
41 | super(YOLOLayer, self).__init__()
42 | self.anchors = anchors
43 | self.num_anchors = len(anchors)
44 | self.num_classes = num_classes
45 | self.bbox_attrs = 5 #+ num_classes
46 | self.image_dim = img_dim
47 | self.ignore_thres = 0.5
48 | self.lambda_coord = 1
49 |
50 | self.mse_loss = nn.MSELoss(reduction='mean') # Coordinate loss
51 | self.bce_loss = nn.BCELoss(reduction='mean') # Confidence loss
52 | #self.ce_loss = nn.CrossEntropyLoss() # Class loss
53 |
54 | def forward(self, x, targets=None):
55 | nA = self.num_anchors
56 | nB = x.size(0)
57 | nGy = x.size(2)
58 | nGx = x.size(3)
59 | stride = self.image_dim / nGy
60 |
61 | # Tensors for cuda support
62 | FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
63 | LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
64 | ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor
65 |
66 | prediction = x.view(nB, nA, self.bbox_attrs, nGy, nGx).permute(0, 1, 3, 4, 2).contiguous()
67 |
68 | # Get outputs
69 | x = torch.sigmoid(prediction[..., 0]) # Center x
70 | y = torch.sigmoid(prediction[..., 1]) # Center y
71 | w = prediction[..., 2] # Width
72 | h = prediction[..., 3] # Height
73 | pred_conf = torch.sigmoid(prediction[..., 4]) # Conf
74 | #pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.
75 |
76 | # Calculate offsets for each grid
77 | grid_x = torch.arange(nGx).repeat(nGy, 1).view([1, 1, nGy, nGx]).type(FloatTensor)
78 | grid_y = torch.arange(nGy).repeat(nGx, 1).t().view([1, 1, nGy, nGx]).type(FloatTensor)
79 | scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
80 | anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
81 | anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))
82 |
83 | # Add offset and scale with anchors
84 | pred_boxes = FloatTensor(prediction[..., :4].shape)
85 | pred_boxes[..., 0] = x.detach() + grid_x
86 | pred_boxes[..., 1] = y.detach() + grid_y
87 | pred_boxes[..., 2] = torch.exp(w.detach()) * anchor_w
88 | pred_boxes[..., 3] = torch.exp(h.detach()) * anchor_h
89 |
90 | # Training
91 | if targets is not None:
92 |
93 | if x.is_cuda:
94 | self.mse_loss = self.mse_loss.cuda()
95 | self.bce_loss = self.bce_loss.cuda()
96 | #self.ce_loss = self.ce_loss.cuda()
97 |
98 | nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, corr = build_targets(
99 | pred_boxes=pred_boxes.cpu().detach(),
100 | pred_conf=pred_conf.cpu().detach(),
101 | #pred_cls=pred_cls.cpu().detach(),
102 | target=targets.cpu().detach(),
103 | anchors=scaled_anchors.cpu().detach(),
104 | num_anchors=nA,
105 | num_classes=self.num_classes,
106 | grid_size_y=nGy,
107 | grid_size_x=nGx,
108 | ignore_thres=self.ignore_thres,
109 | img_dim=self.image_dim,
110 | )
111 |
112 | nProposals = int((pred_conf > 0.5).sum().item())
113 | recall = float(nCorrect / nGT) if nGT else 1
114 | nCorrPrec = int((corr).sum().item())
115 | precision = float(nCorrPrec / nProposals) if nProposals > 0 else 0
116 |
117 | # Handle masks
118 | mask = mask.type(ByteTensor)
119 | conf_mask = conf_mask.type(ByteTensor)
120 |
121 | # Handle target variables
122 | tx = tx.type(FloatTensor)
123 | ty = ty.type(FloatTensor)
124 | tw = tw.type(FloatTensor)
125 | th = th.type(FloatTensor)
126 | tconf = tconf.type(FloatTensor)
127 | #tcls = tcls.type(LongTensor)
128 |
129 | # Get conf mask where gt and where there is no gt
130 | conf_mask_true = mask
131 | conf_mask_false = conf_mask - mask
132 |
133 | mask = mask.bool()
134 | conf_mask_false = conf_mask_false.bool()
135 | conf_mask_true = conf_mask_true.bool()
136 |
137 | # Mask outputs to ignore non-existing objects
138 | loss_x = self.mse_loss(x[mask], tx[mask])
139 | loss_y = self.mse_loss(y[mask], ty[mask])
140 | loss_w = self.mse_loss(w[mask], tw[mask])
141 | loss_h = self.mse_loss(h[mask], th[mask])
142 | loss_conf = 30*self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + 1*self.bce_loss(
143 | pred_conf[conf_mask_true], tconf[conf_mask_true]
144 | )
145 | #loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
146 | loss = loss_x + loss_y + loss_w + loss_h + loss_conf #+ loss_cls
147 |
148 | return (
149 | loss,
150 | loss_x.item(),
151 | loss_y.item(),
152 | loss_w.item(),
153 | loss_h.item(),
154 | loss_conf.item(),
155 | 0,
156 | recall,
157 | precision,
158 | )
159 |
160 | else:
161 | # If not in training phase return predictions
162 | output = torch.cat(
163 | (
164 | pred_boxes.view(nB, -1, 4) * stride,
165 | pred_conf.view(nB, -1, 1),
166 | #pred_cls.view(nB, -1, self.num_classes),
167 | ),
168 | -1,
169 | )
170 | return output
171 |
172 | class ROBO(nn.Module):
173 | def __init__(self, inch=3, ch=4, img_shape=(384,512), bn = False, halfRes=False):
174 | super(ROBO,self).__init__()
175 |
176 | self.img_shape = (img_shape[0] // 2,img_shape[1] // 2) if halfRes else img_shape
177 |
178 | self.bn = bn
179 | self.halfRes = halfRes
180 |
181 | self.loss_names = ["x", "y", "w", "h", "conf", "cls", "recall", "precision"]
182 |
183 | self.branchLayers = [
184 | 10 if halfRes else 11,
185 | -1
186 | ]
187 |
188 | self.anchors = [
189 | (42,39),
190 | (29,16),
191 | (31,109),
192 | (79,106),
193 | ]
194 | if bn:
195 | ch *= 2
196 | self.downPart = nn.ModuleList([
197 | None if halfRes else Conv(inch,ch,2), # Stride: 2
198 | Conv(inch if halfRes else ch,ch*2,2), # Stride: 4
199 | Conv(ch*2,ch*4,2), # Stride: 8
200 | Conv(ch*4,ch*2,1,1),
201 | Conv(ch*2,ch*4,1),
202 | Conv(ch*4,ch*8,2), # Stride: 16
203 | Conv(ch*8,ch*4,1,1),
204 | Conv(ch*4,ch*8,1),
205 | Conv(ch*8,ch*16,2), # Stride: 32
206 | Conv(ch*16,ch*8,1,1),
207 | Conv(ch*8,ch*16,1),
208 | Conv(ch*16,ch*8,1,1),
209 | Conv(ch*8,ch*16,1), # First Classifier
210 | Conv(ch*16,ch*32,2), # Stride: 64
211 | Conv(ch*32,ch*16,1,1),
212 | Conv(ch*16,ch*32,1),
213 | Conv(ch*32,ch*16,1,1),
214 | Conv(ch*16,ch*32,1) # Second Classifier
215 | ])
216 | self.classifiers = nn.ModuleList([
217 | nn.Conv2d(ch*16,10,1),
218 | nn.Conv2d(ch*32,10,1)
219 | ])
220 | else:
221 | self.downPart = nn.ModuleList([
222 | None if halfRes else Conv(inch,ch,2), # Stride: 2
223 | Conv(inch if halfRes else ch,ch*2,2), # Stride: 4
224 | Conv(ch*2,ch*4,2), # Stride: 8
225 | Conv(ch*4,ch*4,1),
226 | Conv(ch*4,ch*8,2), # Stride: 16
227 | Conv(ch*8,ch*8,1),
228 | Conv(ch*8,ch*16,2), # Stride: 32
229 | Conv(ch*16,ch*16,1),
230 | Conv(ch*16,ch*16,1),
231 | Conv(ch*16,ch*16,1),
232 | Conv(ch*16,ch*16,1), # First Classifier
233 | Conv(ch*16,ch*32,2), # Stride: 64
234 | Conv(ch*32,ch*16,1),
235 | Conv(ch*16,ch*32,1),
236 | Conv(ch*32,ch*16,1),
237 | Conv(ch*16,ch*32,1) # Second Classifier
238 | ])
239 | self.classifiers = nn.ModuleList([
240 | nn.Conv2d(ch*16,10,1),
241 | nn.Conv2d(ch*32,10,1)
242 | ])
243 | self.yolo = nn.ModuleList([
244 | YOLOLayer(self.anchors[0:2], 2, img_shape[0]),
245 | YOLOLayer(self.anchors[2:4], 2, img_shape[0])
246 | ])
247 |
248 | def forward(self, x, targets = None):
249 |
250 | is_training = targets is not None
251 | output = []
252 | self.losses = defaultdict(float)
253 | outNum = 0
254 | self.recprec = [0, 0, 0, 0]
255 | layer_outputs = [x]
256 |
257 | for layer in self.downPart:
258 | if layer is not None:
259 | layer_outputs.append(layer(layer_outputs[-1]))
260 |
261 | for idx, cl, yolo in zip(self.branchLayers,self.classifiers,self.yolo):
262 | out = cl(layer_outputs[idx])
263 | if is_training:
264 | out, *losses = yolo(out, targets[outNum])
265 | self.recprec[outNum * 2] += (losses[-2])
266 | self.recprec[outNum * 2 + 1] += (losses[-1])
267 | for name, loss in zip(self.loss_names, losses):
268 | self.losses[name] += loss
269 | # Test phase: Get detections
270 | else:
271 | out = yolo(out)
272 | output.append(out)
273 | outNum += 1
274 |
275 |
276 | self.losses["recall"] /= outNum
277 | self.losses["precision"] /= outNum
278 | return sum(output) if is_training else torch.cat(output, 1)
279 |
280 |
281 | def get_computations(self,pruned = False):
282 | H, W = self.img_shape
283 | computations = []
284 |
285 | for module in self.downPart:
286 | if module is not None:
287 |                 ratio = float(module.conv.weight.nonzero().size(0)) / float(module.conv.weight.numel()) if pruned else 1
288 |                 # the outer check already guarantees module is not None here
289 |                 comp, W, H = module.getComp(W,H)
290 |                 computations.append(comp * ratio)
291 |
292 | H, W = self.img_shape[0] // 32, self.img_shape[1] // 32
293 | computations.append(H*W*64*10*2 * (2 if self.bn else 1))
294 | computations.append(H*W*128*10//2 * (2 if self.bn else 1))
295 |
296 | return computations
297 |
298 | def getParams(self):
299 | params = sum([layer.getParams() for layer in self.downPart if layer is not None])
300 | params += 64*10*2 * (2 if self.bn else 1)
301 | params += 128*10//2 * (2 if self.bn else 1)
302 | return params
303 |
304 |
--------------------------------------------------------------------------------
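For orientation, a minimal inference sketch for the ROBO network defined above, assuming the default 3-channel, 384x512 configuration. In eval mode (no targets) the forward pass returns the concatenated box predictions of both detection heads, each row holding x, y, w, h and an objectness score:

import torch
from models import ROBO

model = ROBO(inch=3, ch=4, img_shape=(384, 512))
model.eval()

dummy = torch.randn(1, 3, 384, 512)       # one NCHW image
with torch.no_grad():
    detections = model(dummy)             # inference path: no targets given
print(detections.shape)                   # (1, num_boxes, 5)
print(model.getParams())                  # rough parameter count
print(sum(model.get_computations()))      # rough multiply-add estimate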
/.idea/workspace.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | from models import *
4 | from utils.utils import *
5 | from utils.datasets import *
6 | from utils.parse_config import *
7 |
8 | import sys
9 |
10 | import argparse
11 | import progressbar
12 |
13 | import torch
14 | from torch.utils.data import DataLoader
15 |
16 | def getmAP(all_annotations,all_detections):
17 | mAPs = np.zeros((2, 5))
18 | APs = np.zeros((2, 4, 5))
19 | thresholds = np.array([[4, 8, 16, 32, 64], [0.75, 0.5, 0.25, 0.1, 0.05]])
20 | for useIoU in range(2):
21 | for threshIdx in range(5):
22 | average_precisions = {}
23 | for label in range(num_classes):
24 | true_positives = []
25 | scores = []
26 | num_annotations = 0
27 |
28 | for i in range(len(all_annotations)):
29 | detections = all_detections[i][label]
30 | annotations = all_annotations[i][label]
31 |
32 | num_annotations += annotations.shape[0]
33 | detected_annotations = []
34 |
35 | for *bbox, score in detections:
36 | scores.append(score)
37 |
38 | if annotations.shape[0] == 0:
39 | true_positives.append(0)
40 | continue
41 |
42 | if useIoU > 0:
43 | overlaps = bbox_iou_numpy(np.expand_dims(bbox, axis=0), annotations)
44 | assigned_annotation = np.argmax(overlaps, axis=1)
45 | max_overlap = overlaps[0, assigned_annotation]
46 |
47 | if max_overlap >= thresholds[
48 | useIoU, threshIdx] and assigned_annotation not in detected_annotations:
49 | true_positives.append(1)
50 | detected_annotations.append(assigned_annotation)
51 | else:
52 | true_positives.append(0)
53 | else:
54 | distances = bbox_dist(bbox, annotations)
55 | assigned_annotation = np.argmin(distances)
56 | min_dist = distances[assigned_annotation]
57 |
58 | if min_dist <= thresholds[
59 | useIoU, threshIdx] and assigned_annotation not in detected_annotations:
60 | true_positives.append(1)
61 | detected_annotations.append(assigned_annotation)
62 | else:
63 | true_positives.append(0)
64 |
65 | # no annotations -> AP for this class is 0
66 | if num_annotations == 0:
67 | average_precisions[label] = 0
68 | continue
69 |
70 | true_positives = np.array(true_positives)
71 | false_positives = np.ones_like(true_positives) - true_positives
72 | # sort by score
73 | indices = np.argsort(-np.array(scores))
74 | false_positives = false_positives[indices]
75 | true_positives = true_positives[indices]
76 |
77 | # compute false positives and true positives
78 | false_positives = np.cumsum(false_positives)
79 | true_positives = np.cumsum(true_positives)
80 |
81 | # compute recall and precision
82 | recall = true_positives / num_annotations
83 | precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)
84 |
85 | # compute average precision
86 | average_precision = compute_ap(recall, precision)
87 | average_precisions[label] = average_precision
88 |
89 | for c, ap in average_precisions.items():
90 | APs[useIoU, c, threshIdx] = ap
91 |
92 | mAP = np.mean(list(average_precisions.values()))
93 | mAPs[useIoU, threshIdx] = mAP
94 | return mAPs, APs
95 |
96 | if __name__ == '__main__':
97 |     parser = argparse.ArgumentParser()
98 |     parser.add_argument("--model_config_path", type=str, default="config/robo-down-small.cfg", help="path to model config file")
99 |     parser.add_argument("--class_path", type=str, default="data/robo.names", help="path to class label file")
100 |     parser.add_argument('--batch_size', type=int, default=64, help='size of the batches')
101 |     parser.add_argument("--iou_thres", type=float, default=0.5, help="iou threshold required to qualify as detected")
102 |     parser.add_argument("--conf_thres", type=float, default=0.5, help="object confidence threshold")
103 |     parser.add_argument("--nms_thres", type=float, default=0.45, help="iou threshold for non-maximum suppression")
104 |     parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation")
105 |     parser.add_argument("--img_size", type=int, default=(384,512), help="size of each image dimension")
106 |     parser.add_argument("--transfer", help="Layers to truly train", action="store_true", default=False)
107 |     parser.add_argument("--finetune", help="Finetuning", action="store_true", default=False)
108 |     parser.add_argument("--bn", help="Use bottleneck", action="store_true", default=False)
109 |     parser.add_argument("--yu", help="Use 2 channels", action="store_true", default=False)
110 |     parser.add_argument("--hr", help="Use half res", action="store_true", default=False)
111 |     parser.add_argument("--lprop", help="Use label propagation", action="store_true", default=False)
112 |     opt = parser.parse_args()
113 |
114 | cuda = torch.cuda.is_available()
115 |
116 | lprop = opt.lprop
117 |
118 | data_config_path = "config/roboFinetune.data" if opt.finetune else "config/robo.data"
119 | img_size = (192,256) if opt.hr else (384,512)
120 |
121 | name = "checkpoints/bestFinetune" if opt.finetune else "checkpoints/best"
122 | if opt.yu:
123 | name += "2C"
124 | if opt.bn:
125 | name += "BN"
126 | if opt.hr:
127 | name += "HR"
128 |
129 | weights_path = []
130 | if opt.transfer:
131 | weights_path = sorted(glob.glob(name + "T*.weights"),reverse=True)
132 | elif opt.finetune:
133 | weights_path = sorted(glob.glob(name + "*_*.weights"),reverse=True)
134 | weights_path += [name + ".weights"]
135 | if not opt.bn:
136 | weights_path = [path for path in weights_path if "BN" not in path]
137 | if not opt.yu:
138 | weights_path = [path for path in weights_path if "2C" not in path]
139 | if not opt.hr:
140 | weights_path = [path for path in weights_path if "HR" not in path]
141 | if lprop:
142 | weights_path = [weights_path[0]]
143 |
144 | # Get data configuration
145 | data_config = parse_data_config(data_config_path)
146 | test_path = data_config["valid"]
147 | if lprop:
148 | test_path = "../../Data/RoboCup" if sys.platform != 'win32' else "D:/Datasets/RoboCup"
149 | num_classes = int(data_config["classes"])
150 | channels = 2 if opt.yu else 3
151 | seq_len = 4
152 |
153 | # Initiate model
154 | for path in weights_path:
155 | print(path)
156 | model = ROBO(inch=channels,bn=opt.bn, halfRes=opt.hr)
157 | print(model.getParams())
158 | model.load_state_dict(torch.load(path,map_location={'cuda:0': 'cpu'}))
159 |
160 | print(count_zero_weights(model))
161 |
162 | #with torch.no_grad():
163 | #pruneModel(model.parameters())
164 |
165 | computations = model.get_computations(True)
166 |
167 | print(computations)
168 | print(sum(computations))
169 |
170 | if cuda:
171 | model = model.cuda()
172 |
173 | model.eval()
174 |
175 | # Get dataloader
176 | if lprop:
177 | dataset = LPDataSet(test_path, train=False, finetune=opt.finetune, yu=opt.yu, img_size=img_size,len_seq=seq_len)
178 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=opt.n_cpu, collate_fn=my_collate)
179 | else:
180 | dataset = ListDataset(test_path, train=False, synth=opt.finetune, yu=opt.yu, img_size=img_size)
181 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.n_cpu)
182 |
183 | Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
184 |
185 | print("Compute mAP...")
186 |
187 | all_detections = []
188 | all_lp_detections = []
189 | all_annotations = []
190 |
191 | bar = progressbar.ProgressBar(0, len(dataloader), redirect_stdout=False)
192 |
193 | for batch_i, data in enumerate(dataloader):
194 |
195 | if lprop:
196 | imgs, targets, cvimgs = data
197 | cvimgs = cvimgs[0]
198 | else:
199 | _, imgs, targets = data
200 | imgs = imgs.type(Tensor)
201 |
202 | with torch.no_grad():
203 | outputs = model(imgs)
204 | outputs = non_max_suppression(outputs, 80, conf_thres=opt.conf_thres, nms_thres=opt.nms_thres)
205 |
206 | for output, annotations in zip(outputs, targets):
207 |
208 | all_detections.append([np.array([]) for _ in range(num_classes)])
209 | if output is not None:
210 | # Get predicted boxes, confidence scores and labels
211 | pred_boxes = output[:, :5].cpu().numpy()
212 | scores = output[:, 4].cpu().numpy()
213 | pred_labels = output[:, -1].cpu().numpy()
214 |
215 | # Order by confidence
216 | sort_i = np.argsort(scores)
217 | pred_labels = pred_labels[sort_i]
218 | pred_boxes = pred_boxes[sort_i]
219 |
220 | for label in range(num_classes):
221 | all_detections[-1][label] = pred_boxes[pred_labels == label]
222 |
223 | all_annotations.append([np.array([]) for _ in range(num_classes)])
224 | if any(annotations[:, -1] > 0):
225 |
226 | annotation_labels = annotations[annotations[:, -1] > 0, 0].numpy()
227 | _annotation_boxes = annotations[annotations[:, -1] > 0, 1:]
228 |
229 | # Reformat to x1, y1, x2, y2 and rescale to image dimensions
230 | annotation_boxes = np.empty_like(_annotation_boxes)
231 | annotation_boxes[:, 0] = (_annotation_boxes[:, 0] - _annotation_boxes[:, 2] / 2)*opt.img_size[1]
232 | annotation_boxes[:, 1] = (_annotation_boxes[:, 1] - _annotation_boxes[:, 3] / 2)*opt.img_size[0]
233 | annotation_boxes[:, 2] = (_annotation_boxes[:, 0] + _annotation_boxes[:, 2] / 2)*opt.img_size[1]
234 | annotation_boxes[:, 3] = (_annotation_boxes[:, 1] + _annotation_boxes[:, 3] / 2)*opt.img_size[0]
235 | #annotation_boxes *= opt.img_size
236 |
237 | for label in range(num_classes):
238 | all_annotations[-1][label] = annotation_boxes[annotation_labels == label, :]
239 |
240 | if lprop:
241 | for i in range(seq_len):
242 | if i == 0:
243 | all_lp_detections.append(labelProp(cvimgs[i],cvimgs[i+1],all_detections[-seq_len+1]))
244 | else:
245 | all_lp_detections.append(labelProp(cvimgs[i],cvimgs[i-1],all_detections[-seq_len+i-1]))
246 |
247 | bar.update(batch_i)
248 | bar.finish()
249 |
250 | mAPs, APs = getmAP(all_annotations,all_detections)
251 | if not lprop:
252 | for c in range(4):
253 | print("Class %d:" % c)
254 | for i in range(2):
255 | print("Dist: " if i < 1 else "IoU: ",APs[i,c,:])
256 | print("mAP:")
257 | for i in range(2):
258 | print("Dist: " if i < 1 else "IoU: ",mAPs[i,:])
259 |
260 | if lprop:
261 | LPmAPs, LPAPs = getmAP(all_annotations, all_lp_detections)
262 | print("LP")
263 | print("mAP:")
264 | for i in range(2):
265 | print("Dist: " if i < 1 else "IoU: ",LPmAPs[i,:])
266 |
--------------------------------------------------------------------------------
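getmAP above scores every class twice: once with centre-distance thresholds (4 to 64 px) and once with IoU thresholds (0.75 down to 0.05). A small sketch of the two matching criteria on a single detection/annotation pair, using the helpers from utils.utils; the box values are purely illustrative:

import numpy as np
from utils.utils import bbox_dist, bbox_iou_numpy

det = np.array([100.0, 100.0, 140.0, 180.0])      # x1, y1, x2, y2
gts = np.array([[105.0, 95.0, 150.0, 175.0]])     # one ground-truth box

print(bbox_dist(det, gts))                # centre distance in pixels (~9.0 here)
print(bbox_iou_numpy(det[None, :], gts))  # IoU of the same pair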
/utils/datasets.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import random
3 | import os
4 | import os.path as osp
5 | import numpy as np
6 |
7 | import torch
8 |
9 | from torch.utils.data import Dataset
10 | from PIL import Image
11 | import torchvision.transforms as transforms
12 | import torchvision.transforms.functional as F
13 | import numbers
14 | import cv2
15 | import re
16 |
17 | def tryint(s):
18 | try:
19 | return int(s)
20 |     except ValueError:
21 | return s
22 |
23 | def alphanum_key(s):
24 | """ Turn a string into a list of string and number chunks.
25 | "z23a" -> ["z", 23, "a"]
26 | """
27 | return [ tryint(c) for c in re.split('([0-9]+)', s) ]
28 |
29 | def my_collate(batch):
30 | imgs,targets,cvimgs = zip(*batch)
31 | return torch.cat(imgs),torch.cat(targets),cvimgs
32 |
33 | def get_immediate_subdirectories(a_dir):
34 | return [name for name in os.listdir(a_dir)
35 | if os.path.isdir(os.path.join(a_dir, name))]
36 |
37 | class RandomAffineCust(object):
38 | """Random affine transformation of the image keeping center invariant
39 |
40 | Args:
41 | degrees (sequence or float or int): Range of degrees to select from.
42 | If degrees is a number instead of sequence like (min, max), the range of degrees
43 | will be (-degrees, +degrees). Set to 0 to deactivate rotations.
44 | translate (tuple, optional): tuple of maximum absolute fraction for horizontal
45 | and vertical translations. For example translate=(a, b), then horizontal shift
46 | is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is
47 | randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default.
48 | scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is
49 | randomly sampled from the range a <= scale <= b. Will keep original scale by default.
50 | shear (sequence or float or int, optional): Range of degrees to select from.
51 | If degrees is a number instead of sequence like (min, max), the range of degrees
52 | will be (-degrees, +degrees). Will not apply shear by default
53 | resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
54 | An optional resampling filter. See `filters`_ for more information.
55 | If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
56 | fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0)
57 |
58 | .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
59 |
60 | """
61 |
62 | def __init__(self, degrees, translate=None, scale=None, resample=False, fillcolor=0):
63 | if isinstance(degrees, numbers.Number):
64 | if degrees < 0:
65 | raise ValueError("If degrees is a single number, it must be positive.")
66 | self.degrees = (-degrees, degrees)
67 | else:
68 | assert isinstance(degrees, (tuple, list)) and len(degrees) == 2, \
69 | "degrees should be a list or tuple and it must be of length 2."
70 | self.degrees = degrees
71 |
72 | if translate is not None:
73 | assert isinstance(translate, (tuple, list)) and len(translate) == 2, \
74 | "translate should be a list or tuple and it must be of length 2."
75 | for t in translate:
76 | if not (0.0 <= t <= 1.0):
77 | raise ValueError("translation values should be between 0 and 1")
78 | self.translate = translate
79 |
80 | if scale is not None:
81 | assert isinstance(scale, (tuple, list)) and len(scale) == 2, \
82 | "scale should be a list or tuple and it must be of length 2."
83 | for s in scale:
84 | if s <= 0:
85 | raise ValueError("scale values should be positive")
86 | self.scale = scale
87 |
88 | self.resample = resample
89 | self.fillcolor = fillcolor
90 |
91 | @staticmethod
92 | def get_params(degrees, translate, scale_ranges, img_size):
93 | """Get parameters for affine transformation
94 |
95 | Returns:
96 | sequence: params to be passed to the affine transformation
97 | """
98 | angle = random.uniform(degrees[0], degrees[1])
99 | if translate is not None:
100 | max_dx = translate[0] * img_size[0]
101 | max_dy = translate[1] * img_size[1]
102 | translations = (np.round(random.uniform(-max_dx, max_dx)),
103 | np.round(random.uniform(-max_dy, max_dy)))
104 | else:
105 | translations = (0, 0)
106 |
107 | if scale_ranges is not None:
108 | scale = random.uniform(scale_ranges[0], scale_ranges[1])
109 | else:
110 | scale = 1.0
111 |
112 | shear = 0.0
113 |
114 | return angle, translations, scale, shear
115 |
116 | def __call__(self, img, label):
117 | """
118 | img (PIL Image): Image to be transformed.
119 |
120 | Returns:
121 | PIL Image: Affine transformed image.
122 | """
123 | ret = self.get_params(self.degrees, self.translate, self.scale, img.size)
124 |
125 | angle = np.deg2rad(ret[0])
126 | translations = (ret[1][0]/img.size[0],ret[1][1]/img.size[1])
127 | scale = ret[2]
128 | imgRatio = img.size[0]/img.size[1]
129 | x = (label[:,1]-0.5)*imgRatio
130 | y = label[:,2]-0.5
131 | label[:,1] = (x*np.cos(angle) - y*np.sin(angle))*scale/imgRatio + 0.5 + translations[0]
132 | label[:,2] = (x*np.sin(angle) + y*np.cos(angle))*scale + 0.5 + translations[1]
133 | label[:, 3] *= scale
134 | label[:, 4] *= scale
135 |
136 | o_img = F.affine(img, *ret, resample=self.resample, fillcolor=self.fillcolor)
137 | return o_img, label
138 |
139 | class ImageFolder(Dataset):
140 | def __init__(self, folder_path, type = '%s/*.*', synth = False, yu = False, hr = False):
141 | self.files = sorted(glob.glob(type % folder_path))
142 | self.yu = yu
143 | self.hr = hr
144 | self.resize = transforms.Resize((192,256))
145 | self.mean = [0.4637419, 0.47166784, 0.48316576] if synth else [0.36224657, 0.41139355, 0.28278301]
146 | self.std = [0.45211827, 0.16890674, 0.18645908] if synth else [0.3132638, 0.21061972, 0.34144647]
147 | self.transform = transforms.Compose([
148 | transforms.ToTensor(),
149 | transforms.Normalize(mean=self.mean,std=self.std)
150 | ])
151 |
152 | def __getitem__(self, index):
153 | img_path = self.files[index % len(self.files)].rstrip()
154 | img = Image.open(img_path)
155 |
156 | if self.hr:
157 | img = self.resize(img)
158 |
159 | input_img = self.transform(img)
160 |
161 | if self.yu:
162 | input_img[1] = input_img[2]*0.5 + input_img[1]*0.5
163 | input_img = input_img[0:2]
164 |
165 | return img_path, input_img
166 |
167 | def __len__(self):
168 | return len(self.files)
169 |
170 |
171 | class ListDataset(Dataset):
172 | def __init__(self, list_path, img_size=(384,512), train=True, synth = False, yu=False):
173 | with open(list_path, 'r') as file:
174 | self.img_files = file.readlines()
175 | self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') for path in self.img_files]
176 | self.img_shape = img_size
177 | self.max_objects = 50
178 | self.train = train
179 | self.synth = synth
180 | self.img_size = img_size
181 | self.yu = yu
182 | self.jitter = ColorJitter(0.3,0.3,0.3,3.1415/6,0.05)
183 | self.resize = transforms.Resize(img_size)
184 | self.affine = RandomAffineCust(5,(0.025,0.025),(0.9,1.1),fillcolor=0)
185 | self.mean = [0.36269532, 0.41144562, 0.282713] if synth else [0.40513613, 0.48072927, 0.48718367]
186 | self.std = [0.31111388, 0.21010718, 0.34060917] if synth else [0.44540985, 0.15460468, 0.18062305]
187 | self.normalize = transforms.Normalize(mean=self.mean,std=self.std)
188 |
189 | def __getitem__(self, index):
190 |
191 | #---------
192 | # Image
193 | #---------
194 | img_path = self.img_files[index % len(self.img_files)].rstrip()
195 | img = Image.open(img_path)
196 |
197 |         if self.img_size[0] != img.size[1] or self.img_size[1] != img.size[0]:
198 | img = self.resize(img)
199 |
200 | w, h = img.size
201 |
202 | # ---------
203 | # Label
204 | # ---------
205 | label_path = self.label_files[index % len(self.img_files)].rstrip()
206 | labels = np.loadtxt(label_path).reshape(-1, 5)
207 |
208 | if self.train:
209 | img,labels = self.affine(img,labels)
210 |
211 | p = 0
212 | input_img = transforms.functional.to_tensor(img)
213 | input_img = self.normalize(input_img)
214 | if self.train:
215 | p = torch.rand(1).item()
216 | if p > 0.5:
217 | input_img = input_img.flip(2)
218 | input_img = self.jitter(input_img)
219 |
220 | if self.yu:
221 | input_img[1] = input_img[2]*0.5 + input_img[1]*0.5
222 | input_img = input_img[0:2]
223 |
224 | if p > 0.5:
225 | labels[:,1] = 1 - labels[:,1]
226 |
227 | # Squeeze centers inside image
228 | labels[:, 1] = np.clip(labels[:, 1], a_min=0, a_max = 0.999)
229 | labels[:, 2] = np.clip(labels[:, 2], a_min=0, a_max = 0.999)
230 |
231 | smallLabels = np.array([lab for lab in labels if lab[0] < 2])
232 | bigLabels = np.array([lab for lab in labels if lab[0] >= 2])
233 |
234 | if self.train:
235 | # Fill matrix
236 | filled_labels_small = np.zeros((self.max_objects//2, 5))
237 | filled_labels_big = np.zeros((self.max_objects//2, 5))
238 | if smallLabels is not None and smallLabels.shape[0] > 0:
239 | filled_labels_small[range(len(smallLabels))[:self.max_objects]] = smallLabels[:self.max_objects]
240 | filled_labels_small = torch.from_numpy(filled_labels_small)
241 | if bigLabels is not None and bigLabels.shape[0] > 0:
242 | bigLabels[:,0] -= 2
243 | filled_labels_big[range(len(bigLabels))[:self.max_objects]] = bigLabels[:self.max_objects]
244 | filled_labels_big = torch.from_numpy(filled_labels_big)
245 |
246 | return img_path, input_img, (filled_labels_small,filled_labels_big)
247 | else:
248 | filled_labels = np.zeros((self.max_objects, 5))
249 | if labels is not None:
250 | filled_labels[range(len(labels))[:self.max_objects]] = labels[:self.max_objects]
251 | filled_labels = torch.from_numpy(filled_labels)
252 |
253 | return img_path, input_img, filled_labels
254 |
255 |
256 | def __len__(self):
257 | return len(self.img_files)
258 |
259 | def myRGB2YUV(img):
260 | mtx = torch.FloatTensor([[0.299,0.587,0.114],[-0.14713,-0.28886,0.436],[0.615,-0.51499,-0.10001]])
261 | return torch.einsum('nm,mbc->nbc',mtx,img)
262 |
263 | class ColorJitter(object):
264 | def __init__(self,b=0.3,c=0.3,s=0.3,h=3.1415/6,var=0.05):
265 | super(ColorJitter,self).__init__()
266 | self.b = b
267 | self.c = c
268 | self.s = s
269 | self.h = h
270 | self.var = var
271 |
272 | def __call__(self, img):
273 | b_val = random.uniform(-self.b,self.b)
274 | c_val = random.uniform(1-self.c,1+self.c)
275 | s_val = random.uniform(1-self.s,1+self.s)
276 | h_val = random.uniform(-self.h,self.h)
277 |
278 | mtx = torch.FloatTensor([[s_val*np.cos(h_val),-np.sin(h_val)],[np.sin(h_val),s_val*np.cos(h_val)]])
279 |
280 | img += torch.randn_like(img)*self.var
281 | img[0] = (img[0]+b_val)*c_val
282 | if self.s > 0 and self.h > 0:
283 | img[1:] = torch.einsum('nm,mbc->nbc',mtx,img[1:])
284 |
285 | return img
286 |
287 | class LPDataSet(Dataset):
288 | def __init__(self, root, img_size=(384,512), train=True, finetune = False, yu=False, len_seq = 2):
289 | self.finetune = finetune
290 | self.img_size = img_size
291 | self.yu = yu
292 | self.len_seq = len_seq
293 | self.max_objects = 50
294 | self.root = osp.join(root,"LabelProp")
295 | self.split = "train" if train else "val"
296 | self.resize = transforms.Resize(img_size)
297 | self.mean = [0.34190056, 0.4833289, 0.48565758] if finetune else [0.36269532, 0.41144562, 0.282713]
298 | self.std = [0.47421749, 0.13846053, 0.1714848] if finetune else [0.31111388, 0.21010718, 0.34060917]
299 | self.normalize = transforms.Normalize(mean=self.mean,std=self.std)
300 | self.images = []
301 | self.labels = []
302 | self.predictions = []
303 |
304 |
305 | data_dir = osp.join(self.root,"Real" if finetune else "Synthetic")
306 | data_dir = osp.join(data_dir, self.split)
307 |
308 | for dir in get_immediate_subdirectories(data_dir):
309 | currDir = osp.join(data_dir,dir)
310 | img_dir = osp.join(currDir,"images")
311 | images = []
312 | for file in sorted(glob.glob1(img_dir, "*.png"), key=alphanum_key):
313 | images.append(osp.join(img_dir, file))
314 | self.images.append(images)
315 | self.labels.append([path.replace('.png', '.txt').replace('.jpg', '.txt') for path in images])
316 |
317 | def __len__(self):
318 | length = 0
319 | for imgs in self.images:
320 | length += len(imgs) - self.len_seq + 1
321 | return length
322 |
323 | def __getitem__(self, index):
324 | dirindex = 0
325 | itemindex = index
326 |
327 | #print index
328 |
329 | for imgs in self.images:
330 | #print(dirindex, itemindex, len(imgs))
331 | if itemindex >= len(imgs) - self.len_seq + 1:
332 | dirindex += 1
333 | itemindex -= (len(imgs) - self.len_seq + 1)
334 | else:
335 | break
336 |
337 | #print(dirindex, itemindex)
338 | labels = []
339 | imgs = []
340 | cvimgs = []
341 | for i in range(self.len_seq):
342 | img_file = self.images[dirindex][itemindex+i]
343 | label_file = self.labels[dirindex][itemindex+i].rstrip()
344 |
345 | img = Image.open(img_file).convert('RGB')
346 | label = np.loadtxt(label_file).reshape(-1, 5)
347 | # Squeeze centers inside image
348 | label[:, 1] = np.clip(label[:, 1], a_min=0, a_max = 0.999)
349 | label[:, 2] = np.clip(label[:, 2], a_min=0, a_max = 0.999)
350 |
351 |             if self.img_size[0] != img.size[1] or self.img_size[1] != img.size[0]:
352 | img = self.resize(img)
353 |
354 | img_ten = cv2.cvtColor(np.array(img),cv2.COLOR_RGB2YUV)
355 | img_ten = transforms.functional.to_tensor(img_ten).float()
356 | img_ten = self.normalize(img_ten)
357 | if self.yu:
358 | img_ten[1] = img_ten[2] * 0.5 + img_ten[1] * 0.5
359 | img_ten = img_ten[0:2]
360 | img_ten = img_ten.unsqueeze(0)
361 |
362 | filled_label = np.zeros((self.max_objects, 5))
363 | if label is not None:
364 | filled_label[range(len(label))[:self.max_objects]] = label[:self.max_objects]
365 | filled_label = torch.from_numpy(filled_label).unsqueeze(0)
366 |
367 | labels.append(filled_label)
368 | imgs.append(img_ten)
369 | cvimgs.append(cv2.resize(cv2.cvtColor(np.array(img),cv2.COLOR_RGB2GRAY),(160,120)))
370 |
371 | imgs = torch.cat(imgs)
372 | labels = torch.cat(labels)
373 | return imgs, labels, cvimgs
--------------------------------------------------------------------------------
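A minimal loading sketch for ListDataset, mirroring how test.py builds its validation loader; the list-file path is an assumption based on the data config layout. Note that in train mode the dataset instead returns a (small, big) pair of label tensors split by class for the two detection heads:

from torch.utils.data import DataLoader
from utils.datasets import ListDataset

dataset = ListDataset("data/RoboCup/test.txt", train=False, synth=False, yu=False, img_size=(384, 512))
loader = DataLoader(dataset, batch_size=16, shuffle=False, num_workers=4)

for paths, imgs, targets in loader:
    print(imgs.shape, targets.shape)   # (B, 3, 384, 512) and (B, 50, 5)
    break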
/utils/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import math
3 | import time
4 | import torch
5 | import torch.nn as nn
6 | import numpy as np
7 | import glob
8 | from PIL import Image
9 | import progressbar
10 | import cv2
11 | import os
12 |
13 |
14 | def load_classes(path):
15 |     """
16 |     Loads class labels at 'path'
17 |     """
18 |     with open(path, "r") as fp:
19 |         names = fp.read().split("\n")[:-1]
20 |     return names
21 |
22 |
23 | def compute_ap(recall, precision):
24 | """ Compute the average precision, given the recall and precision curves.
25 | Code originally from https://github.com/rbgirshick/py-faster-rcnn.
26 |
27 | # Arguments
28 | recall: The recall curve (list).
29 | precision: The precision curve (list).
30 | # Returns
31 | The average precision as computed in py-faster-rcnn.
32 | """
33 | # correct AP calculation
34 | # first append sentinel values at the end
35 | mrec = np.concatenate(([0.0], recall, [1.0]))
36 | mpre = np.concatenate(([0.0], precision, [0.0]))
37 |
38 | # compute the precision envelope
39 | for i in range(mpre.size - 1, 0, -1):
40 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
41 |
42 | # to calculate area under PR curve, look for points
43 | # where X axis (recall) changes value
44 | i = np.where(mrec[1:] != mrec[:-1])[0]
45 |
46 | # and sum (\Delta recall) * prec
47 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
48 | return ap
49 |
50 |
51 | def bbox_iou(box1, box2, x1y1x2y2=True):
52 | """
53 | Returns the IoU of two bounding boxes
54 | """
55 | if not x1y1x2y2:
56 | # Transform from center and width to exact coordinates
57 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
58 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
59 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
60 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
61 | else:
62 | # Get the coordinates of bounding boxes
63 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
64 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
65 |
66 |     # get the coordinates of the intersection rectangle
67 | inter_rect_x1 = torch.max(b1_x1, b2_x1)
68 | inter_rect_y1 = torch.max(b1_y1, b2_y1)
69 | inter_rect_x2 = torch.min(b1_x2, b2_x2)
70 | inter_rect_y2 = torch.min(b1_y2, b2_y2)
71 | # Intersection area
72 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
73 | inter_rect_y2 - inter_rect_y1 + 1, min=0
74 | )
75 | # Union Area
76 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
77 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
78 |
79 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
80 |
81 | return iou
82 |
83 |
84 | def bbox_iou_numpy(box1, box2):
85 | """Computes IoU between bounding boxes.
86 | Parameters
87 | ----------
88 | box1 : ndarray
89 | (N, 4) shaped array with bboxes
90 | box2 : ndarray
91 | (M, 4) shaped array with bboxes
92 | Returns
93 | -------
94 | : ndarray
95 | (N, M) shaped array with IoUs
96 | """
97 | area = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
98 |
99 | iw = np.minimum(np.expand_dims(box1[:, 2], axis=1), box2[:, 2]) - np.maximum(
100 | np.expand_dims(box1[:, 0], 1), box2[:, 0]
101 | )
102 | ih = np.minimum(np.expand_dims(box1[:, 3], axis=1), box2[:, 3]) - np.maximum(
103 | np.expand_dims(box1[:, 1], 1), box2[:, 1]
104 | )
105 |
106 | iw = np.maximum(iw, 0)
107 | ih = np.maximum(ih, 0)
108 |
109 | ua = np.expand_dims((box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1]), axis=1) + area - iw * ih
110 |
111 | ua = np.maximum(ua, np.finfo(float).eps)
112 |
113 | intersection = iw * ih
114 |
115 | return intersection / ua
116 |
117 |
118 | def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
119 | """
120 | Removes detections with lower object confidence score than 'conf_thres' and performs
121 | Non-Maximum Suppression to further filter detections.
122 | Returns detections with shape:
123 | (x1, y1, x2, y2, object_conf, class_score, class_pred)
124 | """
125 |
126 | # From (center x, center y, width, height) to (x1, y1, x2, y2)
127 | box_corner = prediction.new(prediction.shape)
128 | box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
129 | box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
130 | box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
131 | box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
132 | prediction[:, :, :4] = box_corner[:, :, :4]
133 |
134 | output = [None for _ in range(len(prediction))]
135 | for image_i, image_pred in enumerate(prediction):
136 | # Filter out confidence scores below threshold
137 | conf_mask = (image_pred[:, 4] >= conf_thres).squeeze()
138 |         classPred = torch.cat((torch.zeros(192),torch.ones(192),2*torch.ones(48),3*torch.ones(48))).unsqueeze(1)  # per-anchor class ids, hard-coded for a 384x512 input (12x16 grid -> classes 0,1; 6x8 grid -> classes 2,3)
139 | if torch.cuda.is_available():
140 | classPred = classPred.cuda()
141 | classPred = classPred[conf_mask]
142 | image_pred = image_pred[conf_mask]
143 | # If none are remaining => process next image
144 | if not image_pred.size(0):
145 | continue
146 | # Get score and class with highest confidence
147 | class_conf = image_pred[:, 4].unsqueeze(1)
148 | # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
149 | detections = torch.cat((image_pred[:, :5], class_conf.float(), classPred.float()), 1)
150 | # Iterate through all predicted classes
151 | unique_labels = detections[:, -1].cpu().unique()
152 | if prediction.is_cuda:
153 | unique_labels = unique_labels.cuda()
154 | for c in unique_labels:
155 | # Get the detections with the particular class
156 | detections_class = detections[detections[:, -1] == c]
157 | # Sort the detections by maximum objectness confidence
158 | _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)
159 | detections_class = detections_class[conf_sort_index]
160 | # Perform non-maximum suppression
161 | max_detections = []
162 | while detections_class.size(0):
163 | # Get detection with highest confidence and save as max detection
164 | max_detections.append(detections_class[0].unsqueeze(0))
165 | # Stop if we're at the last detection
166 | if len(detections_class) == 1:
167 | break
168 | # Get the IOUs for all boxes with lower confidence
169 | ious = bbox_iou(max_detections[-1], detections_class[1:])
170 | # Remove detections with IoU >= NMS threshold
171 | detections_class = detections_class[1:][ious < nms_thres]
172 |
173 | max_detections = torch.cat(max_detections).data
174 | # Add max detections to outputs
175 | output[image_i] = (
176 | max_detections if output[image_i] is None else torch.cat((output[image_i], max_detections))
177 | )
178 |
179 | return output
180 |
181 | def get_immediate_subdirectories(a_dir):
182 | return [name for name in os.listdir(a_dir)
183 | if os.path.isdir(os.path.join(a_dir, name))]
184 |
185 | def labelProp(img_gr,prevImg,BBS):
186 | of = cv2.calcOpticalFlowFarneback(prevImg, img_gr, None, pyr_scale=0.5,levels=2,winsize=15,iterations=2,poly_n=7,poly_sigma=1.5,flags=0)
187 | scale = 4.0
188 | ret = []
189 | for classBB in BBS:
190 | newClassBB = []
191 | for BB in classBB:
192 | xMin = max(0, int(BB[0]/scale))
193 | yMin = max(0, int(BB[1]/scale))
194 | xMax = min(img_gr.shape[1] - 1, int(math.ceil(BB[2]/scale)))
195 | yMax = min(img_gr.shape[0] - 1, int(math.ceil(BB[3]/scale)))
196 | patch = of[yMin:yMax, xMin:xMax]
197 | meanX = np.mean(patch[:, :, 0])*scale
198 | meanY = np.mean(patch[:, :, 1])*scale
199 | newBB = []
200 | newBB.append(max(0, int(round(BB[0] + meanX))))
201 | newBB.append(max(0, int(round(BB[1] + meanY))))
202 | newBB.append(min(img_gr.shape[1]*scale - 1, int(round(BB[2] + meanX))))
203 | newBB.append(min(img_gr.shape[0]*scale - 1, int(round(BB[3] + meanY))))
204 | newBB.append(BB[4])
205 | newClassBB.append(newBB)
206 | ret.append(newClassBB)
207 | return ret
208 |
209 | def pruneModel(params, ratio = 0.01, glasso=False):
210 | i = 0
211 | indices = []
212 | for param in params:
213 | if param.dim() > 1:
214 | if glasso:
215 | dim = param.size()
216 | if dim.__len__() > 2:
217 | ind = torch.zeros_like(param)
218 | filtCnt = 0
219 | vals = param.pow(2).sum(dim=(1,2,3)).add(1e-8).pow(1 / 2.)
220 | thresh = torch.max(vals) * ratio
221 | for i,v in enumerate(vals):
222 | if v < thresh:
223 | filtCnt += 1
224 | param[i,:] = torch.zeros_like(param[i])
225 | ind[i,:] = torch.ones_like(ind[i])
226 | print("Pruned %f%% of the filters" % (filtCnt/vals.numel()*100))
227 | indices.append(ind.bool())
228 | else:
229 | indices.append(torch.zeros_like(param).bool())
230 | else:
231 | thresh = torch.max(torch.abs(param)) * ratio
232 | print("Pruned %f%% of the weights" % (
233 | float(torch.sum(torch.abs(param) < thresh)) / float(torch.sum(param != 0)) * 100))
234 | param[torch.abs(param) < thresh] = 0
235 | indices.append(torch.abs(param) < thresh)
236 | i += 1
237 |
238 | return indices
239 |
240 | def count_zero_weights(model,glasso=False):
241 |     nearZeroWeights = 0
242 |     totalWeights = 0
243 |     if glasso:
244 |         for param in model.parameters():
245 |             dim = param.size()
246 |             if dim.__len__() > 2:
247 |                 vals = param.pow(2).sum(dim=(1,2,3)).add(1e-8).pow(1/2.)
248 |                 maxVal = torch.max(vals)
249 |                 nearZeroWeights += (vals < maxVal * 0.01).sum().float()
250 |                 totalWeights += vals.numel()
251 |     else:
252 |         for param in model.parameters():
253 |             maxVal = torch.max(torch.abs(param))
254 |             nearZeroWeights += (torch.abs(param) < maxVal*0.01).sum().float()
255 |             totalWeights += param.numel()
256 |     return float(nearZeroWeights/totalWeights)
257 |
258 | def build_targets(
259 | pred_boxes, pred_conf, target, anchors, num_anchors, num_classes, grid_size_y, grid_size_x, ignore_thres, img_dim
260 | ):
261 | nB = target.size(0)
262 | nA = num_anchors
263 | #nC = num_classes
264 | nGx = grid_size_x
265 | nGy = grid_size_y
266 | mask = torch.zeros(nB, nA, nGy, nGx)
267 | conf_mask = torch.ones(nB, nA, nGy, nGx)
268 | tx = torch.zeros(nB, nA, nGy, nGx)
269 | ty = torch.zeros(nB, nA, nGy, nGx)
270 | tw = torch.zeros(nB, nA, nGy, nGx)
271 | th = torch.zeros(nB, nA, nGy, nGx)
272 | tconf = torch.ByteTensor(nB, nA, nGy, nGx).fill_(0)
273 | corr = torch.ByteTensor(nB, nA, nGy, nGx).fill_(0)
274 |
275 | nGT = 0
276 | nCorrect = 0
277 | for b in range(nB):
278 | for t in range(target.shape[1]):
279 | if target[b, t].sum() == 0:
280 | continue
281 | nGT += 1
282 | # Convert to position relative to box
283 | # One-hot encoding of label
284 | target_label = int(target[b, t, 0])
285 | gx = target[b, t, 1] * nGx
286 | gy = target[b, t, 2] * nGy
287 | gw = target[b, t, 3] * nGx
288 | gh = target[b, t, 4] * nGy
289 | # Get grid box indices
290 | gi = int(gx)
291 | gj = int(gy)
292 |
293 | best_n = target_label
294 | # Get ground truth box
295 | gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0)
296 | # Get the best prediction
297 | pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0)
298 | # Masks
299 | mask[b, best_n, gj, gi] = 1
300 | conf_mask[b, best_n, gj, gi] = 1
301 | # Coordinates
302 | tx[b, best_n, gj, gi] = gx - gi
303 | ty[b, best_n, gj, gi] = gy - gj
304 | # Width and height
305 | tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
306 | th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
307 | #tcls[b, best_n, gj, gi, target_label] = 1
308 | tconf[b, best_n, gj, gi] = 1
309 |
310 | # Calculate iou between ground truth and best matching prediction
311 | iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)
312 | score = pred_conf[b, best_n, gj, gi]
313 | if (target_label != 3 or iou > 0.5) and score > 0.5:
314 | nCorrect += 1
315 | corr[b, best_n, gj, gi] = 1
316 |
317 | return nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, corr
318 |
319 |
320 | def to_categorical(y, num_classes):
321 | """ 1-hot encodes a tensor """
322 | return torch.from_numpy(np.eye(num_classes, dtype="uint8")[y])
323 |
324 | def bbox_dist(box1,boxes):
325 | distances = np.array([])
326 | for box2 in boxes:
327 | cent1x = (box1[0] + box1[2]) / 2
328 | cent1y = (box1[1] + box1[3]) / 2
329 | cent2x = (box2[0] + box2[2]) / 2
330 | cent2y = (box2[1] + box2[3]) / 2
331 | distances = np.append(distances,np.sqrt(pow(cent1x-cent2x,2) + pow(cent1y-cent2y,2)))
332 | return distances
333 |
334 | def computeAP(model,dataloader,conf_thres,nms_thres,num_classes,img_size,useIoU,thresh):
335 | Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
336 |
337 | all_detections = []
338 | all_annotations = []
339 |
340 | bar = progressbar.ProgressBar(0, len(dataloader), redirect_stdout=False)
341 |
342 | for batch_i, (_, imgs, targets) in enumerate(dataloader):
343 |
344 | if torch.cuda.is_available():
345 | imgs = imgs.cuda()
346 |
347 | with torch.no_grad():
348 | outputs = model(imgs)
349 | outputs = non_max_suppression(outputs, 80, conf_thres=conf_thres, nms_thres=nms_thres)
350 |
351 | for output, annotations in zip(outputs, targets):
352 |
353 | all_detections.append([np.array([]) for _ in range(num_classes)])
354 | if output is not None:
355 | # Get predicted boxes, confidence scores and labels
356 | pred_boxes = output[:, :5].cpu().numpy()
357 | scores = output[:, 4].cpu().numpy()
358 | pred_labels = output[:, -1].cpu().numpy()
359 |
360 | # Order by confidence
361 | sort_i = np.argsort(scores)
362 | pred_labels = pred_labels[sort_i]
363 | pred_boxes = pred_boxes[sort_i]
364 |
365 | for label in range(num_classes):
366 | all_detections[-1][label] = pred_boxes[pred_labels == label]
367 |
368 | all_annotations.append([np.array([]) for _ in range(num_classes)])
369 | if any(annotations[:, -1] > 0):
370 |
371 | annotation_labels = annotations[annotations[:, -1] > 0, 0].numpy()
372 | _annotation_boxes = annotations[annotations[:, -1] > 0, 1:]
373 |
374 | # Reformat to x1, y1, x2, y2 and rescale to image dimensions
375 | annotation_boxes = np.empty_like(_annotation_boxes)
376 | annotation_boxes[:, 0] = (_annotation_boxes[:, 0] - _annotation_boxes[:, 2] / 2) * img_size[1]
377 | annotation_boxes[:, 1] = (_annotation_boxes[:, 1] - _annotation_boxes[:, 3] / 2) * img_size[0]
378 | annotation_boxes[:, 2] = (_annotation_boxes[:, 0] + _annotation_boxes[:, 2] / 2) * img_size[1]
379 | annotation_boxes[:, 3] = (_annotation_boxes[:, 1] + _annotation_boxes[:, 3] / 2) * img_size[0]
380 | # annotation_boxes *= opt.img_size
381 |
382 | for label in range(num_classes):
383 | all_annotations[-1][label] = annotation_boxes[annotation_labels == label, :]
384 |
385 | bar.update(batch_i)
386 | bar.finish()
387 | average_precisions = {}
388 | for label in range(num_classes):
389 | true_positives = []
390 | scores = []
391 | num_annotations = 0
392 |
393 | for i in range(len(all_annotations)):
394 | detections = all_detections[i][label]
395 | annotations = all_annotations[i][label]
396 |
397 | num_annotations += annotations.shape[0]
398 | detected_annotations = []
399 |
400 | for *bbox, score in detections:
401 | scores.append(score)
402 |
403 | if annotations.shape[0] == 0:
404 | true_positives.append(0)
405 | continue
406 |
407 | if useIoU:
408 | overlaps = bbox_iou_numpy(np.expand_dims(bbox, axis=0), annotations)
409 | assigned_annotation = np.argmax(overlaps, axis=1)
410 | max_overlap = overlaps[0, assigned_annotation]
411 |
412 | if max_overlap >= thresh and assigned_annotation not in detected_annotations:
413 | true_positives.append(1)
414 | detected_annotations.append(assigned_annotation)
415 | else:
416 | true_positives.append(0)
417 | else:
418 | distances = bbox_dist(bbox, annotations)
419 | assigned_annotation = np.argmin(distances)
420 | min_dist = distances[assigned_annotation]
421 |
422 | if min_dist <= thresh and assigned_annotation not in detected_annotations:
423 | true_positives.append(1)
424 | detected_annotations.append(assigned_annotation)
425 | else:
426 | true_positives.append(0)
427 |
428 | # no annotations -> AP for this class is 0
429 | if num_annotations == 0:
430 | average_precisions[label] = 0
431 | continue
432 |
433 | true_positives = np.array(true_positives)
434 | false_positives = np.ones_like(true_positives) - true_positives
435 | # sort by score
436 | indices = np.argsort(-np.array(scores))
437 | false_positives = false_positives[indices]
438 | true_positives = true_positives[indices]
439 |
440 | # compute false positives and true positives
441 | false_positives = np.cumsum(false_positives)
442 | true_positives = np.cumsum(true_positives)
443 |
444 | # compute recall and precision
445 | recall = true_positives / num_annotations
446 | precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)
447 |
448 | # compute average precision
449 | average_precision = compute_ap(recall, precision)
450 | average_precisions[label] = average_precision
451 |
452 | mAP = np.mean(list(average_precisions.values()))
453 |
454 | return mAP,list(average_precisions.values())
455 |
--------------------------------------------------------------------------------
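Finally, a tiny worked example of compute_ap, which integrates the precision envelope over the points where recall changes. With three detections of which two are true positives, the result is 0.5*1.0 + 0.5*(2/3) = 0.833:

import numpy as np
from utils.utils import compute_ap

recall    = np.array([0.5, 0.5, 1.0])        # cumulative TP / num_annotations
precision = np.array([1.0, 0.5, 2.0 / 3.0])  # cumulative TP / (TP + FP)
print(compute_ap(recall, precision))         # ~0.833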