├── .gitignore
├── ChasingTrainFramework_GeneralOneClassDetection
│   ├── README.md
│   ├── __init__.py
│   ├── data_iterator_base
│   │   ├── __init__.py
│   │   └── data_batch.py
│   ├── data_provider_base
│   │   ├── __init__.py
│   │   ├── base_data_adapter.py
│   │   ├── base_provider.py
│   │   ├── pickle_provider.py
│   │   └── text_list_adapter.py
│   ├── image_augmentation
│   │   ├── __init__.py
│   │   └── augmentor.py
│   ├── inference_speed_eval
│   │   ├── __init__.py
│   │   ├── inference_speed_eval_with_mxnet_cudnn.py
│   │   └── inference_speed_eval_with_tensorrt_cudnn.py
│   ├── logging_GOCD.py
│   ├── loss_layer_farm
│   │   ├── __init__.py
│   │   ├── cross_entropy_with_focal_loss_for_one_class_detection.py
│   │   ├── cross_entropy_with_hnm_for_one_class_detection.py
│   │   ├── loss.py
│   │   ├── mean_squared_error_with_hnm_for_one_class_detection.py
│   │   └── mean_squared_error_with_ohem_for_one_class_detection.py
│   ├── solver_GOCD.py
│   └── train_GOCD.py
├── LICENSE
├── README.md
├── face_detection
│   ├── README.md
│   ├── accuracy_evaluation
│   │   ├── evaluation_on_fddb.py
│   │   ├── evaluation_on_widerface.py
│   │   └── predict.py
│   ├── config_farm
│   │   ├── __init__.py
│   │   ├── configuration_10_320_20L_5scales_v2.py
│   │   └── configuration_10_560_25L_8scales_v1.py
│   ├── data_iterator_farm
│   │   ├── __init__.py
│   │   ├── multithread_dataiter_for_cross_entropy_v1.py
│   │   └── multithread_dataiter_for_cross_entropy_v2.py
│   ├── data_provider_farm
│   │   ├── __init__.py
│   │   ├── data_folder
│   │   │   └── .gitkeep
│   │   ├── pickle_provider.py
│   │   └── text_list_adapter.py
│   ├── demo
│   │   └── demo.py
│   ├── deploy_tensorrt
│   │   ├── README.md
│   │   ├── debug_image
│   │   │   ├── test1.jpg
│   │   │   ├── test2.jpg
│   │   │   ├── test3.jpg
│   │   │   ├── test5.jpg
│   │   │   └── test6.jpg
│   │   ├── predict_tensorrt.py
│   │   └── to_onnx.py
│   ├── inference_speed_evaluation
│   │   ├── README.md
│   │   └── inference_speed_eval.py
│   ├── metric_farm
│   │   ├── __init__.py
│   │   └── metric_default.py
│   ├── net_farm
│   │   ├── __init__.py
│   │   ├── naivenet.py
│   │   ├── naivenet20_resv2.gv
│   │   ├── naivenet20_resv2.gv.svg
│   │   └── naivenet_structures.xlsx
│   ├── qualitative_results
│   │   ├── v1_qualitative_1.jpg
│   │   ├── v1_qualitative_2.jpg
│   │   ├── v1_qualitative_3.jpg
│   │   ├── v1_qualitative_4.jpg
│   │   └── v1_qualitative_5.jpg
│   └── saved_model
│       ├── configuration_10_320_20L_5scales_v2
│       │   └── .gitkeep
│       └── configuration_10_560_25L_8scales_v1
│           └── .gitkeep
├── head_detection
│   ├── README.md
│   ├── accuracy_evaluation
│   │   ├── evaluation_on_brainwash.py
│   │   ├── predict.py
│   │   └── test_images
│   │       ├── 2.jpg
│   │       ├── 247.jpg
│   │       ├── 322.jpg
│   │       ├── 342.jpg
│   │       ├── 377.jpg
│   │       ├── 411.jpg
│   │       ├── 5.jpg
│   │       ├── 7.jpg
│   │       └── 72.jpg
│   ├── config_farm
│   │   ├── __init__.py
│   │   └── configuration_10_160_17L_4scales_v1.py
│   ├── data_iterator_farm
│   │   ├── __init__.py
│   │   └── multithread_dataiter_for_cross_entropy_v1.py
│   ├── data_provider_farm
│   │   ├── pickle_provider.py
│   │   ├── reformat_brainwash.py
│   │   └── text_list_adapter.py
│   ├── inference_speed_evaluation
│   │   └── inference_speed_eval.py
│   ├── metric_farm
│   │   ├── __init__.py
│   │   └── metric_default.py
│   └── symbol_farm
│       ├── __init__.py
│       ├── symbol_10_160_17L_4scales_v1.py
│       ├── symbol_10_160_17L_4scales_v1_deploy.json
│       └── symbol_structures.xlsx
├── license_plate_detection
│   ├── README.md
│   ├── accuracy_evaluation
│   │   ├── evaluation_on_CCPD.py
│   │   ├── predict.py
│   │   └── test_images
│   │       ├── test1.jpg_result.jpg
│   │       ├── test2.jpg_result.jpg
│   │       ├── test3.jpg_result.jpg
│   │       ├── test4.jpg_result.jpg
│   │       ├── test5.jpg_result.jpg
│   │       ├── test6.jpg_result.jpg
│   │       └── test7.jpg_result.jpg
│   ├── config_farm
│   │   ├── __init__.py
│   │   └── configuration_64_512_16L_3scales_v1.py
│   ├── data_iterator_farm
│   │   ├── __init__.py
│   │   └── multithread_dataiter_for_cross_entropy_v1.py
│   ├── data_provider_farm
│   │   ├── __init__.py
│   │   ├── pickle_provider.py
│   │   ├── reformat_CCPD.py
│   │   └── text_list_adapter.py
│   ├── inference_speed_evaluation
│   │   └── inference_speed_eval.py
│   ├── metric_farm
│   │   ├── __init__.py
│   │   └── metric_default.py
│   └── symbol_farm
│       ├── __init__.py
│       ├── symbol_64_512_16L_3scales_v1.py
│       ├── symbol_64_512_16L_3scales_v1_deploy.json
│       └── symbol_structures.xlsx
├── pedestrian_detection
│   ├── README.md
│   ├── accuracy_evaluation
│   │   ├── predict.py
│   │   └── test_images
│   │       ├── 1064.jpg
│   │       ├── 1081.jpg
│   │       ├── 1104.jpg
│   │       ├── 1199.jpg
│   │       ├── 1212.jpg
│   │       ├── 1461.jpg
│   │       ├── 2210.jpg
│   │       ├── 2221.jpg
│   │       ├── 2396.jpg
│   │       ├── 2407.jpg
│   │       ├── 2756.jpg
│   │       ├── 3043.jpg
│   │       ├── 326.jpg
│   │       ├── 3368.jpg
│   │       ├── 3812.jpg
│   │       ├── 3914.jpg
│   │       ├── 3981.jpg
│   │       ├── 3988.jpg
│   │       └── 877.jpg
│   ├── config_farm
│   │   ├── __init__.py
│   │   └── configuration_30_320_20L_4scales_v1.py
│   ├── data_iterator_farm
│   │   ├── __init__.py
│   │   └── multithread_dataiter_for_cross_entropy_v1.py
│   ├── data_provider_farm
│   │   ├── __init__.py
│   │   ├── pickle_provider.py
│   │   ├── reformat_caltech.py
│   │   └── text_list_adapter.py
│   ├── inference_speed_evaluation
│   │   └── inference_speed_eval.py
│   ├── metric_farm
│   │   ├── __init__.py
│   │   └── metric_default.py
│   └── symbol_farm
│       ├── __init__.py
│       ├── symbol_30_320_20L_4scales_v1.py
│       ├── symbol_30_320_20L_4scales_v1_deploy.json
│       └── symbol_structures.xlsx
└── vehicle_detection
    └── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # IPython
76 | profile_default/
77 | ipython_config.py
78 |
79 | # pyenv
80 | .python-version
81 |
82 | # celery beat schedule file
83 | celerybeat-schedule
84 |
85 | # SageMath parsed files
86 | *.sage.py
87 |
88 | # Environments
89 | .env
90 | .venv
91 | env/
92 | venv/
93 | ENV/
94 | env.bak/
95 | venv.bak/
96 |
97 | # Spyder project settings
98 | .spyderproject
99 | .spyproject
100 |
101 | # Rope project settings
102 | .ropeproject
103 |
104 | # mkdocs documentation
105 | /site
106 |
107 | # mypy
108 | .mypy_cache/
109 | .dmypy.json
110 | dmypy.json
111 |
112 |
113 | .idea/
114 |
115 | # pytorch model
116 | *.pth
117 |
118 | # pkl format dataset
119 | *.pkl
120 |
121 | # mxnet model
122 | *.params
123 |
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/README.md:
--------------------------------------------------------------------------------
1 | ## ChasingTrainFramework_GeneralSingleClassDetection
2 | ChasingTrainFramework_GeneralSingleClassDetection is a simple
3 | wrapper built on the MXNet Module API for general one-class detection.
4 | `Chasing` is just a project codename.
5 |
6 | ### Framework Introduction
7 | * **data_iterator_base** provides utilities for batch iterators. The design of a data
8 | iterator depends on the specific task, so no default iterator is provided here.
9 |
10 | * **data_provider_base** reformats and packs raw data. In most cases, all data can be loaded into
11 | memory for fast access.
12 |
13 | * **image_augmentation** provides commonly used augmentations.
14 |
15 | * **inference_speed_eval** provides two ways to evaluate inference speed -- MXNet with cuDNN and TensorRT with cuDNN.
16 |
17 | * **loss_layer_farm** provides customized loss types such as cross entropy with hard negative mining and focal loss.
18 |
19 | * **logging_GOCD** is a logging wrapper.
20 |
21 | * **solver_GOCD** executes the training process.
22 |
23 | * **train_GOCD** is the entry point of the framework.
--------------------------------------------------------------------------------
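As a quick orientation to the modules above, the following minimal sketch shows how the adapter/provider pair is composed in the data packing phase. It assumes the repository root is on PYTHONPATH; the data paths are hypothetical (see data_provider_base/pickle_provider.py below for the real entry points).

from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.text_list_adapter import TextListAdapter
from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.pickle_provider import PickleProvider

adapter = TextListAdapter('./data_folder/data_list.txt')  # hypothetical annotation list file
packer = PickleProvider('./data_folder/data.pkl', encode_quality=90, data_adapter=adapter)
packer.write()  # JPG-encodes every image and pickles the whole dataset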
/ChasingTrainFramework_GeneralOneClassDetection/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/__init__.py
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/data_iterator_base/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/data_iterator_base/__init__.py
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/data_iterator_base/data_batch.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 |
4 | class DataBatch:
5 | def __init__(self, torch_module):
6 | self._data = []
7 | self._label = []
8 | self.torch_module = torch_module
9 |
10 | def append_data(self, new_data):
11 | self._data.append(self.__as_tensor(new_data))
12 |
13 | def append_label(self, new_label):
14 | self._label.append(self.__as_tensor(new_label))
15 |
16 | def __as_tensor(self, in_data):
17 | return self.torch_module.from_numpy(in_data)
18 |
19 | @property
20 | def data(self):
21 | return self._data
22 |
23 | @property
24 | def label(self):
25 | return self._label
26 |
--------------------------------------------------------------------------------
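A minimal usage sketch for DataBatch, assuming the import path below and made-up tensor shapes; the torch module itself is injected, so the class carries no direct framework dependency.

import numpy
import torch
from ChasingTrainFramework_GeneralOneClassDetection.data_iterator_base.data_batch import DataBatch

batch = DataBatch(torch)  # inject the torch module
batch.append_data(numpy.zeros((4, 3, 160, 160), dtype=numpy.float32))  # one input blob
batch.append_label(numpy.ones((4, 6, 40, 40), dtype=numpy.float32))   # one label blob
print(type(batch.data[0]))  # <class 'torch.Tensor'>, converted via torch.from_numpy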
/ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/__init__.py
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/base_data_adapter.py:
--------------------------------------------------------------------------------
1 | """
2 | This module is to read, modify and return a single sample.
3 | It only works in data packing phase.
4 | """
5 |
6 |
7 | class DataAdapterBaseclass(object):
8 |
9 | def __init__(self):
10 | pass
11 |
12 | def __del__(self):
13 | pass
14 |
15 | def get_one(self):
16 | """
17 | return only one sample each time
18 | :return:
19 | """
20 | raise NotImplementedError()
21 |
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/base_provider.py:
--------------------------------------------------------------------------------
1 | """
2 | This module takes an adapter as the data supplier, packs the data and provides it to data iterators
3 |
4 | """
5 |
6 |
7 | class ProviderBaseclass(object):
8 | """
9 | This is the baseclass of packer. Any other detailed packer must inherit this class.
10 | """
11 |
12 | def __init__(self):
13 | pass
14 |
15 | def __str__(self):
16 | return self.__class__.__name__
17 |
18 | def __del__(self):
19 | pass
20 |
21 | def write(self):
22 | """
23 | Write a single sample to the files
24 | :return:
25 | """
26 | raise NotImplementedError()
27 |
28 | def read_by_index(self, index):
29 | """
30 | Read a single sample
31 | :return:
32 | """
33 | raise NotImplementedError()
34 |
35 |
36 | if __name__ == '__main__':
37 | provider = ProviderBaseclass()
38 | print(provider)
39 |
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/pickle_provider.py:
--------------------------------------------------------------------------------
1 | '''
2 | This provider accepts an adapter, saves the dataset in a pickle file and loads the whole dataset into memory for data iterators
3 | '''
4 |
5 | import cv2
6 | import numpy
7 | import pickle
8 |
9 | from .base_provider import ProviderBaseclass
10 | from .text_list_adapter import TextListAdapter
11 |
12 |
13 | class PickleProvider(ProviderBaseclass):
14 | """
15 | This class provides methods to save and read data.
16 | By default, images are compressed using JPG format.
17 |     If data_adapter is not None, the provider saves data; otherwise it reads data.
18 | """
19 |
20 | def __init__(self,
21 | pickle_file_path,
22 | encode_quality=90,
23 | data_adapter=None):
24 | ProviderBaseclass.__init__(self)
25 |
26 | if data_adapter: # write data
27 |
28 | self.data_adapter = data_adapter
29 | self.data = {}
30 | self.counter = 0
31 | self.pickle_file_path = pickle_file_path
32 |
33 | else: # read data
34 |
35 | self.data = pickle.load(open(pickle_file_path, 'rb'))
36 |             # get positive and negative indices
37 | self._positive_index = []
38 | self._negative_index = []
39 | for k, v in self.data.items():
40 | if v[1] == 0: # negative
41 | self._negative_index.append(k)
42 | else: # positive
43 | self._positive_index.append(k)
44 |
45 | self.compression_mode = '.jpg'
46 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality]
47 |
48 | @property
49 | def positive_index(self):
50 | return self._positive_index
51 |
52 | @property
53 | def negative_index(self):
54 | return self._negative_index
55 |
56 | def write(self):
57 |
58 | for data_item in self.data_adapter.get_one():
59 |
60 | temp_sample = []
61 | im, bboxes = data_item
62 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params)
63 |             if buf is None or buf.size == 0:
64 |                 print('Image encoding returned an empty buffer.')
65 |                 continue
66 |             if not ret:
67 |                 print('An error occurred during compression.')
68 |                 continue
69 | temp_sample.append(buf)
70 |
71 |             if isinstance(bboxes, str):  # negative sample
72 | temp_sample.append(0)
73 | temp_sample.append(int(bboxes))
74 | else:
75 | temp_sample.append(1)
76 | temp_sample.append(bboxes)
77 |
78 | self.data[self.counter] = temp_sample
79 |             print('Successfully saved the %d-th data item.' % self.counter)
80 | self.counter += 1
81 |
82 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
83 |
84 | def read_by_index(self, index):
85 | im_buf, flag, bboxes = self.data[index]
86 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR)
87 | return im, flag, bboxes
88 |
89 |
90 | def write_file():
91 | data_list_file_path = './data_folder/data_list_2019-05-07-14-47-19.txt'
92 | LFPD_adapter = TextListAdapter(data_list_file_path)
93 |
94 | pickle_file_path = './data_folder/data_2019-05-07-14-47-19.pkl'
95 | encode_quality = 90
96 | LFPD_packer = PickleProvider(pickle_file_path, encode_quality, LFPD_adapter)
97 | LFPD_packer.write()
98 |
99 |
100 | def read_file():
101 | pickle_file_path = './data_folder/data_2019-05-07-14-47-19.pkl'
102 |
103 | LFPD_provider = PickleProvider(pickle_file_path)
104 | positive_index = LFPD_provider.positive_index
105 | negative_index = LFPD_provider.negative_index
106 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index)))
107 | # all_index = positive_index+negative_index
108 | import random
109 | random.shuffle(positive_index)
110 |
111 | for i, index in enumerate(positive_index):
112 | im, flag, bboxes_numpy = LFPD_provider.read_by_index(index)
113 | if isinstance(bboxes_numpy, numpy.ndarray):
114 | for n in range(bboxes_numpy.shape[0]):
115 | cv2.rectangle(im, (bboxes_numpy[n, 0], bboxes_numpy[n, 1]),
116 | (bboxes_numpy[n, 0] + bboxes_numpy[n, 2], bboxes_numpy[n, 1] + bboxes_numpy[n, 3]), (0, 255, 0), 1)
117 | cv2.imshow('im', im)
118 | cv2.waitKey()
119 |
120 |
121 | if __name__ == '__main__':
122 | pass
123 | write_file()
124 | # read_file()
125 | # generate_rec_20181202()
126 |
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/text_list_adapter.py:
--------------------------------------------------------------------------------
1 | '''
2 | This adapter accepts a text file as input which describes the annotated data.
3 | Each line in the text file is formatted as:
4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]......
5 | '''
6 |
7 | import cv2
8 | import numpy
9 | from .base_data_adapter import DataAdapterBaseclass
10 |
11 |
12 | class TextListAdapter(DataAdapterBaseclass):
13 |
14 | def __init__(self, data_list_file_path):
15 |
16 | DataAdapterBaseclass.__init__(self)
17 | fin = open(data_list_file_path, 'r')
18 | self.lines = fin.readlines()
19 | fin.close()
20 | self.line_counter = 0
21 |
22 | def __del__(self):
23 | pass
24 |
25 | def get_one(self):
26 | """
27 |         This function uses 'yield' to return samples one at a time
28 | """
29 | while self.line_counter < len(self.lines):
30 |
31 | line = self.lines[self.line_counter].strip('\n').split(',')
32 |             if line[1] == '1':  # for a positive sample, check that the bbox values match the declared count
33 | assert len(line[3:]) == 4 * int(line[2])
34 |
35 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED)
36 |
37 | if line[1] == '0':
38 | yield im, '0'
39 | self.line_counter += 1
40 | continue
41 |
42 | num_bboxes = int(line[2])
43 | bboxes = []
44 | for i in range(num_bboxes):
45 | x = float(line[3 + i * 4])
46 | y = float(line[3 + i * 4 + 1])
47 | width = float(line[3 + i * 4 + 2])
48 | height = float(line[3 + i * 4 + 3])
49 |
50 | bboxes.append([x, y, width, height])
51 |
52 | bboxes = numpy.array(bboxes, dtype=numpy.float32)
53 | yield im, bboxes
54 |
55 | self.line_counter += 1
56 |
57 |
58 | if __name__ == '__main__':
59 | pass
60 |
--------------------------------------------------------------------------------
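For concreteness, here are two hypothetical annotation lines in the format described by the module docstring: a positive sample with two bboxes, followed by a negative sample (for negatives the adapter only reads the image path and the flag):

/data/images/0001.jpg,1,2,10.0,20.0,30.0,40.0,50.0,60.0,35.0,45.0
/data/images/0002.jpg,0,0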
/ChasingTrainFramework_GeneralOneClassDetection/image_augmentation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/image_augmentation/__init__.py
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/image_augmentation/augmentor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | This module provides many types of image augmentation. One can choose appropriate augmentation for
4 | detection, segmentation and classification.
5 | """
6 | import cv2
7 | import numpy
8 | import random
9 |
10 |
11 | class Augmentor(object):
12 | """
13 | All augmentation operations are static methods of this class.
14 | """
15 |
16 | def __init__(self):
17 | pass
18 |
19 | @staticmethod
20 | def histogram_equalisation(image):
21 | """
22 |         do histogram equalisation for a grayscale image
23 |         :param image: input image with a single 8-bit channel
24 | :return: processed image
25 | """
26 | if image.ndim != 2:
27 | print('Input image is not grayscale!')
28 | return None
29 | if image.dtype != numpy.uint8:
30 | print('Input image is not uint8!')
31 | return None
32 |
33 | result = cv2.equalizeHist(image)
34 | return result
35 |
36 | @staticmethod
37 | def grayscale(image):
38 | """
39 | convert BGR image to grayscale image
40 | :param image: input image with BGR channels
41 | :return:
42 | """
43 | if image.ndim != 3:
44 | return None
45 | if image.dtype != numpy.uint8:
46 | print('Input image is not uint8!')
47 | return None
48 |
49 | result = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
50 | return result
51 |
52 | @staticmethod
53 | def inversion(image):
54 | """
55 | invert the image (255-)
56 | :param image: input image with BGR or grayscale
57 | :return:
58 | """
59 | if image.dtype != numpy.uint8:
60 | print('Input image is not uint8!')
61 | return None
62 |
63 | result = 255 - image
64 | return result
65 |
66 | @staticmethod
67 | def binarization(image, block_size=5, C=10):
68 | """
69 | convert input image to binary image
70 | cv2.adaptiveThreshold is used, for detailed information, refer to opencv docs
71 | :param image:
72 | :return:
73 | """
74 | if image.ndim == 3:
75 | image_grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
76 | else:
77 | image_grayscale = image
78 |
79 | binary_image = cv2.adaptiveThreshold(image_grayscale, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
80 | cv2.THRESH_BINARY, block_size, C)
81 | return binary_image
82 |
83 | @staticmethod
84 | def brightness(image, min_factor=0.5, max_factor=1.5):
85 | '''
86 | adjust the image brightness
87 | :param image:
88 | :param min_factor:
89 | :param max_factor:
90 | :return:
91 | '''
92 | if image.dtype != numpy.uint8:
93 | print('Input image is not uint8!')
94 | return None
95 |
96 | factor = numpy.random.uniform(min_factor, max_factor)
97 | result = image * factor
98 | if factor > 1:
99 | result[result > 255] = 255
100 | result = result.astype(numpy.uint8)
101 | return result
102 |
103 | @staticmethod
104 | def saturation(image, min_factor=0.5, max_factor=1.5):
105 | '''
106 | adjust the image saturation
107 | :param image:
108 | :param min_factor:
109 | :param max_factor:
110 | :return:
111 | '''
112 | if image.dtype != numpy.uint8:
113 | print('Input image is not uint8!')
114 | return None
115 |
116 | image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
117 | factor = numpy.random.uniform(min_factor, max_factor)
118 |
119 | result = numpy.zeros(image.shape, dtype=numpy.float32)
120 | result[:, :, 0] = image[:, :, 0] * factor + image_gray * (1 - factor)
121 | result[:, :, 1] = image[:, :, 1] * factor + image_gray * (1 - factor)
122 | result[:, :, 2] = image[:, :, 2] * factor + image_gray * (1 - factor)
123 | result[result > 255] = 255
124 | result[result < 0] = 0
125 | result = result.astype(numpy.uint8)
126 | return result
127 |
128 | @staticmethod
129 | def contrast(image, min_factor=0.5, max_factor=1.5):
130 | '''
131 | adjust the image contrast
132 | :param image:
133 | :param min_factor:
134 | :param max_factor:
135 | :return:
136 | '''
137 | if image.dtype != numpy.uint8:
138 | print('Input image is not uint8!')
139 | return None
140 |
141 | image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
142 | gray_mean = numpy.mean(image_gray)
143 | temp = numpy.ones((image.shape[0], image.shape[1]), dtype=numpy.float32) * gray_mean
144 | factor = numpy.random.uniform(min_factor, max_factor)
145 |
146 | result = numpy.zeros(image.shape, dtype=numpy.float32)
147 | result[:, :, 0] = image[:, :, 0] * factor + temp * (1 - factor)
148 | result[:, :, 1] = image[:, :, 1] * factor + temp * (1 - factor)
149 | result[:, :, 2] = image[:, :, 2] * factor + temp * (1 - factor)
150 |
151 | result[result > 255] = 255
152 | result[result < 0] = 0
153 | result = result.astype(numpy.uint8)
154 |
155 | return result
156 |
157 | @staticmethod
158 | def blur(image, mode='random', kernel_size=3, sigma=1):
159 | """
160 |
161 | :param image:
162 | :param mode: options 'normalized' 'gaussian' 'median'
163 | :param kernel_size:
164 | :param sigma: used for gaussian blur
165 | :return:
166 | """
167 | if image.dtype != numpy.uint8:
168 | print('Input image is not uint8!')
169 | return None
170 |
171 | if mode == 'random':
172 | mode = random.choice(['normalized', 'gaussian', 'median'])
173 |
174 | if mode == 'normalized':
175 | result = cv2.blur(image, (kernel_size, kernel_size))
176 | elif mode == 'gaussian':
177 | result = cv2.GaussianBlur(image, (kernel_size, kernel_size), sigmaX=sigma, sigmaY=sigma)
178 | elif mode == 'median':
179 | result = cv2.medianBlur(image, kernel_size)
180 | else:
181 | print('Blur mode is not supported: %s.' % mode)
182 | result = image
183 | return result
184 |
185 | @staticmethod
186 | def rotation(image, degree=10, mode='crop', scale=1):
187 | """
188 |
189 | :param image:
190 | :param degree:
191 | :param mode: 'crop'-keep original size, 'fill'-keep full image
192 | :param scale:
193 | :return:
194 | """
195 | if image.dtype != numpy.uint8:
196 | print('Input image is not uint8!')
197 | return None
198 |
199 | h, w = image.shape[:2]
200 | center_x, center_y = w / 2, h / 2
201 | M = cv2.getRotationMatrix2D((center_x, center_y), degree, scale)
202 |
203 | if mode == 'crop':
204 | new_w, new_h = w, h
205 | else:
206 | cos = numpy.abs(M[0, 0])
207 | sin = numpy.abs(M[0, 1])
208 | new_w = int(h * sin + w * cos)
209 | new_h = int(h * cos + w * sin)
210 | M[0, 2] += (new_w / 2) - center_x
211 | M[1, 2] += (new_h / 2) - center_y
212 |
213 | result = cv2.warpAffine(image, M, (new_w, new_h))
214 | return result
215 |
216 | @staticmethod
217 | def flip(image, orientation='h'):
218 | '''
219 |
220 | :param image:
221 | :param orientation:
222 | :return:
223 | '''
224 | if image.dtype != numpy.uint8:
225 | print('Input image is not uint8!')
226 | return None
227 |
228 | if orientation == 'h':
229 | return cv2.flip(image, 1)
230 | elif orientation == 'v':
231 | return cv2.flip(image, 0)
232 | else:
233 | print('Unsupported orientation: %s.' % orientation)
234 | return image
235 |
236 | @staticmethod
237 | def resize(image, size_in_pixel=None, size_in_scale=None):
238 | """
239 |
240 | :param image:
241 | :param size_in_pixel: tuple (width, height)
242 | :param size_in_scale: tuple (width_scale, height_scale)
243 | :return:
244 | """
245 | if image.dtype != numpy.uint8:
246 | print('Input image is not uint8!')
247 | return None
248 |
249 | if size_in_pixel is not None:
250 | return cv2.resize(image, size_in_pixel)
251 | elif size_in_scale is not None:
252 | return cv2.resize(image, (0, 0), fx=size_in_scale[0], fy=size_in_scale[1])
253 | else:
254 | print('size_in_pixel and size_in_scale are both None.')
255 | return image
256 |
257 | @staticmethod
258 | def crop(image, x, y, width, height):
259 | """
260 |
261 | :param image:
262 | :param x: crop area top-left x coordinate
263 | :param y: crop area top-left y coordinate
264 | :param width: crop area width
265 | :param height: crop area height
266 | :return:
267 | """
268 | if image.dtype != numpy.uint8:
269 | print('Input image is not uint8!')
270 | return None
271 |
272 | if image.ndim == 3:
273 | return image[y:y + height, x:x + width, :]
274 | else:
275 | return image[y:y + height, x:x + width]
276 |
277 | @staticmethod
278 | def random_crop(image, width, height):
279 | """
280 |
281 | :param image:
282 | :param width: crop area width
283 | :param height: crop area height
284 | :return:
285 | """
286 | if image.dtype != numpy.uint8:
287 | print('Input image is not uint8!')
288 |             return None  # consistent with the other methods on dtype error
289 |
290 | w_interval = image.shape[1] - width
291 | h_interval = image.shape[0] - height
292 |
293 | if image.ndim == 3:
294 | result = numpy.zeros((height, width, 3), dtype=numpy.uint8)
295 | else:
296 | result = numpy.zeros((height, width), dtype=numpy.uint8)
297 |
298 | if w_interval >= 0 and h_interval >= 0:
299 | crop_x, crop_y = random.randint(0, w_interval), random.randint(0, h_interval)
300 | if image.ndim == 3:
301 | result = image[crop_y:crop_y + height, crop_x:crop_x + width, :]
302 | else:
303 | result = image[crop_y:crop_y + height, crop_x:crop_x + width]
304 | elif w_interval < 0 and h_interval >= 0:
305 |             put_x = -w_interval // 2  # integer division: used as a slice index
306 | crop_y = random.randint(0, h_interval)
307 | if image.ndim == 3:
308 | result[:, put_x:put_x + image.shape[1], :] = image[crop_y:crop_y + height, :, :]
309 | else:
310 | result[:, put_x:put_x + image.shape[1]] = image[crop_y:crop_y + height, :]
311 | elif w_interval >= 0 and h_interval < 0:
312 | crop_x = random.randint(0, w_interval)
313 |             put_y = -h_interval // 2  # integer division: used as a slice index
314 | if image.ndim == 3:
315 | result[put_y:put_y + image.shape[0], :, :] = image[:, crop_x:crop_x + width, :]
316 | else:
317 | result[put_y:put_y + image.shape[0], :] = image[:, crop_x:crop_x + width]
318 | else:
319 |             put_x, put_y = -w_interval // 2, -h_interval // 2  # integer division: used as slice indices
320 | if image.ndim == 3:
321 | result[put_y:put_y + image.shape[0], put_x:put_x + image.shape[1], :] = image[:, :, :]
322 | else:
323 | result[put_y:put_y + image.shape[0], put_x:put_x + image.shape[1]] = image[:, :]
324 |
325 | return result
326 |
327 |
--------------------------------------------------------------------------------
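A minimal sketch chaining several of the static methods above on one image. The path is hypothetical, and every method returns None (or the input unchanged) on invalid input, so production code should check the results.

import cv2
from ChasingTrainFramework_GeneralOneClassDetection.image_augmentation.augmentor import Augmentor

im = cv2.imread('test.jpg')                                    # uint8 BGR image, hypothetical path
im = Augmentor.brightness(im, min_factor=0.8, max_factor=1.2)  # random brightness factor
im = Augmentor.blur(im, mode='random')                         # normalized/gaussian/median, picked randomly
im = Augmentor.flip(im, orientation='h')                       # horizontal flip
im = Augmentor.random_crop(im, 320, 320)                       # crop (or zero-pad) to 320x320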
/ChasingTrainFramework_GeneralOneClassDetection/inference_speed_eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/inference_speed_eval/__init__.py
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/inference_speed_eval/inference_speed_eval_with_mxnet_cudnn.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import sys
3 | import os
4 | import time
5 | import logging
6 |
7 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '1'
8 | logging.getLogger().setLevel(logging.INFO)
9 |
10 |
11 | class InferenceSpeedEval(object):
12 | def __init__(self, symbol_file_path, mxnet_module, input_shape, input_name='data', device_type='gpu', gpu_index=0):
13 | '''
14 |
15 | :param symbol_file_path: symbol file path
16 | :param mxnet_module: mxnet module
17 | :param input_shape: input shape in tuple--(batch_size, num_channel, height, width)
18 | :param input_name: input name defined in symbol, by default 'data'
19 | :param device_type: device type: 'gpu', 'cpu'
20 | :param gpu_index: gpu index
21 | '''
22 | self.symbol_file_path = symbol_file_path
23 | self.mxnet_module = mxnet_module
24 | self.input_name = input_name
25 | self.input_shape = input_shape
26 | self.device_type = device_type
27 |         if self.device_type == 'cpu':  # CAUTION: x86 CPU inference needs MXNet built with MKL-DNN, otherwise inference will be very slow
28 | self.context = self.mxnet_module.cpu()
29 | elif self.device_type == 'gpu':
30 | self.context = self.mxnet_module.gpu(gpu_index)
31 | else:
32 |             logging.error('Unknown device_type: %s.' % self.device_type)
33 | sys.exit(1)
34 |
35 | # load symbol file
36 | if not os.path.exists(self.symbol_file_path):
37 | logging.error('Symbol file: %s does not exist!' % symbol_file_path)
38 | sys.exit(1)
39 | self.symbol_net = self.mxnet_module.symbol.load(self.symbol_file_path)
40 |
41 | # create module
42 | self.module = self.mxnet_module.module.Module(symbol=self.symbol_net,
43 | data_names=[self.input_name],
44 | label_names=None,
45 | context=self.context)
46 | self.module.bind(data_shapes=[(self.input_name, self.input_shape)], for_training=False, grad_req='write')
47 |
48 | self.module.init_params(initializer=self.mxnet_module.initializer.Xavier(), allow_missing=True)
49 | self.module.init_optimizer(kvstore=None)
50 |
51 | def run_speed_eval(self, warm_run_loops=10, real_run_loops=100):
52 | random_input_data = [self.mxnet_module.random.uniform(-1.0, 1.0, shape=self.input_shape, ctx=self.context)]
53 | temp_batch = self.mxnet_module.io.DataBatch(random_input_data, [])
54 |
55 | # basic info of this eval
56 | logging.info('Test symbol file: %s' % self.symbol_file_path)
57 | logging.info('Test device: %s' % self.device_type)
58 | logging.info('Test input shape: %s' % str(self.input_shape))
59 |
60 |         # warm run
61 | for i in range(warm_run_loops):
62 | self.module.forward(temp_batch)
63 | for output in self.module.get_outputs():
64 | output.asnumpy()
65 |
66 | logging.info('Start real run loops---------------')
67 | tic = time.time()
68 | # real run
69 | for i in range(real_run_loops):
70 | self.module.forward(temp_batch)
71 | for output in self.module.get_outputs():
72 | output.asnumpy()
73 |
74 | toc = time.time()
75 |
76 | print('Finish %d loops in %.02f ms. \n[%.02f ms] for each loop \n[%.02f ms] for each image (namely %.02f FPS)' %
77 | (real_run_loops,
78 | (toc - tic) * 1000,
79 | (toc - tic) * 1000 / real_run_loops,
80 | (toc - tic) * 1000 / real_run_loops / self.input_shape[0],
81 | real_run_loops * self.input_shape[0] / (toc - tic)))
82 |
83 |
84 | if __name__ == '__main__':
85 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') # set MXNet python path if needed
86 | import mxnet
87 |
88 | symbol_file_path = '/home/heyonghao/projects/tocreate_LFFD_ICCV2019_FaceDetector/symbol_farm/symbol_10_560_25L_8scales_s5_v2_deploy.json'
89 | input_shape = (1, 3, 720, 1280) # (1, 3, 240, 320) (1, 3, 480, 640) (1, 3, 720, 1280) (1, 3, 1080, 1920) (1, 3, 2160, 3840) (1, 3, 4320, 7680)
90 | device_type = 'gpu'
91 | gpu_index = 0
92 |
93 | speedEval = InferenceSpeedEval(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type=device_type, gpu_index=gpu_index)
94 | speedEval.run_speed_eval(warm_run_loops=10, real_run_loops=500)
95 |
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/inference_speed_eval/inference_speed_eval_with_tensorrt_cudnn.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import sys
3 | import os
4 | import time
5 | import logging
6 | import numpy
7 |
8 | import pycuda.driver as cuda
9 | import pycuda.autoinit
10 | import tensorrt as trt
11 |
12 | logging.getLogger().setLevel(logging.INFO)
13 |
14 |
15 | # Simple helper data class that's a little nicer to use than a 2-tuple.
16 | class HostDeviceMem(object):
17 | def __init__(self, host_mem, device_mem):
18 | self.host = host_mem
19 | self.device = device_mem
20 |
21 | def __str__(self):
22 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
23 |
24 | def __repr__(self):
25 | return self.__str__()
26 |
27 |
28 | class InferenceSpeedEval(object):
29 | def __init__(self, symbol_file_path, mxnet_module, input_shape, data_mode='fp32'):
30 |
31 | if not os.path.exists(symbol_file_path):
32 | logging.error('symbol file does not exist!')
33 | sys.exit(1)
34 |
35 | if len(input_shape) != 4:
36 | logging.error('input shape should have 4 elements in the order of NCHW.')
37 | sys.exit(1)
38 |
39 | symbol_net = mxnet_module.symbol.load(symbol_file_path)
40 | # create module
41 | module = mxnet_module.module.Module(symbol=symbol_net,
42 | data_names=['data'],
43 | label_names=None,
44 | context=mxnet_module.cpu())
45 | module.bind(data_shapes=[('data', input_shape)], for_training=False, grad_req='write')
46 | module.init_params(initializer=mxnet_module.initializer.Xavier(), allow_missing=True)
47 | arg_params, aux_params = module.get_params()
48 | net_params = dict()
49 | net_params.update(arg_params)
50 | net_params.update(aux_params)
51 | self.onnx_temp_file = 'temp.onnx'
52 | logging.info('Convert mxnet symbol to onnx...')
53 | mxnet_module.contrib.onnx.export_model(symbol_net, net_params, [input_shape], numpy.float32, self.onnx_temp_file, verbose=False)
54 |
55 | # build engine
56 | trt_logger = trt.Logger(trt.Logger.WARNING)
57 | builder = trt.Builder(trt_logger)
58 | builder.max_batch_size = input_shape[0]
59 | builder.average_find_iterations = 2
60 | builder.max_workspace_size = 2 << 30
61 |
62 | if data_mode == 'fp32':
63 | pass
64 | elif data_mode == 'fp16':
65 | if not builder.platform_has_fast_fp16:
66 | logging.error('fp16 is not supported by this platform!')
67 | sys.exit(1)
68 | builder.fp16_mode = True
69 | elif data_mode == 'int8':
70 |             logging.error('int8 mode is not implemented yet.')
71 | sys.exit(1)
72 | if not builder.platform_has_fast_int8:
73 | logging.error('int8 is not supported by this platform!')
74 | sys.exit(1)
75 | builder.int8_mode = True
76 | else:
77 | logging.error('Unknown data_mode: %s' % data_mode)
78 | logging.error('Available choices: \'fp32\'(default), \'fp16\', \'int8\'')
79 | sys.exit(1)
80 |
81 | network = builder.create_network()
82 | parser = trt.OnnxParser(network, trt_logger)
83 | logging.info('Parsing onnx for trt network...')
84 | with open(self.onnx_temp_file, 'rb') as onnx_fin:
85 | parser.parse(onnx_fin.read())
86 |
87 | num_parser_errors = parser.num_errors
88 | if num_parser_errors != 0:
89 | logging.error('Errors occur while parsing the onnx file!')
90 | for i in range(num_parser_errors):
91 | logging.error('Error %d: %s' % (i, parser.get_error(i).desc()))
92 | sys.exit(1)
93 |
94 |         logging.info('Start to build the trt engine... (this step may take a long time)')
95 | time_start = time.time()
96 | self.engine = builder.build_cuda_engine(network)
97 | time_end = time.time()
98 | logging.info('Engine building time: %.02f s' % (time_end - time_start))
99 |
100 | for binding in self.engine:
101 | if self.engine.binding_is_input(binding):
102 | logging.info('Input name: %s, shape: %s' % (binding, str(self.engine.get_binding_shape(binding))))
103 |
104 | self.executor = self.engine.create_execution_context()
105 | self.max_batch_size = builder.max_batch_size
106 |
107 | def __del__(self):
108 | if os.path.exists(self.onnx_temp_file):
109 | os.remove(self.onnx_temp_file)
110 |
111 | def run_speed_eval(self, warm_run_loops=10, real_run_loops=100):
112 |
113 | def allocate_buffers(engine):
114 | inputs = []
115 | outputs = []
116 | bindings = []
117 | for binding in engine:
118 | size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
119 | dtype = trt.nptype(engine.get_binding_dtype(binding))
120 | # Allocate host and device buffers
121 | host_mem = cuda.pagelocked_empty(size, dtype)
122 | device_mem = cuda.mem_alloc(host_mem.nbytes)
123 | # Append the device buffer to device bindings.
124 | bindings.append(int(device_mem))
125 | # Append to the appropriate list.
126 | if engine.binding_is_input(binding):
127 | inputs.append(HostDeviceMem(host_mem, device_mem))
128 | else:
129 | outputs.append(HostDeviceMem(host_mem, device_mem))
130 | return inputs, outputs, bindings
131 |
132 | inputs, outputs, bindings = allocate_buffers(self.engine)
133 | # warm run
134 | for i in range(warm_run_loops):
135 | [cuda.memcpy_htod(inp.device, inp.host) for inp in inputs]
136 | self.executor.execute(batch_size=self.max_batch_size, bindings=bindings)
137 | [cuda.memcpy_dtoh(out.host, out.device) for out in outputs]
138 |
139 | # real run
140 | logging.info('Start real run loop.')
141 | sum_time_data_copy = 0.
142 | sum_time_inference_only = 0.
143 | for i in range(real_run_loops):
144 | time_start = time.time()
145 | [cuda.memcpy_htod(inp.device, inp.host) for inp in inputs]
146 | sum_time_data_copy += time.time() - time_start
147 |
148 | time_start = time.time()
149 | self.executor.execute(batch_size=self.max_batch_size, bindings=bindings)
150 | sum_time_inference_only += time.time() - time_start
151 |
152 | time_start = time.time()
153 | [cuda.memcpy_dtoh(out.host, out.device) for out in outputs]
154 | sum_time_data_copy += time.time() - time_start
155 |
156 |         logging.info('Total time (data transfer & inference) elapsed: %.02f ms. [%.02f ms] for each image (%.02f FPS)'
157 | % ((sum_time_data_copy + sum_time_inference_only) * 1000,
158 | (sum_time_data_copy + sum_time_inference_only) * 1000 / real_run_loops / self.max_batch_size,
159 | real_run_loops * self.max_batch_size / (sum_time_data_copy + sum_time_inference_only)))
160 |
161 |
162 | if __name__ == '__main__':
163 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python')
164 | import mxnet
165 |
166 | symbol_file_path = '/home/heyonghao/projects/tocreate_LFFD_ICCV2019_FaceDetector/symbol_farm/symbol_10_560_25L_8scales_s5_v2_deploy.json'
167 | input_shape = (1, 3, 720, 1280) # (1, 3, 240, 320) (1, 3, 480, 640) (1, 3, 720, 1280) (1, 3, 1080, 1920) (1, 3, 2160, 3840) (1, 3, 4320, 7680)
168 |
169 | speedEval = InferenceSpeedEval(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, data_mode='fp32')
170 | speedEval.run_speed_eval(warm_run_loops=10, real_run_loops=500)
171 |
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/logging_GOCD.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | import logging
3 | import os
4 | import sys
5 |
6 | '''
7 | logging module
8 | '''
9 |
10 |
11 | def init_logging(log_file_path=None, log_file_mode='w', log_overwrite_flag=False, log_level=logging.INFO):
12 |     # the root logger always gets a console handler for output
13 | console_handler = logging.StreamHandler()
14 | formatter = logging.Formatter('%(asctime)s[%(levelname)s]: %(message)s')
15 | console_handler.setFormatter(formatter)
16 |
17 | logging.getLogger().setLevel(log_level)
18 | logging.getLogger().addHandler(console_handler)
19 |
20 | if not log_file_path or log_file_path == '':
21 |         print('No log file is specified. Log information will only be displayed in the console.')
22 | return
23 |
24 |     # check whether the log file already exists
25 | if not os.path.exists(log_file_path):
26 | location_dir = os.path.dirname(log_file_path)
27 | if not os.path.exists(location_dir):
28 | os.makedirs(location_dir)
29 |
30 | file_handler = logging.FileHandler(filename=log_file_path, mode=log_file_mode)
31 | file_handler.setFormatter(formatter)
32 | logging.getLogger().addHandler(file_handler)
33 |     else:
34 |         if log_overwrite_flag:
35 |             print('The file [%s] already exists. It will be handled according to the arg [log_file_mode] (default \'w\').' % log_file_path)
36 |             file_handler = logging.FileHandler(filename=log_file_path, mode=log_file_mode)
37 |             file_handler.setFormatter(formatter)
38 |             logging.getLogger().addHandler(file_handler)
39 |         else:
40 |             print('The file [%s] already exists and [log_overwrite_flag] is False; please change the log file name.' % log_file_path)
41 |             sys.exit(0)
42 |
43 |
44 | def temp_test():
45 | log_file = './test.log'
46 | file_mode = 'w'
47 | init_logging(log_file_path=log_file, log_file_mode=file_mode, log_overwrite_flag=True, log_level=logging.DEBUG)
48 |
49 |
50 | if __name__ == '__main__':
51 | temp_test()
52 | logging.info('test info')
53 |
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/__init__.py
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/cross_entropy_with_focal_loss_for_one_class_detection.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @date : 19-1-23
3 | # @author : MindBreaker
4 | # @module :
5 |
6 | import mxnet as mx
7 | import numpy as np
8 | import logging
9 |
10 |
11 | class focal_loss_for_twoclass(mx.operator.CustomOp):
12 |     '''
13 |     1. softmax is applied to in_data[0] (the pred) inside this operator, so the input should be raw scores
14 |     2. this CE operator is only for the two-class situation: index 0 indicates pos (foreground), index 1 indicates neg (background)
15 |     '''
16 |
17 | def __init__(self, alpha=0.25, gamma=2):
18 | super(focal_loss_for_twoclass, self).__init__()
19 | self.alpha = alpha
20 | self.gamma = gamma
21 |
22 | def forward(self, is_train, req, in_data, out_data, aux):
23 | pred = in_data[0]
24 | label = in_data[1]
25 | pred_softmax = mx.ndarray.softmax(pred, axis=1)
26 | pred_log = mx.ndarray.log(pred_softmax)
27 | cross_entropy = - label * pred_log
28 |
29 | self.assign(out_data[0], req[0], cross_entropy)
30 |
31 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
32 | pred = in_data[0]
33 | label = in_data[1]
34 | mask = in_data[2]
35 |
36 | pred_softmax = mx.ndarray.softmax(pred, axis=1)
37 |
38 | # print('pos mean prob:', mx.ndarray.mean(pred_softmax[:, 0, :, :][label[:, 0, :, :] > 0.5]).asnumpy())
39 | # print('neg mean prob:', mx.ndarray.mean(pred_softmax[:, 1, :, :][label[:, 1, :, :] > 0.5]).asnumpy())
40 |
41 | # pos_flag = label[:, 0, :, :] > 0.5
42 | # neg_flag = label[:, 1, :, :] > 0.5
43 |
44 | FL_gradient = -self.gamma * mx.ndarray.power(1 - pred_softmax, self.gamma - 1) * mx.ndarray.log(pred_softmax) * pred_softmax + mx.ndarray.power(1 - pred_softmax, self.gamma)
45 |
46 | FL_gradient[:, 0, :, :] *= self.alpha
47 | FL_gradient[:, 1, :, :] *= 1 - self.alpha
48 |
49 |         FL_gradient *= (pred_softmax - label)
50 |
51 | FL_gradient /= mx.ndarray.sum(mask).asnumpy()[0]
52 | # print('mean grad:', mx.ndarray.mean(mx.ndarray.abs(FL_gradient)).asnumpy())
53 |
54 | self.assign(in_grad[0], req[0], FL_gradient)
55 |
56 |
57 | @mx.operator.register("focal_loss_for_twoclass")
58 | class focal_loss_for_twoclass_Prop(mx.operator.CustomOpProp):
59 | def __init__(self):
60 | super(focal_loss_for_twoclass_Prop, self).__init__(need_top_grad=False)
61 |
62 | def list_arguments(self):
63 | return ['pred', 'label', 'mask']
64 |
65 | def list_outputs(self):
66 | return ['output']
67 |
68 | def infer_shape(self, in_shape):
69 | data_shape = in_shape[0]
70 | label_shape = in_shape[0]
71 | mask_shape = in_shape[0]
72 | output_shape = in_shape[0]
73 | return [data_shape, label_shape, mask_shape], [output_shape], []
74 |
75 | def create_operator(self, ctx, shapes, dtypes):
76 | return focal_loss_for_twoclass()
77 |
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/cross_entropy_with_hnm_for_one_class_detection.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 |
3 |
4 | class cross_entropy_with_hnm_for_one_class_detection(mx.operator.CustomOp):
5 |
6 | def __init__(self, hnm_ratio):
7 | super(cross_entropy_with_hnm_for_one_class_detection, self).__init__()
8 | self.hnm_ratio = int(hnm_ratio)
9 |
10 | def forward(self, is_train, req, in_data, out_data, aux):
11 | pred = in_data[0]
12 | label = in_data[1]
13 | pred_softmax = mx.ndarray.softmax(pred, axis=1)
14 | pred_log = mx.ndarray.log(pred_softmax)
15 | cross_entropy = - label * pred_log
16 |
17 | self.assign(out_data[0], req[0], cross_entropy)
18 |
19 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
20 | pred = in_data[0]
21 | label = in_data[1]
22 | mask = in_data[2]
23 |
24 | pred_softmax = mx.ndarray.softmax(pred, axis=1)
25 | CE_gradient = pred_softmax - label # Standard CE gradient
26 | loss_mask = mx.ndarray.ones((CE_gradient.shape[0], 1, CE_gradient.shape[2], CE_gradient.shape[3]), ctx=CE_gradient.context)
27 |
28 | if self.hnm_ratio > 0:
29 | pos_flag = (label[:, 0, :, :] > 0.5)
30 |             pos_num = mx.ndarray.sum(pos_flag).asnumpy()[0]  # get the number of positive samples
31 |
32 | if pos_num > 0:
33 | neg_flag = (label[:, 1, :, :] > 0.5)
34 | neg_num = mx.ndarray.sum(neg_flag).asnumpy()[0]
35 | neg_num_selected = min(int(self.hnm_ratio * pos_num), int(neg_num))
36 | neg_prob = pred_softmax[:, 1, :, :] * neg_flag # non-negative value
37 | neg_prob_sort = mx.ndarray.sort(neg_prob.reshape((1, -1)), is_ascend=True)
38 |
39 | prob_threshold = neg_prob_sort[0][neg_num_selected].asnumpy()[0]
40 | neg_grad_flag = (neg_prob <= prob_threshold)
41 | loss_mask = mx.ndarray.logical_or(neg_grad_flag, pos_flag)
42 | else:
43 | neg_choice_ratio = 0.1
44 | neg_num_selected = int(pred_softmax[:, 1, :, :].size * neg_choice_ratio)
45 | neg_prob = pred_softmax[:, 1, :, :]
46 | neg_prob_sort = mx.ndarray.sort(neg_prob.reshape((1, -1)), is_ascend=True)
47 | prob_threshold = neg_prob_sort[0][neg_num_selected].asnumpy()[0]
48 | loss_mask = (neg_prob <= prob_threshold)
49 |
50 | for i in range(CE_gradient.shape[1]):
51 | CE_gradient[:, i, :, :] *= loss_mask * mask[:, i, :, :]
52 |
53 | CE_gradient /= mx.ndarray.sum(loss_mask).asnumpy()[0]
54 |
55 | self.assign(in_grad[0], req[0], CE_gradient)
56 |
57 |
58 | @mx.operator.register("cross_entropy_with_hnm_for_one_class_detection")
59 | class cross_entropy_with_hnm_for_one_class_detection_Prop(mx.operator.CustomOpProp):
60 | def __init__(self, hnm_ratio=5):
61 | super(cross_entropy_with_hnm_for_one_class_detection_Prop, self).__init__(need_top_grad=False)
62 | self.hnm_ratio = hnm_ratio
63 |
64 | def list_arguments(self):
65 | return ['pred', 'label', 'mask']
66 |
67 | def list_outputs(self):
68 | return ['output']
69 |
70 | def infer_shape(self, in_shape):
71 | data_shape = in_shape[0]
72 | label_shape = in_shape[0]
73 | mask_shape = in_shape[0]
74 | output_shape = in_shape[0]
75 | return [data_shape, label_shape, mask_shape], [output_shape], []
76 |
77 | def create_operator(self, ctx, shapes, dtypes):
78 | return cross_entropy_with_hnm_for_one_class_detection(self.hnm_ratio)
79 |
--------------------------------------------------------------------------------
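A minimal sketch of wiring this custom op into a symbol graph with mx.symbol.Custom; variable names and shapes are hypothetical, and the module above must be imported first so the op registration runs.

import mxnet as mx

pred = mx.symbol.Variable('pred')    # (N, 2, H, W) raw scores; softmax is applied inside the op
label = mx.symbol.Variable('label')  # (N, 2, H, W) one-hot score labels
mask = mx.symbol.Variable('mask')    # (N, 2, H, W) valid-position mask
loss = mx.symbol.Custom(pred, label, mask,
                        op_type='cross_entropy_with_hnm_for_one_class_detection',
                        hnm_ratio=5)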
/ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/loss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 |
7 | class cross_entropy_with_hnm_for_one_class_detection2(nn.Module):
8 | def __init__(self, hnm_ratio, num_output_scales):
9 |         super(cross_entropy_with_hnm_for_one_class_detection2, self).__init__()
10 | self.hnm_ratio = int(hnm_ratio)
11 | self.num_output_scales = num_output_scales
12 |
13 | def forward(self, outputs, targets):
14 | loss_branch_list = []
15 | for i in range(self.num_output_scales):
16 | pred_score = outputs[i * 2]
17 | pred_bbox = outputs[i * 2 + 1]
18 | gt_mask = targets[i * 2].cuda()
19 | gt_label = targets[i * 2 + 1].cuda()
20 |
21 | pred_score_softmax = torch.softmax(pred_score, dim=1)
22 | # loss_mask = torch.ones(pred_score_softmax.shape[0],
23 | # 1,
24 | # pred_score_softmax.shape[2],
25 | # pred_score_softmax.shape[3])
26 | loss_mask = torch.ones(pred_score_softmax.shape)
27 |
28 | if self.hnm_ratio > 0:
29 | # print('gt_label.shape:', gt_label.shape)
30 | # print('gt_label.size():', gt_label.size())
31 | pos_flag = (gt_label[:, 0, :, :] > 0.5)
32 | pos_num = torch.sum(pos_flag) # get num. of positive examples
33 |
34 | if pos_num > 0:
35 | neg_flag = (gt_label[:, 1, :, :] > 0.5)
36 | neg_num = torch.sum(neg_flag)
37 | neg_num_selected = min(int(self.hnm_ratio * pos_num), int(neg_num))
38 | # non-negative value
39 | neg_prob = torch.where(neg_flag, pred_score_softmax[:, 1, :, :], \
40 | torch.zeros_like(pred_score_softmax[:, 1, :, :]))
41 | neg_prob_sort, _ = torch.sort(neg_prob.reshape(1, -1), descending=False)
42 |
43 | prob_threshold = neg_prob_sort[0][neg_num_selected-1]
44 | neg_grad_flag = (neg_prob <= prob_threshold)
45 | loss_mask = torch.cat([pos_flag.unsqueeze(1), neg_grad_flag.unsqueeze(1)], dim=1)
46 | else:
47 | neg_choice_ratio = 0.1
48 | neg_num_selected = int(pred_score_softmax[:, 1, :, :].numel() * neg_choice_ratio)
49 | neg_prob = pred_score_softmax[:, 1, :, :]
50 | neg_prob_sort, _ = torch.sort(neg_prob.reshape(1, -1), descending=False)
51 | prob_threshold = neg_prob_sort[0][neg_num_selected-1]
52 | neg_grad_flag = (neg_prob <= prob_threshold)
53 | loss_mask = torch.cat([pos_flag.unsqueeze(1), neg_grad_flag.unsqueeze(1)], dim=1)
54 |
55 | # cross entropy with mask
56 | pred_score_softmax_masked = pred_score_softmax[loss_mask]
57 | pred_score_log = torch.log(pred_score_softmax_masked)
58 | score_cross_entropy = -gt_label[:, :2, :, :][loss_mask] * pred_score_log
59 | loss_score = torch.sum(score_cross_entropy) / score_cross_entropy.numel()
60 |
61 | mask_bbox = gt_mask[:, 2:6, :, :]
62 | if torch.sum(mask_bbox) == 0:
63 | loss_bbox = torch.zeros_like(loss_score)
64 | else:
65 | predict_bbox = pred_bbox * mask_bbox
66 | label_bbox = gt_label[:, 2:6, :, :] * mask_bbox
67 | loss_bbox = F.mse_loss(predict_bbox, label_bbox, reduction='mean')
68 | # loss_bbox = F.smooth_l1_loss(predict_bbox, label_bbox, reduction='mean')
69 | # loss_bbox = torch.nn.MSELoss(predict_bbox, label_bbox, size_average=True, reduce=True)
70 | # loss_bbox = torch.nn.SmoothL1Loss(predict_bbox, label_bbox, size_average=True, reduce=True)
71 |
72 | loss_branch = loss_score + loss_bbox
73 | loss_branch_list.append(loss_branch)
74 | return loss_branch_list
75 |
76 |
77 | class cross_entropy_with_hnm_for_one_class_detection(nn.Module):
78 | def __init__(self, hnm_ratio, num_output_scales):
79 | super(cross_entropy_with_hnm_for_one_class_detection, self).__init__()
80 | self.hnm_ratio = int(hnm_ratio)
81 | self.num_output_scales = num_output_scales
82 |
83 | def forward(self, outputs, targets):
84 | loss_cls = 0
85 | loss_reg = 0
86 | loss_branch = []
87 | for i in range(self.num_output_scales):
88 | pred_score = outputs[i * 2]
89 | pred_bbox = outputs[i * 2 + 1]
90 | gt_mask = targets[i * 2].cuda()
91 | gt_label = targets[i * 2 + 1].cuda()
92 |
93 | pred_score_softmax = torch.softmax(pred_score, dim=1)
94 | # loss_mask = torch.ones(pred_score_softmax.shape[0],
95 | # 1,
96 | # pred_score_softmax.shape[2],
97 | # pred_score_softmax.shape[3])
98 | loss_mask = torch.ones(pred_score_softmax.shape)
99 |
100 | if self.hnm_ratio > 0:
101 | # print('gt_label.shape:', gt_label.shape)
102 | # print('gt_label.size():', gt_label.size())
103 | pos_flag = (gt_label[:, 0, :, :] > 0.5)
104 | pos_num = torch.sum(pos_flag) # get num. of positive examples
105 |
106 | if pos_num > 0:
107 | neg_flag = (gt_label[:, 1, :, :] > 0.5)
108 | neg_num = torch.sum(neg_flag)
109 | neg_num_selected = min(int(self.hnm_ratio * pos_num), int(neg_num))
110 | # non-negative value
111 | neg_prob = torch.where(neg_flag, pred_score_softmax[:, 1, :, :], \
112 | torch.zeros_like(pred_score_softmax[:, 1, :, :]))
113 | neg_prob_sort, _ = torch.sort(neg_prob.reshape(1, -1), descending=False)
114 |
115 | prob_threshold = neg_prob_sort[0][neg_num_selected-1]
116 | neg_grad_flag = (neg_prob <= prob_threshold)
117 | loss_mask = torch.cat([pos_flag.unsqueeze(1), neg_grad_flag.unsqueeze(1)], dim=1)
118 | else:
119 | neg_choice_ratio = 0.1
120 | neg_num_selected = int(pred_score_softmax[:, 1, :, :].numel() * neg_choice_ratio)
121 | neg_prob = pred_score_softmax[:, 1, :, :]
122 | neg_prob_sort, _ = torch.sort(neg_prob.reshape(1, -1), descending=False)
123 | prob_threshold = neg_prob_sort[0][neg_num_selected-1]
124 | neg_grad_flag = (neg_prob <= prob_threshold)
125 | loss_mask = torch.cat([pos_flag.unsqueeze(1), neg_grad_flag.unsqueeze(1)], dim=1)
126 |
127 | # cross entropy with mask
128 | pred_score_softmax_masked = pred_score_softmax[loss_mask]
129 | pred_score_log = torch.log(pred_score_softmax_masked)
130 | score_cross_entropy = -gt_label[:, :2, :, :][loss_mask] * pred_score_log
131 | loss_score = torch.sum(score_cross_entropy) / score_cross_entropy.numel()
132 |
133 | mask_bbox = gt_mask[:, 2:6, :, :]
134 | if torch.sum(mask_bbox) == 0:
135 | loss_bbox = torch.zeros_like(loss_score)
136 | else:
137 | predict_bbox = pred_bbox * mask_bbox
138 | label_bbox = gt_label[:, 2:6, :, :] * mask_bbox
139 | loss_bbox = F.mse_loss(predict_bbox, label_bbox, reduction='sum') / torch.sum(mask_bbox)
140 | # loss_bbox = F.smooth_l1_loss(predict_bbox, label_bbox, reduction='sum') / torch.sum(mask_bbox)
141 | # loss_bbox = torch.nn.MSELoss(predict_bbox, label_bbox, size_average=False, reduce=True)
142 | # loss_bbox = torch.nn.SmoothL1Loss(predict_bbox, label_bbox, size_average=False, reduce=True)
143 |
144 | loss_cls += loss_score
145 | loss_reg += loss_bbox
146 | loss_branch.append(loss_score)
147 | loss_branch.append(loss_bbox)
148 | loss = loss_cls + loss_reg
149 | return loss, loss_branch
--------------------------------------------------------------------------------
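A minimal sketch of driving the second (summed) loss variant, with random tensors standing in for network outputs and encoded ground truth. Shapes and the scale count are hypothetical, and a CUDA device is required because forward moves the targets with .cuda().

import torch
from ChasingTrainFramework_GeneralOneClassDetection.loss_layer_farm.loss import cross_entropy_with_hnm_for_one_class_detection

criterion = cross_entropy_with_hnm_for_one_class_detection(hnm_ratio=5, num_output_scales=2)
# outputs alternate per scale: [score_0, bbox_0, score_1, bbox_1]
outputs = [torch.rand(4, 2, 40, 40, device='cuda', requires_grad=True),
           torch.rand(4, 4, 40, 40, device='cuda', requires_grad=True),
           torch.rand(4, 2, 20, 20, device='cuda', requires_grad=True),
           torch.rand(4, 4, 20, 20, device='cuda', requires_grad=True)]
# targets alternate the same way: [mask_0, label_0, mask_1, label_1], 6 channels each
targets = [torch.rand(4, 6, 40, 40), torch.rand(4, 6, 40, 40),
           torch.rand(4, 6, 20, 20), torch.rand(4, 6, 20, 20)]
loss, loss_branch = criterion(outputs, targets)  # scalar total loss + per-branch terms
loss.backward()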
/ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/mean_squared_error_with_hnm_for_one_class_detection.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | '''
3 | squared error with hard negative mining
4 | '''
5 | import mxnet as mx
6 |
7 |
8 | class mean_squared_error_with_hnm_for_one_class_detection(mx.operator.CustomOp):
9 | def __init__(self, hnm_ratio):
10 | super(mean_squared_error_with_hnm_for_one_class_detection, self).__init__()
11 | self.hnm_ratio = int(hnm_ratio)
12 |
13 | def forward(self, is_train, req, in_data, out_data, aux):
14 | pred = in_data[0]
15 | self.assign(out_data[0], req[0], pred)
16 |
17 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
18 | pred = in_data[0]
19 | label = in_data[1]
20 |         loss = pred - label  # standard MXNet gradient for a regression loss
21 |         if self.hnm_ratio != 0:
22 |             pos_flag = (label > 0)
23 |             pos_num = mx.ndarray.sum(pos_flag).asnumpy()[0]  # get the number of positive samples
24 | if pos_num > 0:
25 | neg_flag = (label < 0.0001)
26 | neg_num = mx.ndarray.sum(neg_flag).asnumpy()[0]
27 | neg_num_selected = min(int(self.hnm_ratio * pos_num), int(neg_num))
28 | neg_loss = mx.ndarray.abs(loss * neg_flag) # non-negative value
29 | neg_loss_tem = mx.ndarray.sort(neg_loss.reshape((1, -1)), is_ascend=False)
30 |
31 |                 top_loss_min = neg_loss_tem[0][neg_num_selected - 1].asnumpy()[0]  # k-th largest negative loss
32 | neg_loss_flag = (neg_loss >= top_loss_min)
33 | loss_mask = mx.ndarray.logical_or(neg_loss_flag, pos_flag)
34 | else:
35 | neg_choice_ratio = 0.1
36 | neg_num_selected = int(loss.size * neg_choice_ratio)
37 | loss_abs = mx.ndarray.abs(loss)
38 | neg_loss_tem = mx.ndarray.sort(loss_abs.reshape((1, -1)), is_ascend=False)
39 |             top_loss_min = neg_loss_tem[0][neg_num_selected - 1].asnumpy()[0]
40 | # logging.info('top_loss_min:%0.4f', top_loss_min)
41 | loss_mask = (loss_abs >= top_loss_min)
42 |
43 | # logging.info('remained_num:%d', mx.ndarray.sum(mask).asnumpy()[0])
44 |
45 | loss *= loss_mask
46 | loss /= loss[0].size
47 | self.assign(in_grad[0], req[0], loss)
48 |
49 |
50 | @mx.operator.register("mean_squared_error_with_hnm_for_one_class_detection")
51 | class mean_squared_error_with_hnm_for_one_class_detection_Prop(mx.operator.CustomOpProp):
52 | def __init__(self, hnm_ratio=10):
53 | super(mean_squared_error_with_hnm_for_one_class_detection_Prop, self).__init__(need_top_grad=False)
54 | self.hnm_ratio = hnm_ratio
55 |
56 | def list_arguments(self):
57 | return ['pred', 'label', 'mask']
58 |
59 | def list_outputs(self):
60 | return ['output']
61 |
62 | def infer_shape(self, in_shape):
63 | data_shape = in_shape[0]
64 | label_shape = in_shape[0]
65 | mask_shape = in_shape[0]
66 | output_shape = in_shape[0]
67 | return [data_shape, label_shape, mask_shape], [output_shape], []
68 |
69 | def create_operator(self, ctx, shapes, dtypes):
70 | return mean_squared_error_with_hnm_for_one_class_detection(self.hnm_ratio)
71 |
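72 | # Minimal usage sketch (illustrative only; `pred`, `label` and `mask` are assumed
73 | # symbols matching list_arguments above):
74 | #
75 | #   loss = mx.symbol.Custom(pred=pred, label=label, mask=mask, hnm_ratio=5,
76 | #                           op_type='mean_squared_error_with_hnm_for_one_class_detection')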
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/mean_squared_error_with_ohem_for_one_class_detection.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | '''
3 | squared error with online hard example mining
4 | '''
5 | import mxnet as mx
6 |
7 |
8 | class mean_squared_error_with_ohem_for_one_class_detection(mx.operator.CustomOp):
9 | def __init__(self, ohem_ratio):
10 | super(mean_squared_error_with_ohem_for_one_class_detection, self).__init__()
11 |         self.ohem_ratio = float(ohem_ratio)  # custom-op kwargs may arrive as strings
12 |
13 | def forward(self, is_train, req, in_data, out_data, aux):
14 | pred = in_data[0]
15 | self.assign(out_data[0], req[0], pred)
16 |
17 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
18 | pred = out_data[0]
19 | label = in_data[1]
20 | loss = pred - label
21 |
22 | # perform OHEM
23 | num_select = int(label.size * self.ohem_ratio)
24 | loss_abs = mx.nd.abs(loss)
25 | loss_sort = mx.nd.sort(loss_abs.reshape((1, -1)), is_ascend=False)
26 |         min_threshold = loss_sort[0][num_select - 1].asnumpy()[0]
27 | select_flag = loss_abs >= min_threshold
28 | loss *= select_flag
29 | loss /= num_select
30 |
31 | self.assign(in_grad[0], req[0], loss)
32 |
33 |
34 | @mx.operator.register("mean_squared_error_with_ohem_for_one_class_detection")
35 | class mean_squared_error_with_ohem_for_one_class_detection_Prop(mx.operator.CustomOpProp):
36 | def __init__(self, ohem_ratio=0.25):
37 | super(mean_squared_error_with_ohem_for_one_class_detection_Prop, self).__init__(need_top_grad=False)
38 | self.ohem_ratio = ohem_ratio
39 |
40 | def list_arguments(self):
41 | return ['pred', 'label']
42 |
43 | def list_outputs(self):
44 | return ['output']
45 |
46 | def infer_shape(self, in_shape):
47 | pred_shape = in_shape[0]
48 | label_shape = in_shape[0]
49 | output_shape = in_shape[0]
50 | return [pred_shape, label_shape], [output_shape], []
51 |
52 | def create_operator(self, ctx, shapes, dtypes):
53 | return mean_squared_error_with_ohem_for_one_class_detection(self.ohem_ratio)
54 |
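55 | # Example of the selection above: for a 1x1x5x5 output (25 values) and
56 | # ohem_ratio = 0.25, num_select = int(25 * 0.25) = 6, so only the 6 entries with
57 | # the largest absolute error keep a non-zero gradient.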
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/solver_GOCD.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import logging
4 | import time
5 | import torch
6 | 
7 | class Solver(object):
8 | def __init__(self,
9 | task_name,
10 | torch_module,
11 | trainset_dataiter,
12 | net,
13 | net_initializer,
14 | optimizer,
15 | lr_scheduler,
16 | gpu_id_list,
17 | num_train_loops,
18 | loss_criterion,
19 | train_metric,
20 | display_interval=10,
21 | val_evaluation_interval=100,
22 | valset_dataiter=None,
23 | val_metric=None,
24 | num_val_loops=0,
25 | pretrained_model_param_path=None,
26 | save_prefix=None,
27 | start_index=0,
28 | model_save_interval=None,
29 | train_metric_update_frequency=1):
30 | self.task_name = task_name
31 | self.torch_module = torch_module
32 | self.trainset_dataiter = trainset_dataiter
33 | self.valset_dataiter = valset_dataiter
34 | self.net = net
35 | self.net_initializer = net_initializer
36 | self.gpu_id_list = gpu_id_list
37 | self.optimizer = optimizer
38 | self.lr_scheduler = lr_scheduler
39 | self.num_train_loops = num_train_loops
40 | self.num_val_loops = num_val_loops
41 | self.loss_criterion = loss_criterion
42 | self.train_metric = train_metric
43 | self.val_metric = val_metric
44 | self.display_interval = display_interval
45 | self.val_evaluation_interval = val_evaluation_interval
46 | self.save_prefix = save_prefix
47 | self.start_index = start_index
48 | self.pretrained_model_param_path = pretrained_model_param_path
49 | self.model_save_interval = model_save_interval
50 |
51 | self.train_metric_update_frequency = \
52 | train_metric_update_frequency if train_metric_update_frequency <= \
53 | display_interval else display_interval
54 |
55 | def fit(self):
56 |         logging.info('Start training on gpu %s -----------', str(self.gpu_id_list))
57 | sum_time = 0
58 | for i in range(self.start_index + 1, self.num_train_loops + 1):
59 | start = time.time()
60 | batch = self.trainset_dataiter.next()
61 | images = batch.data[0].cuda()
62 | targets = batch.label
63 |
64 | images = (images - 127.5) / 127.5
65 |
66 | outputs = self.net(images)
67 |
68 | loss, loss_branch = self.loss_criterion(outputs, targets)
69 |
70 | # update parameters------------------------------------------------
71 | self.optimizer.zero_grad()
72 | loss.backward()
73 | self.optimizer.step()
74 | self.lr_scheduler.step()
75 |
76 |             # NOTE: the train_metric still needs debugging
77 | # display training process----------------------------------------
78 | if i % self.train_metric_update_frequency == 0:
79 | self.train_metric.update(loss_branch)
80 |
81 | sum_time += (time.time() - start)
82 |
83 | if i % self.display_interval == 0:
84 | names, values = self.train_metric.get()
85 |
86 | logging.info('Iter[%d] -- Time elapsed: %.1f s. Speed: %.1f images/s.',
87 | i, sum_time, self.display_interval * \
88 | self.trainset_dataiter.get_batch_size() / sum_time)
89 | for name, value in zip(names, values):
90 | logging.info('%s: --> %.4f', name, value)
91 |                 logging.info('total loss = %.4f', loss.item() * 10000)
92 |
93 | self.train_metric.reset()
94 | sum_time = 0
95 |
96 | # evaluate the validation set
97 | if i % self.val_evaluation_interval == 0 and self.num_val_loops:
98 | with torch.no_grad():
99 | logging.info('Start validating---------------------------')
100 | for val_loop in range(self.num_val_loops):
101 | val_batch = self.valset_dataiter.next()
102 |                         val_images = val_batch.data[0].cuda()
103 |                         val_images = (val_images - 127.5) / 127.5  # same normalization as training
104 |                         val_targets = val_batch.label
105 | val_outputs = self.net(val_images)
106 |
107 | self.val_metric.update(val_outputs, val_targets)
108 |
109 | names, values = self.val_metric.get()
110 | logging.info('Iter[%d] validation metric -------------', i)
111 | for name, value in zip(names, values):
112 | logging.info('%s: --> %.4f', name, value)
113 | logging.info('End validating ----------------------------')
114 | self.val_metric.reset()
115 |
116 | # save model-----------------------------------------------------
117 |             if self.model_save_interval and i % self.model_save_interval == 0:
118 | torch.save(self.net.state_dict(),
119 | self.save_prefix + '/' + self.task_name + \
120 |                            '_{}.pth'.format(self.lr_scheduler._step_count))
--------------------------------------------------------------------------------
/ChasingTrainFramework_GeneralOneClassDetection/train_GOCD.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import logging
3 |
4 |
5 | def start_train(param_dict,
6 | task_name,
7 | torch_module,
8 | gpu_id_list,
9 | train_dataiter,
10 | train_metric,
11 | train_metric_update_frequency,
12 | num_train_loops,
13 | val_dataiter,
14 | val_metric,
15 | num_val_loops,
16 | validation_interval,
17 | optimizer,
18 | lr_scheduler,
19 | net,
20 | net_initializer,
21 | loss_criterion,
22 | pretrained_model_param_path,
23 | display_interval,
24 | save_prefix,
25 | model_save_interval,
26 | start_index
27 | ):
28 |
29 | logging.info('PyTorch Version: %s', str(torch_module.__version__))
30 | logging.info('Training settings:-----------------------------------------------------------------')
31 | for param_name, param_value in param_dict.items():
32 | logging.info(param_name + ':' + str(param_value))
33 | logging.info('-----------------------------------------------------------------------------------')
34 |
35 | # init Solver module-------------------------------------------------------------------------------------
36 | from .solver_GOCD import Solver
37 |
38 | solver = Solver(
39 | task_name=task_name,
40 | torch_module=torch_module,
41 | trainset_dataiter=train_dataiter,
42 | net=net,
43 | net_initializer=net_initializer,
44 | optimizer=optimizer,
45 | lr_scheduler=lr_scheduler,
46 | gpu_id_list=gpu_id_list,
47 | num_train_loops=num_train_loops,
48 | loss_criterion=loss_criterion,
49 | train_metric=train_metric,
50 | display_interval=display_interval,
51 | val_evaluation_interval=validation_interval,
52 | valset_dataiter=val_dataiter,
53 | val_metric=val_metric,
54 | num_val_loops=num_val_loops,
55 | pretrained_model_param_path=pretrained_model_param_path,
56 | save_prefix=save_prefix,
57 | start_index=start_index,
58 | model_save_interval=model_save_interval,
59 | train_metric_update_frequency=train_metric_update_frequency)
60 | solver.fit()
61 |
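62 | # Hypothetical call from a config script (all argument values below are
63 | # placeholders; the real ones are defined in config_farm/configuration_*.py):
64 | #
65 | #   import torch
66 | #   start_train(param_dict=param_dict, task_name='face_detection_v2',
67 | #               torch_module=torch, gpu_id_list=[0], train_dataiter=train_dataiter,
68 | #               ..., model_save_interval=100000, start_index=0)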
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 becauseofAI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # A Light and Fast Face Detector for Edge Devices
2 | **This repo is updated frequently; keeping up with the latest code is highly recommended.**
3 |
4 | ## Recent Update
5 | * `2019.10.14` The official PyTorch version of LFFD is online for the first time. For now, the repo is only a preview version. The face detection code for the v2 version is released as a nightly preview.
6 | * `2019.10.16` The face detection code for the v2 version can now be trained normally. The code for the other tasks will be updated soon.
7 |
8 | ## Introduction
9 | This repo is the official PyTorch source code of paper "[LFFD: A Light and Fast Face Detector for Edge Devices](https://arxiv.org/abs/1904.10633)". Our paper presents a light and fast face detector (**LFFD**) for edge devices.
10 | LFFD carefully balances accuracy and latency, achieving a small model size and fast inference speed together with excellent accuracy.
11 | **Understanding the essence of receptive field makes detection networks interpretable.**
12 |
13 | In practice, we have deployed it on cloud and edge devices (such as the NVIDIA Jetson series and ARM-based embedded systems). The comprehensive performance
14 | of LFFD is robust enough to support our applications.
15 |
16 | In fact, our method is **_a general detection framework that is applicable to one-class detection_**, such as face detection, pedestrian detection,
17 | head detection, vehicle detection and so on. In general, an object class whose average ratio of the longer side to the shorter side is
18 | less than 5 is a good fit for our framework.
19 |
20 | Several practical advantages:
21 | 1. large scale coverage, and easy to extend to larger scales by adding more layers with little additional latency.
22 | 2. detects small objects (as small as 10 pixels) in images of extremely large resolution (8K or even larger) in a single inference.
23 | 3. a simple backbone built from very common operators makes it easy to deploy anywhere.
24 |
25 | ## Accuracy and Latency
26 | on the way
27 |
28 | ## Getting Started
29 | We re-implement the proposed method using PyTorch. The MXNet version is [here](https://github.com/YonghaoHe/A-Light-and-Fast-Face-Detector-for-Edge-Devices).
30 |
31 | #### Prerequisites (global)
32 | * Python>=3.5
33 | * numpy>=1.16 (lower versions should work as well, but not tested)
34 | * PyTorch>=1.0.0 ([install guide](https://pytorch.org/get-started/locally/))
35 | * cv2=3.x (pip3 install opencv-python==3.4.5.20; other versions should work as well, but are not tested)
36 |
37 | > Tips:
38 | > * use PyTorch with cuDNN.
39 | > * build numpy from source with OpenBLAS. This will improve the training efficiency.
40 | > * make sure cv2 links to libjpeg-turbo, not libjpeg. This will improve the JPEG decoding efficiency.
41 |
42 | #### Sub-directory description
43 | * [face_detection](face_detection) contains the code of training, evaluation and inference for LFFD,
44 | the main content of this repo. The trained models of different versions are provided for off-the-shelf deployment.
45 | * [head_detection](head_detection) contains the trained models for head detection. The models are obtained by the
46 | proposed general one class detection framework.
47 | * [pedestrian_detection](pedestrian_detection) contains the trained models for pedestrian detection. The models are obtained by the
48 | proposed general one class detection framework.
49 | * [vehicle_detection](vehicle_detection) contains the trained models for vehicle detection. The models are obtained by the
50 | proposed general one class detection framework.
51 | * [ChasingTrainFramework_GeneralOneClassDetection](ChasingTrainFramework_GeneralOneClassDetection) is a simple
52 | wrapper based on MXNet Module API for general one class detection.
53 |
54 | #### Installation
55 | 1. Download the repo:
56 | ```
57 | git clone https://github.com/becauseofAI/lffd-pytorch.git
58 | ```
59 | 2. Refer to the corresponding sub-project for detailed usage. Currently, only the v2 version of [face_detection](face_detection) is available for trial training.
60 |
61 | ## Citation
62 | If you benefit from our work in your research or product, please kindly cite the paper:
63 | ```
64 | @inproceedings{LFFD,
65 | title={LFFD: A Light and Fast Face Detector for Edge Devices},
66 | author={He, Yonghao and Xu, Dezhong and Wu, Lifang and Jian, Meng and Xiang, Shiming and Pan, Chunhong},
67 | booktitle={arXiv:1904.10633},
68 | year={2019}
69 | }
70 | ```
71 |
72 | ## To Do List
73 | - [ ] face detection
74 | - [ ] pedestrian detection
75 | - [ ] head detection
76 | - [ ] vehicle detection
77 | - [ ] license plate detection
78 | - [ ] [reconstruction version](https://github.com/becauseofAI/refinanet)
79 |
80 | ## Contact
81 | becauseofAI[1], Yonghao He[2]
82 |
83 | [1] E-mail: helloai777@gmail.com
84 | [2] E-mails: yonghao.he@ia.ac.cn / yonghao.he@aliyun.com
85 |
86 | **If you are interested in this work, any innovative contributions are welcome!!!**
87 |
88 | **Internship is open at NLPR, CASIA all the time. Send me your resumes!**
89 |
--------------------------------------------------------------------------------
/face_detection/README.md:
--------------------------------------------------------------------------------
1 | ## Face Detection
2 | This subdirectory includes the face detection related code. Some descriptions have
3 | already been presented in the repo README.md.
4 |
5 | ### Recent Update
6 | * `2019.10.14` The v2 model is available for trial training (nightly preview).
7 | * `2019.10.16` **The model v2 can be trained normally.**
8 |
9 | ### Brief Introduction to Model Version
10 | * v1 - refer to the paper for details
11 | * v2 - the detection scale is 10-320 (vs 10-560 in v1), the number of layers is 20,
12 | the backbone is modified for faster inference. Refer to `./net_farm/naivenet_structures.xlsx` for details.
13 |
14 | ### Accuracy
15 | on the way
16 |
17 | ### Inference Latency
18 | on the way
19 |
20 | ### User Instructions
21 | > **For now, only training the v2 model (nightly preview) is supported.**
22 |
23 | First, we introduce the functionality of each subdirectory.
24 | * [net_farm](net_farm). This folder contains net definitions for all model versions.
25 | * [metric_farm](metric_farm). This folder contains the metrics for training monitoring.
26 | * [data_provider_farm](data_provider_farm). This folder contains the code of raw data processing/formatting/packing&unpacking.
27 | * [data_iterator_farm](data_iterator_farm). This folder contains the code of multi-threaded data prefetching.
28 | **This is the most important part, since it describes the essence of LFFD!!!**
29 | * [config_farm](config_farm). This folder contains the configurations of all model versions. The training is started by running the corresponding config python script.
30 |
31 | Second, we present a common procedure for running the code for training (taking v2 as an example).
32 |
33 | 1. prepare net model `net_farm/naivenet.py`
34 | 2. prepare the training data by using the code in `data_provider_farm`. We provide a packed
35 | training data of WIDERFACE trainset. Please download from **Data Download**.
36 | 3. adjust the code around line 241 in `data_iterator_farm/multithread_dataiter_for_cross_entropy_v2.py`.
37 | 4. set the variables in the configuration script in `config_farm`.
38 | 5. run `python configuration_10_320_20L_5scales_v2.py` in `config_farm` directory.
39 |
40 | ### Data Download
41 | We have packed the training data of the WIDERFACE train set. In the data, faces smaller than 8 pixels are ignored, and some pure negative
42 | images cropped from the training images are also added. We provide three ways to download the packed data:
43 | * [Baidu Yunpan](https://pan.baidu.com/s/1a8Wk4GNkfPYbKAFSrZzFIQ) (pwd:e7bv)
44 | * [MS OneDrive](https://1drv.ms/u/s!Av9h0YMgxdaSgwiP4nKDasu4m73J?e=v5UfWQ)
45 | * [Google Drive](https://drive.google.com/open?id=1O3nJ6mQKD_sdFpfXmYoK7xnTUg3To7kO)
46 |
47 | After you download the data, you can put it anywhere. Remember to set the `param_trainset_pickle_file_path` variable in the configuration file. (We
48 | usually put the data into the folder `./data_provider_farm/data_folder/`.)
49 |
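50 | If you want to pack your own training data instead, `data_provider_farm/pickle_provider.py`
51 | already contains a `write_file()` helper; below is a minimal sketch of the same flow
52 | (the file paths are placeholders; adapt the import paths to where you run it):
53 | ```
54 | from text_list_adapter import TextListAdapter
55 | from pickle_provider import PickleProvider
56 | 
57 | adapter = TextListAdapter('./data_folder/my_data_list.txt')  # hypothetical list file
58 | packer = PickleProvider('./data_folder/my_data.pkl', 90, adapter)
59 | packer.write()  # encodes images as JPG and dumps everything into the pickle file
60 | ```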
--------------------------------------------------------------------------------
/face_detection/accuracy_evaluation/evaluation_on_fddb.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import cv2
5 | import math
6 | import sys
7 | sys.path.append('..')
8 | # change the config as needed
9 | from config_farm import configuration_10_320_20L_5scales_v2 as cfg
10 | import mxnet
11 | from predict import Predict
12 |
13 | # set the proper symbol file and model file
14 | symbol_file_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json'
15 | model_file_path = '../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params'
16 | my_predictor = Predict(mxnet=mxnet,
17 | symbol_file_path=symbol_file_path,
18 | model_file_path=model_file_path,
19 | ctx=mxnet.gpu(0),
20 | receptive_field_list=cfg.param_receptive_field_list,
21 | receptive_field_stride=cfg.param_receptive_field_stride,
22 | bbox_small_list=cfg.param_bbox_small_list,
23 | bbox_large_list=cfg.param_bbox_large_list,
24 | receptive_field_center_start=cfg.param_receptive_field_center_start,
25 | num_output_scales=cfg.param_num_output_scales)
26 |
27 |
28 | # set fddb root, the path should look like XXXX/originalPics
29 | fddb_image_root = 'XXXX/originalPics'
30 | # set the list file path, the path should look like XXXX/FDDB-folds/annotatedList.txt
31 | image_list_file = 'XXXX/FDDB-folds/annotatedList.txt'
32 | result_file_name = './fddb_' + os.path.basename(model_file_path).split('.')[0] + '_result.txt'
33 | fin = open(image_list_file, 'r')
34 | fout = open(result_file_name, 'w')
35 | resize_scale = 1.0
36 | score_threshold = 0.11
37 | NMS_threshold = 0.4
38 | counter = 0
39 | for line in fin:
40 | line = line.strip('\n')
41 |
42 | im = cv2.imread(os.path.join(fddb_image_root, line + '.jpg'), cv2.IMREAD_COLOR)
43 |
44 |     bboxes, infer_time = my_predictor.predict(im, resize_scale=resize_scale, score_threshold=score_threshold, top_k=10000, NMS_threshold=NMS_threshold)
45 |
46 | # for bbox in bboxes:
47 | # cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 1)
48 | # cv2.imshow('im', im)
49 | # cv2.waitKey()
50 |
51 | fout.write(line + '\n')
52 | fout.write(str(len(bboxes)) + '\n')
53 | for bbox in bboxes:
54 | fout.write('%d %d %d %d %.03f' % (
55 | math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1]),
56 | bbox[4] if bbox[4] <= 1 else 1) + '\n')
57 | counter += 1
58 | print('[%d] %s is processed.' % (counter, line))
59 | fin.close()
60 | fout.close()
61 |
62 |
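63 | # Each image block written above follows the FDDB detection output format:
64 | #   <image name>
65 | #   <number of faces>
66 | #   <left> <top> <width> <height> <score>  (one line per face)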
--------------------------------------------------------------------------------
/face_detection/accuracy_evaluation/evaluation_on_widerface.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import sys
4 | import cv2
5 | import math
6 | sys.path.append('..')
7 | # change the config as needed
8 | from config_farm import configuration_10_320_20L_5scales_v2 as cfg
9 | import mxnet
10 | from predict import Predict
11 |
12 | # set the proper symbol file and model file
13 | symbol_file_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json'
14 | model_file_path = '../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params'
15 | my_predictor = Predict(mxnet=mxnet,
16 | symbol_file_path=symbol_file_path,
17 | model_file_path=model_file_path,
18 | ctx=mxnet.gpu(0),
19 | receptive_field_list=cfg.param_receptive_field_list,
20 | receptive_field_stride=cfg.param_receptive_field_stride,
21 | bbox_small_list=cfg.param_bbox_small_list,
22 | bbox_large_list=cfg.param_bbox_large_list,
23 | receptive_field_center_start=cfg.param_receptive_field_center_start,
24 | num_output_scales=cfg.param_num_output_scales)
25 |
26 | # set the val root, the path should look like XXXX/WIDER_val/images
27 | val_image_root = 'XXXX/WIDER_val/images'
28 | val_result_txt_save_root = './widerface_val_' + os.path.basename(model_file_path).split('.')[0] + '_result_txt/'
29 | if not os.path.exists(val_result_txt_save_root):
30 | os.makedirs(val_result_txt_save_root)
31 |
32 | resize_scale = 1
33 | score_threshold = 0.11
34 | NMS_threshold = 0.4
35 | counter = 0
36 | for parent, dir_names, file_names in os.walk(val_image_root):
37 | for file_name in file_names:
38 | if not file_name.lower().endswith('jpg'):
39 | continue
40 |
41 | im = cv2.imread(os.path.join(parent, file_name), cv2.IMREAD_COLOR)
42 |
43 |         bboxes, infer_time = my_predictor.predict(im, resize_scale=resize_scale, score_threshold=score_threshold, top_k=10000, NMS_threshold=NMS_threshold)
44 |
45 | # for bbox in bboxes:
46 | # cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 1)
47 | # cv2.imshow('im',im)
48 | # cv2.waitKey()
49 |
50 | event_name = parent.split('/')[-1]
51 | if not os.path.exists(os.path.join(val_result_txt_save_root, event_name)):
52 | os.makedirs(os.path.join(val_result_txt_save_root, event_name))
53 | fout = open(os.path.join(val_result_txt_save_root, event_name, file_name.split('.')[0] + '.txt'), 'w')
54 | fout.write(file_name.split('.')[0] + '\n')
55 | fout.write(str(len(bboxes)) + '\n')
56 | for bbox in bboxes:
57 | fout.write('%d %d %d %d %.03f' % (math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1]), bbox[4] if bbox[4] <= 1 else 1) + '\n')
58 | fout.close()
59 | counter += 1
60 | print('[%d] %s is processed.' % (counter, file_name))
61 |
62 |
63 |
--------------------------------------------------------------------------------
/face_detection/accuracy_evaluation/predict.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import sys
3 | import os
4 | import numpy
5 | import cv2
6 | import time
7 |
8 |
9 | # empty data batch class for dynamical properties
10 | class DataBatch:
11 | pass
12 |
13 |
14 | def NMS(boxes, overlap_threshold):
15 | '''
16 |
17 | :param boxes: numpy nx5, n is the number of boxes, 0:4->x1, y1, x2, y2, 4->score
18 | :param overlap_threshold:
19 | :return:
20 | '''
21 | if boxes.shape[0] == 0:
22 | return boxes
23 |
24 |     # if the bounding boxes are integers, convert them to floats --
25 | # this is important since we'll be doing a bunch of divisions
26 | if boxes.dtype != numpy.float32:
27 | boxes = boxes.astype(numpy.float32)
28 |
29 | # initialize the list of picked indexes
30 | pick = []
31 | # grab the coordinates of the bounding boxes
32 | x1 = boxes[:, 0]
33 | y1 = boxes[:, 1]
34 | x2 = boxes[:, 2]
35 | y2 = boxes[:, 3]
36 | sc = boxes[:, 4]
37 | widths = x2 - x1
38 | heights = y2 - y1
39 |
40 |     # compute the area of the bounding boxes and sort the bounding
41 |     # boxes by their detection score
42 |     area = heights * widths
43 |     idxs = numpy.argsort(sc)  # sort scores in ascending order
44 |
45 | # keep looping while some indexes still remain in the indexes list
46 | while len(idxs) > 0:
47 | # grab the last index in the indexes list and add the
48 | # index value to the list of picked indexes
49 | last = len(idxs) - 1
50 | i = idxs[last]
51 | pick.append(i)
52 |
53 |         # compare the current box with the remaining lower-score boxes
54 | xx1 = numpy.maximum(x1[i], x1[idxs[:last]])
55 | yy1 = numpy.maximum(y1[i], y1[idxs[:last]])
56 | xx2 = numpy.minimum(x2[i], x2[idxs[:last]])
57 | yy2 = numpy.minimum(y2[i], y2[idxs[:last]])
58 |
59 |         # compute the width and height of the intersection box
60 | w = numpy.maximum(0, xx2 - xx1 + 1)
61 | h = numpy.maximum(0, yy2 - yy1 + 1)
62 |
63 | # compute the ratio of overlap
64 | overlap = (w * h) / area[idxs[:last]]
65 |
66 |         # delete all indexes from the index list that have overlap above the threshold
67 | idxs = numpy.delete(idxs, numpy.concatenate(([last], numpy.where(overlap > overlap_threshold)[0])))
68 |
69 |     # return only the bounding boxes that were picked
70 |     # (each row keeps x1, y1, x2, y2, score)
71 | return boxes[pick]
72 |
73 |
74 | class Predict(object):
75 |
76 | def __init__(self,
77 | mxnet,
78 | symbol_file_path,
79 | model_file_path,
80 | ctx,
81 | receptive_field_list,
82 | receptive_field_stride,
83 | bbox_small_list,
84 | bbox_large_list,
85 | receptive_field_center_start,
86 | num_output_scales
87 | ):
88 | self.mxnet = mxnet
89 | self.symbol_file_path = symbol_file_path
90 | self.model_file_path = model_file_path
91 | self.ctx = ctx
92 |
93 | self.receptive_field_list = receptive_field_list
94 | self.receptive_field_stride = receptive_field_stride
95 | self.bbox_small_list = bbox_small_list
96 | self.bbox_large_list = bbox_large_list
97 | self.receptive_field_center_start = receptive_field_center_start
98 | self.num_output_scales = num_output_scales
99 | self.constant = [i / 2.0 for i in self.receptive_field_list]
100 | self.input_height = 480
101 | self.input_width = 640
102 | self.__load_model()
103 |
104 | def __load_model(self):
105 | # load symbol and parameters
106 | print('----> load symbol file: %s\n----> load model file: %s' % (self.symbol_file_path, self.model_file_path))
107 | if not os.path.exists(self.symbol_file_path):
108 | print('The symbol file does not exist!!!!')
109 | sys.exit(1)
110 | if not os.path.exists(self.model_file_path):
111 | print('The model file does not exist!!!!')
112 | sys.exit(1)
113 | self.symbol_net = self.mxnet.symbol.load(self.symbol_file_path)
114 | data_name = 'data'
115 | data_name_shape = (data_name, (1, 3, self.input_height, self.input_width))
116 | self.module = self.mxnet.module.Module(symbol=self.symbol_net,
117 | data_names=[data_name],
118 | label_names=None,
119 | context=self.ctx,
120 | work_load_list=None)
121 | self.module.bind(data_shapes=[data_name_shape],
122 | for_training=False)
123 |
124 | save_dict = self.mxnet.nd.load(self.model_file_path)
125 | self.arg_name_arrays = dict()
126 | self.arg_name_arrays['data'] = self.mxnet.nd.zeros((1, 3, self.input_height, self.input_width), self.ctx)
127 | self.aux_name_arrays = {}
128 | for k, v in save_dict.items():
129 | tp, name = k.split(':', 1)
130 | if tp == 'arg':
131 | self.arg_name_arrays.update({name: v.as_in_context(self.ctx)})
132 | if tp == 'aux':
133 | self.aux_name_arrays.update({name: v.as_in_context(self.ctx)})
134 | self.module.init_params(arg_params=self.arg_name_arrays,
135 | aux_params=self.aux_name_arrays,
136 | allow_missing=True)
137 | print('----> Model is loaded successfully.')
138 |
139 | def predict(self, image, resize_scale=1, score_threshold=0.8, top_k=100, NMS_threshold=0.3, NMS_flag=True, skip_scale_branch_list=[]):
140 |
141 | if image.ndim != 3 or image.shape[2] != 3:
142 |             print('Only 3-channel color images are supported.')
143 | return None
144 |
145 | bbox_collection = []
146 |
147 | shorter_side = min(image.shape[:2])
148 | if shorter_side * resize_scale < 128:
149 | resize_scale = float(128) / shorter_side
150 |
151 | input_image = cv2.resize(image, (0, 0), fx=resize_scale, fy=resize_scale)
152 |
153 | input_image = input_image.astype(dtype=numpy.float32)
154 | input_image = input_image[:, :, :, numpy.newaxis]
155 | input_image = input_image.transpose([3, 2, 0, 1])
156 |
157 | data_batch = DataBatch()
158 | data_batch.data = [self.mxnet.ndarray.array(input_image, self.ctx)]
159 |
160 | tic = time.time()
161 | self.module.forward(data_batch=data_batch, is_train=False)
162 | results = self.module.get_outputs()
163 | outputs = []
164 | for output in results:
165 | outputs.append(output.asnumpy())
166 | toc = time.time()
167 | infer_time = (toc - tic) * 1000
168 |
169 | for i in range(self.num_output_scales):
170 | if i in skip_scale_branch_list:
171 | continue
172 |
173 | score_map = numpy.squeeze(outputs[i * 2], (0, 1))
174 |
175 | # score_map_show = score_map * 255
176 | # score_map_show[score_map_show < 0] = 0
177 | # score_map_show[score_map_show > 255] = 255
178 | # cv2.imshow('score_map' + str(i), cv2.resize(score_map_show.astype(dtype=numpy.uint8), (0, 0), fx=2, fy=2))
179 | # cv2.waitKey()
180 |
181 | bbox_map = numpy.squeeze(outputs[i * 2 + 1], 0)
182 |
183 | RF_center_Xs = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * x for x in range(score_map.shape[1])])
184 | RF_center_Xs_mat = numpy.tile(RF_center_Xs, [score_map.shape[0], 1])
185 | RF_center_Ys = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * y for y in range(score_map.shape[0])])
186 | RF_center_Ys_mat = numpy.tile(RF_center_Ys, [score_map.shape[1], 1]).T
187 |
188 | x_lt_mat = RF_center_Xs_mat - bbox_map[0, :, :] * self.constant[i]
189 | y_lt_mat = RF_center_Ys_mat - bbox_map[1, :, :] * self.constant[i]
190 | x_rb_mat = RF_center_Xs_mat - bbox_map[2, :, :] * self.constant[i]
191 | y_rb_mat = RF_center_Ys_mat - bbox_map[3, :, :] * self.constant[i]
192 |
193 | x_lt_mat = x_lt_mat / resize_scale
194 | x_lt_mat[x_lt_mat < 0] = 0
195 | y_lt_mat = y_lt_mat / resize_scale
196 | y_lt_mat[y_lt_mat < 0] = 0
197 | x_rb_mat = x_rb_mat / resize_scale
198 | x_rb_mat[x_rb_mat > image.shape[1]] = image.shape[1]
199 | y_rb_mat = y_rb_mat / resize_scale
200 | y_rb_mat[y_rb_mat > image.shape[0]] = image.shape[0]
201 |
202 | select_index = numpy.where(score_map > score_threshold)
203 | for idx in range(select_index[0].size):
204 | bbox_collection.append((x_lt_mat[select_index[0][idx], select_index[1][idx]],
205 | y_lt_mat[select_index[0][idx], select_index[1][idx]],
206 | x_rb_mat[select_index[0][idx], select_index[1][idx]],
207 | y_rb_mat[select_index[0][idx], select_index[1][idx]],
208 | score_map[select_index[0][idx], select_index[1][idx]]))
209 |
210 | # NMS
211 | bbox_collection = sorted(bbox_collection, key=lambda item: item[-1], reverse=True)
212 | if len(bbox_collection) > top_k:
213 | bbox_collection = bbox_collection[0:top_k]
214 | bbox_collection_numpy = numpy.array(bbox_collection, dtype=numpy.float32)
215 |
216 | if NMS_flag:
217 | final_bboxes = NMS(bbox_collection_numpy, NMS_threshold)
218 | final_bboxes_ = []
219 | for i in range(final_bboxes.shape[0]):
220 | final_bboxes_.append((final_bboxes[i, 0], final_bboxes[i, 1], final_bboxes[i, 2], final_bboxes[i, 3], final_bboxes[i, 4]))
221 |
222 | return final_bboxes_, infer_time
223 | else:
224 | return bbox_collection_numpy, infer_time
225 |
226 |
227 | def run_prediction_folder():
228 | sys.path.append('..')
229 | from config_farm import configuration_10_560_25L_8scales_v1 as cfg
230 | import mxnet
231 |
232 | debug_folder = '' # fill the folder that contains images
233 | file_name_list = [file_name for file_name in os.listdir(debug_folder) if file_name.lower().endswith('jpg')]
234 |
235 | symbol_file_path = '../symbol_farm/symbol_10_560_25L_8scales_v1_deploy.json'
236 | model_file_path = '../saved_model/configuration_10_560_25L_8scales_v1/train_10_560_25L_8scales_v1_iter_1400000.params'
237 | my_predictor = Predict(mxnet=mxnet,
238 | symbol_file_path=symbol_file_path,
239 | model_file_path=model_file_path,
240 | ctx=mxnet.gpu(0),
241 | receptive_field_list=cfg.param_receptive_field_list,
242 | receptive_field_stride=cfg.param_receptive_field_stride,
243 | bbox_small_list=cfg.param_bbox_small_list,
244 | bbox_large_list=cfg.param_bbox_large_list,
245 | receptive_field_center_start=cfg.param_receptive_field_center_start,
246 | num_output_scales=cfg.param_num_output_scales)
247 |
248 | for file_name in file_name_list:
249 | im = cv2.imread(os.path.join(debug_folder, file_name))
250 |
251 |         bboxes, infer_time = my_predictor.predict(im, resize_scale=1, score_threshold=0.3, top_k=10000, NMS_threshold=0.3, NMS_flag=True, skip_scale_branch_list=[])
252 |         for bbox in bboxes:
253 |             cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
254 |
255 | if max(im.shape[:2]) > 1600:
256 | scale = 1600/max(im.shape[:2])
257 | im = cv2.resize(im, (0, 0), fx=scale, fy=scale)
258 | cv2.imshow('im', im)
259 | cv2.waitKey()
260 | # cv2.imwrite(os.path.join(debug_folder, file_name.replace('.jpg','_result.jpg')), im)
261 |
262 |
263 | if __name__ == '__main__':
264 | run_prediction_folder()
265 |
--------------------------------------------------------------------------------
/face_detection/config_farm/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @date : 18-11-28
3 | # @author : MindBreaker
4 | # @module :
--------------------------------------------------------------------------------
/face_detection/data_iterator_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/data_iterator_farm/__init__.py
--------------------------------------------------------------------------------
/face_detection/data_provider_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/data_provider_farm/__init__.py
--------------------------------------------------------------------------------
/face_detection/data_provider_farm/data_folder/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/data_provider_farm/data_folder/.gitkeep
--------------------------------------------------------------------------------
/face_detection/data_provider_farm/pickle_provider.py:
--------------------------------------------------------------------------------
1 | '''
2 | This provider accepts an adapter, saves the dataset to a pickle file, and loads the whole dataset into memory for data iterators.
3 | '''
4 |
5 | import cv2
6 | import numpy
7 | import pickle
8 | import sys
9 |
10 | sys.path.append('../..')
11 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_provider import ProviderBaseclass
12 | from data_provider_farm.text_list_adapter import TextListAdapter
13 |
14 |
15 | class PickleProvider(ProviderBaseclass):
16 | """
17 | This class provides methods to save and read data.
18 | By default, images are compressed using JPG format.
19 |     If data_adapter is not None, the provider saves data; otherwise it reads data.
20 | """
21 |
22 | def __init__(self,
23 | pickle_file_path,
24 | encode_quality=90,
25 | data_adapter=None):
26 | ProviderBaseclass.__init__(self)
27 |
28 | if data_adapter: # write data
29 |
30 | self.data_adapter = data_adapter
31 | self.data = {}
32 | self.counter = 0
33 | self.pickle_file_path = pickle_file_path
34 |
35 | else: # read data
36 |
37 | self.data = pickle.load(open(pickle_file_path, 'rb'))
38 |             # get positive and negative indices
39 | self._positive_index = []
40 | self._negative_index = []
41 | for k, v in self.data.items():
42 | if v[1] == 0: # negative
43 | self._negative_index.append(k)
44 | else: # positive
45 | self._positive_index.append(k)
46 |
47 | self.compression_mode = '.jpg'
48 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality]
49 |
50 | @property
51 | def positive_index(self):
52 | return self._positive_index
53 |
54 | @property
55 | def negative_index(self):
56 | return self._negative_index
57 |
58 | def write(self):
59 |
60 | for data_item in self.data_adapter.get_one():
61 |
62 | temp_sample = []
63 | im, bboxes = data_item
64 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params)
65 | if buf is None or buf.size == 0:
66 |                 print('The encoded buffer is empty.')
67 |                 continue
68 |             if not ret:
69 |                 print('An error occurred during compression.')
70 | continue
71 | temp_sample.append(buf)
72 |
73 |             if isinstance(bboxes, str):  # negative sample
74 | temp_sample.append(0)
75 | temp_sample.append(int(bboxes))
76 | else:
77 | temp_sample.append(1)
78 | temp_sample.append(bboxes)
79 |
80 | self.data[self.counter] = temp_sample
81 | print('Successfully save the %d-th data item.' % self.counter)
82 | self.counter += 1
83 |
84 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
85 |
86 | def read_by_index(self, index):
87 | im_buf, flag, bboxes = self.data[index]
88 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR)
89 | return im, flag, bboxes
90 |
91 |
92 | def write_file():
93 | data_list_file_path = './data_folder/data_list_2019-05-07-14-47-19.txt'
94 | adapter = TextListAdapter(data_list_file_path)
95 |
96 | pickle_file_path = './data_folder/data_2019-05-07-14-47-19.pkl'
97 | encode_quality = 90
98 | packer = PickleProvider(pickle_file_path, encode_quality, adapter)
99 | packer.write()
100 |
101 |
102 | def read_file():
103 | pickle_file_path = './data_folder/data_2019-05-07-14-47-19.pkl'
104 |
105 | provider = PickleProvider(pickle_file_path)
106 | positive_index = provider.positive_index
107 | negative_index = provider.negative_index
108 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index)))
109 | # all_index = positive_index+negative_index
110 | import random
111 | random.shuffle(positive_index)
112 |
113 | for i, index in enumerate(positive_index):
114 | im, flag, bboxes_numpy = provider.read_by_index(index)
115 | if isinstance(bboxes_numpy, numpy.ndarray):
116 | for n in range(bboxes_numpy.shape[0]):
117 | cv2.rectangle(im, (bboxes_numpy[n, 0], bboxes_numpy[n, 1]),
118 | (bboxes_numpy[n, 0] + bboxes_numpy[n, 2], bboxes_numpy[n, 1] + bboxes_numpy[n, 3]), (0, 255, 0), 1)
119 | cv2.imshow('im', im)
120 | cv2.waitKey()
121 |
122 |
123 | if __name__ == '__main__':
124 | write_file()
125 | # read_file()
126 |
--------------------------------------------------------------------------------
/face_detection/data_provider_farm/text_list_adapter.py:
--------------------------------------------------------------------------------
1 | '''
2 | This adapter accepts a text file as input which describes the annotated data.
3 | Each line in the text file is formatted as:
4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]......
5 | '''
6 |
7 | import cv2
8 | import numpy
9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_data_adapter import DataAdapterBaseclass
10 |
11 |
12 | class TextListAdapter(DataAdapterBaseclass):
13 |
14 | def __init__(self, data_list_file_path):
15 |
16 | DataAdapterBaseclass.__init__(self)
17 | fin = open(data_list_file_path, 'r')
18 | self.lines = fin.readlines()
19 | fin.close()
20 | self.line_counter = 0
21 |
22 | def __del__(self):
23 | pass
24 |
25 | def get_one(self):
26 | """
27 |         This function uses 'yield' to return samples
28 | """
29 | while self.line_counter < len(self.lines):
30 |
31 | line = self.lines[self.line_counter].strip('\n').split(',')
32 | if line[1] == '1': # pos sample
33 | assert len(line[3:]) == 4 * int(line[2])
34 |
35 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED)
36 |
37 | if line[1] == '0':
38 | yield im, '0'
39 | self.line_counter += 1
40 | continue
41 |
42 | num_bboxes = int(line[2])
43 | bboxes = []
44 | for i in range(num_bboxes):
45 | x = float(line[3 + i * 4])
46 | y = float(line[3 + i * 4 + 1])
47 | width = float(line[3 + i * 4 + 2])
48 | height = float(line[3 + i * 4 + 3])
49 |
50 | bboxes.append([x, y, width, height])
51 |
52 | bboxes = numpy.array(bboxes, dtype=numpy.float32)
53 | yield im, bboxes
54 |
55 | self.line_counter += 1
56 |
57 |
58 | if __name__ == '__main__':
59 | pass
60 |
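61 | # Example lines (hypothetical paths and values):
62 | #   positive: /data/images/0001.jpg,1,2,10,20,30,40,60,80,25,25
63 | #   negative: /data/images/neg_0001.jpg,0,0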
--------------------------------------------------------------------------------
/face_detection/demo/demo.py:
--------------------------------------------------------------------------------
1 | """LFFD Demo."""
2 | import os, sys
3 | import argparse
4 | import cv2
5 | import time
6 | import mxnet as mx
7 | import numpy as np
8 |
9 | sys.path.append("..")
10 | from accuracy_evaluation import predict
11 |
12 |
13 | def parse_args():
14 | parser = argparse.ArgumentParser(description='LFFD Demo.')
15 |     parser.add_argument('--version', type=str, default='v2',
16 |                         help='The version of the pretrained model; "v1" and "v2" are supported.')
17 |     parser.add_argument('--mode', type=str, default='image',
18 |                         help='The format of the input data; "image" (jpg) and "video" (mp4) are supported.')
19 |     # use action='store_true' instead of type=bool: argparse treats any non-empty string as True
20 |     parser.add_argument('--use-gpu', action='store_true', help='Use GPU; default is CPU.')
21 | parser.add_argument('--data', type=str, default='./data',
22 | help='The path of input and output file.')
23 | args = parser.parse_args()
24 | return args
25 |
26 |
27 | def main():
28 | args = parse_args()
29 | # context list
30 | if args.use_gpu:
31 | ctx = mx.gpu(0)
32 | else:
33 | ctx = mx.cpu()
34 |
35 | if args.version == 'v1':
36 |         from config_farm import configuration_10_560_25L_8scales_v1 as cfg  # matches the v1 symbol/model paths below
37 |
38 | symbol_file_path = '../symbol_farm/symbol_10_560_25L_8scales_v1_deploy.json'
39 | model_file_path = '../saved_model/configuration_10_560_25L_8scales_v1/train_10_560_25L_8scales_v1_iter_1400000.params'
40 | elif args.version == 'v2':
41 | from config_farm import configuration_10_320_20L_5scales_v2 as cfg
42 |
43 | symbol_file_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json'
44 | model_file_path = '../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params'
45 | else:
46 | raise TypeError('Unsupported LFFD Version.')
47 |
48 | face_predictor = predict.Predict(mxnet=mx,
49 | symbol_file_path=symbol_file_path,
50 | model_file_path=model_file_path,
51 | ctx=ctx,
52 | receptive_field_list=cfg.param_receptive_field_list,
53 | receptive_field_stride=cfg.param_receptive_field_stride,
54 | bbox_small_list=cfg.param_bbox_small_list,
55 | bbox_large_list=cfg.param_bbox_large_list,
56 | receptive_field_center_start=cfg.param_receptive_field_center_start,
57 | num_output_scales=cfg.param_num_output_scales)
58 |
59 | if args.mode == 'image':
60 | data_folder = args.data
61 | file_name_list = [file_name for file_name in os.listdir(data_folder) \
62 | if file_name.lower().endswith('jpg')]
63 |
64 | for file_name in file_name_list:
65 | im = cv2.imread(os.path.join(data_folder, file_name))
66 |
67 | bboxes, infer_time = face_predictor.predict(im, resize_scale=1, score_threshold=0.6, top_k=10000, \
68 | NMS_threshold=0.4, NMS_flag=True, skip_scale_branch_list=[])
69 |
70 | for bbox in bboxes:
71 |                 cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
72 |
73 | # if max(im.shape[:2]) > 1600:
74 | # scale = 1600/max(im.shape[:2])
75 | # im = cv2.resize(im, (0, 0), fx=scale, fy=scale)
76 | cv2.imshow('im', im)
77 | cv2.waitKey(5000)
78 | cv2.imwrite(os.path.join(data_folder, file_name.replace('.jpg', '_result.png')), im)
79 | elif args.mode == 'video':
80 | # win_name = 'LFFD DEMO'
81 | # cv2.namedWindow(win_name, cv2.WINDOW_NORMAL)
82 | data_folder = args.data
83 | file_name_list = [file_name for file_name in os.listdir(data_folder) \
84 | if file_name.lower().endswith('mp4')]
85 | for file_name in file_name_list:
86 | out_file = os.path.join(data_folder, file_name.replace('.mp4', '_v2_gpu_result.avi'))
87 | cap = cv2.VideoCapture(os.path.join(data_folder, file_name))
88 | vid_writer = cv2.VideoWriter(out_file, \
89 | cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 60, \
90 | (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), \
91 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))
92 | while cv2.waitKey(1) < 0:
93 | ret, frame = cap.read()
94 | if ret:
95 | h, w, c = frame.shape
96 |
97 | if not ret:
98 | print("Done processing of %s" % file_name)
99 | print("Output file is stored as %s" % out_file)
100 | cv2.waitKey(3000)
101 | break
102 |
103 | tic = time.time()
104 | bboxes, infer_time = face_predictor.predict(frame, resize_scale=1, score_threshold=0.6, top_k=10000, \
105 | NMS_threshold=0.4, NMS_flag=True, skip_scale_branch_list=[])
106 | toc = time.time()
107 | detect_time = (toc - tic) * 1000
108 |
109 | face_num = 0
110 | for bbox in bboxes:
111 | face_num += 1
112 |                         cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
113 |
114 | computing_platform = 'Computing platform: NVIDIA GPU FP32'
115 | cv2.putText(frame, computing_platform, (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
116 | input_resolution = 'Network input resolution: %sx%s' % (w, h)
117 | cv2.putText(frame, input_resolution, (5, 65), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
118 | infer_time_info = 'Inference time: %.2f ms' % (infer_time)
119 | cv2.putText(frame, infer_time_info, (5, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
120 | infer_speed = 'Inference speed: %.2f FPS' % (1000 / infer_time)
121 | cv2.putText(frame, infer_speed, (5, 135), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
122 | face_num_info = 'Face num: %d' % (face_num)
123 | cv2.putText(frame, face_num_info, (5, 170), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
124 |
125 | vid_writer.write(frame.astype(np.uint8))
126 | # cv2.imshow(win_name, frame)
127 |
128 | if cv2.waitKey(1) & 0xFF == ord('q'):
129 | break
130 |
131 | cap.release()
132 | cv2.destroyAllWindows()
133 | else:
134 | raise TypeError('Unsupported File Format.')
135 |
136 |
137 | if __name__ == '__main__':
138 | main()
139 |
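140 | # Example invocations (paths are placeholders):
141 | #   python demo.py --version v2 --mode image --data ./data
142 | #   python demo.py --version v2 --mode video --data ./data --use-gpu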
--------------------------------------------------------------------------------
/face_detection/deploy_tensorrt/README.md:
--------------------------------------------------------------------------------
1 | ## Deployment with TensorRT
2 | We provide code for deployment with [TensorRT python API](https://developer.nvidia.com/tensorrt).
3 | In general, once you use an NVIDIA GPU in your application,
4 | TensorRT is the best choice for deployment, rather than training frameworks like TensorFlow, PyTorch, MXNet, Caffe...
5 |
6 | ### Prerequisites
7 | Refer to [inference_speed_evaluation](../inference_speed_evaluation) for details.
8 |
9 | ### Getting Started
10 | 1. use `to_onnx.py` to generate the onnx model file
11 | 2. run `predict_tensorrt.py` to do inference based on the generated model file
12 | 3. after you fully understand the code, you may refactor and merge it into your own project.
13 |
14 | > In most practical cases, C++ is the primary choice for efficient deployment,
15 | so you can rewrite the code following the Python code structure.
16 | A C++ version will be provided in the future.
17 |
18 | ### NVIDIA Jetson NANO&TX2 Deployment Instructions
19 | TBD
--------------------------------------------------------------------------------
/face_detection/deploy_tensorrt/debug_image/test1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test1.jpg
--------------------------------------------------------------------------------
/face_detection/deploy_tensorrt/debug_image/test2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test2.jpg
--------------------------------------------------------------------------------
/face_detection/deploy_tensorrt/debug_image/test3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test3.jpg
--------------------------------------------------------------------------------
/face_detection/deploy_tensorrt/debug_image/test5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test5.jpg
--------------------------------------------------------------------------------
/face_detection/deploy_tensorrt/debug_image/test6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test6.jpg
--------------------------------------------------------------------------------
/face_detection/deploy_tensorrt/to_onnx.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import numpy
3 | import sys
4 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python')  # add the mxnet python path if needed
5 | import mxnet
6 | from mxnet.contrib import onnx as onnx_mxnet
7 | from onnx import checker
8 | import onnx
9 |
10 |
11 | def generate_onnx_file():
12 | logging.basicConfig(level=logging.INFO)
13 |
14 | # set the proper symbol path, param path and onnx path
15 | symbol_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json'
16 | param_path = '../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params'
17 | onnx_path = './onnx_files/v2.onnx'
18 |
19 | net_symbol = mxnet.symbol.load(symbol_path)
20 | net_params_raw = mxnet.nd.load(param_path)
21 | net_params = dict()
22 | for k, v in net_params_raw.items():
23 | tp, name = k.split(':', 1)
24 | net_params.update({name: v})
25 |
26 | input_shape = (1, 3, 480, 640) # CAUTION: in TensorRT, the input size cannot be changed dynamically, so you must set it here.
27 |
28 | onnx_mxnet.export_model(net_symbol, net_params, [input_shape], numpy.float32, onnx_path, verbose=True)
29 |
30 | # Load onnx model
31 | model_proto = onnx.load_model(onnx_path)
32 |
33 | # Check if converted ONNX protobuf is valid
34 | checker.check_graph(model_proto.graph)
35 |
36 |
37 | if __name__ == '__main__':
38 | generate_onnx_file()
39 |
--------------------------------------------------------------------------------
/face_detection/inference_speed_evaluation/README.md:
--------------------------------------------------------------------------------
1 | ## Inference Speed Evaluation
2 |
3 | ### Update History
4 | * `2019.8.1` inference python code for MXNet-cudnn and TensorRT-cudnn is online.
5 |
6 | ### Additional Prerequisites
7 | * [onnx](https://onnx.ai/) (pip3 install onnx==1.3.0)
8 | * [pycuda](https://developer.nvidia.com/pycuda) (pip3 install pycuda==2019.1.1 or [install guide](https://pypi.org/project/pycuda/))
9 | * [tensorrt](https://developer.nvidia.com/tensorrt) =5.x (use pip3 to install the corresponding .whl file in python folder)
10 |
11 | > CAUTION:
12 | >
13 | > Carefully check version compatibility between CUDA, cuDNN, pycuda, TensorRT and onnx.
14 |
15 |
16 | ### Getting Started
17 | 1. (optional) temporarily add the mxnet python path to the environment if mxnet is not globally installed
18 | 2. set `eval_with_mxnet_flag` to True to evaluate with MXNet + cuDNN, or to False to evaluate with TensorRT + cuDNN (the two cannot run in the same process due to some conflicts)
19 | 3. set `symbol_file_path`, `input_shape` and `real_run_loops`
20 | 4. run the script
--------------------------------------------------------------------------------
/face_detection/inference_speed_evaluation/inference_speed_eval.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import sys
3 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python')  # append the mxnet python path if needed
4 | sys.path.append('../..')
5 | import mxnet
6 |
7 | eval_with_mxnet_flag = False
8 | symbol_file_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json'
9 | input_shape = (1, 3, 480, 640) # (1,3,240,320) (1,3,480,640) (1,3,720,1280) (1,3,1080,1920) (1,3,2160,3840)
10 |
11 | if eval_with_mxnet_flag:
12 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_mxnet_cudnn import InferenceSpeedEval as InferenceSpeedEvalMXNet
13 |
14 | inferenceSpeedEvalMXNet = InferenceSpeedEvalMXNet(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type='gpu', gpu_index=0)
15 | inferenceSpeedEvalMXNet.run_speed_eval(warm_run_loops=10, real_run_loops=200)
16 |
17 | else:
18 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_tensorrt_cudnn import InferenceSpeedEval as InferenceSpeedEvalTRT
19 |
20 | inferenceSpeedEvalTRT = InferenceSpeedEvalTRT(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape)
21 | inferenceSpeedEvalTRT.run_speed_eval(warm_run_loops=10, real_run_loops=200)
22 |
--------------------------------------------------------------------------------
/face_detection/metric_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/metric_farm/__init__.py
--------------------------------------------------------------------------------
/face_detection/metric_farm/metric_default.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy
4 | import torch
5 |
6 |
7 | class Metric:
8 | def __init__(self, num_scales):
9 | self.num_scales = num_scales
10 | self.sum_metric = [0.0 for i in range(num_scales * 2)]
11 | self.num_update = 0
12 | self.multiply_factor = 10000
13 |
14 | def update(self, loss_branch):
15 | for i in range(self.num_scales):
16 | loss_score = loss_branch[i * 2]
17 | loss_bbox = loss_branch[i * 2 + 1]
18 |
19 | self.sum_metric[i * 2] += loss_score
20 | self.sum_metric[i * 2 + 1] += loss_bbox
21 |
22 | self.num_update += 1
23 |
24 | def get(self):
25 | return_string_list = []
26 | for i in range(self.num_scales):
27 | return_string_list.append('cls_loss_score_' + str(i))
28 | return_string_list.append('reg_loss_bbox_' + str(i))
29 |
30 |         return return_string_list, [m / self.num_update * self.multiply_factor for m in self.sum_metric]
31 |
32 | def reset(self):
33 | self.sum_metric = [0.0 for i in range(self.num_scales * 2)]
34 | self.num_update = 0
35 |
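36 | # A minimal usage sketch (hypothetical loss values; assumes a 2-scale model whose
37 | # flat loss list is ordered [score_loss_0, bbox_loss_0, score_loss_1, bbox_loss_1]):
38 | if __name__ == '__main__':
39 |     metric = Metric(num_scales=2)
40 |     metric.update([0.7, 0.2, 0.5, 0.1])
41 |     names, values = metric.get()
42 |     for name, value in zip(names, values):
43 |         print('%s: %.2f' % (name, value))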
--------------------------------------------------------------------------------
/face_detection/net_farm/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @date : 18-8-19
3 | # @author : MindBreaker
4 | # @module :
--------------------------------------------------------------------------------
/face_detection/net_farm/naivenet_structures.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/net_farm/naivenet_structures.xlsx
--------------------------------------------------------------------------------
/face_detection/qualitative_results/v1_qualitative_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_1.jpg
--------------------------------------------------------------------------------
/face_detection/qualitative_results/v1_qualitative_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_2.jpg
--------------------------------------------------------------------------------
/face_detection/qualitative_results/v1_qualitative_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_3.jpg
--------------------------------------------------------------------------------
/face_detection/qualitative_results/v1_qualitative_4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_4.jpg
--------------------------------------------------------------------------------
/face_detection/qualitative_results/v1_qualitative_5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_5.jpg
--------------------------------------------------------------------------------
/face_detection/saved_model/configuration_10_320_20L_5scales_v2/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/saved_model/configuration_10_320_20L_5scales_v2/.gitkeep
--------------------------------------------------------------------------------
/face_detection/saved_model/configuration_10_560_25L_8scales_v1/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/saved_model/configuration_10_560_25L_8scales_v1/.gitkeep
--------------------------------------------------------------------------------
/head_detection/README.md:
--------------------------------------------------------------------------------
1 | ## Head Detection
2 | We use the brainwash dataset introduced by the paper [End-to-end people detection in crowded scenes](https://arxiv.org/abs/1506.04878).
3 |
4 | ### Recent Update
5 | * `2019.09.23` model v1 for brainwash dataset is released.
6 | * `2019.09.26` the brainwash dataset (and its packed pkl) is uploaded for download.
7 |
8 | ### Brief Introduction to Model Version
9 | * v1 - designed for the brainwash dataset, covering the head scale range [10, 160] with 4 detection branches. Please check
10 | `./symbol_farm/symbol_structures.xlsx` for details.
11 |
12 | ### Inference Latency
13 |
14 | * Platform info: NVIDIA RTX 2080TI, CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0
15 |
16 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160|7680×4320
17 | -------------|-------|-------|--------|---------|---------|---------
18 | v1|0.83ms(1198.38FPS)|1.91ms(524.14FPS)|4.83ms(206.92FPS)|10.62ms(94.19FPS)|42.28ms(23.65FPS)|166.81ms(5.99FPS)
19 |
20 | * Platform info: NVIDIA GTX 1060(laptop), CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0
21 |
22 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160
23 | -------------|-------|-------|--------|---------|---------
24 | v1|1.62ms(618.53FPS)|4.83ms(207.06FPS)|13.67ms(73.18FPS)|30.01ms(33.32FPS)|121.15ms(8.25FPS)
25 |
26 | > CAUTION: The latency may vary even in the same setting.
27 |
28 | ### Accuracy on Brainwash Dataset
29 | We train v1 on the training set (10,769 images with 81,975 annotated heads) and evaluate on the test set (500 images with 5,007
30 | annotated heads). This dataset is relatively easy due to its monotonous scenarios.
31 |
32 | #### Quantitative Results on Test Set
33 | Average Precision (AP) is used to measure the accuracy. Specifically, we use the code of [Object-Detection-Metrics](https://github.com/rafaelpadilla/Object-Detection-Metrics)
34 | to calculate the AP metric (the detection file format is sketched in the appendix at the end of this README). The following table presents the results:
35 |
36 | Method|AP
37 | --------|------
38 | ReInspect, Lhungarian [1]|0.78
39 | FCHD [2]|0.70
40 | v1 (our)|0.91
41 |
42 | >[1] [End-to-end people detection in crowded scenes](https://arxiv.org/abs/1506.04878)
43 | >
44 | >[2] [FCHD: Fast and accurate head detection in crowded scenes](https://arxiv.org/abs/1809.08766)
45 |
46 | Our v1 significantly outperforms the existing methods.
47 |
48 | #### Some Qualitative Results on Test Set
49 | 
50 | 
51 | 
52 | 
53 |
54 | ### User Instructions
55 | Please refer to [README in face_detection](../face_detection/README.md) for details.
56 |
57 | ### Data Download
58 | We provide both the original and the packed data of the brainwash dataset. The packed data can be downloaded in three ways:
59 | * [Baidu Yunpan](https://pan.baidu.com/s/1VdiXHhtw9aNaU1E9PhVwtQ) (pwd:zvma)
60 | * [MS OneDrive]
61 | * [Google Drive]
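62 |
63 | ### Appendix: Evaluation File Format
64 | The scripts in `./accuracy_evaluation/` write one text file per image for [Object-Detection-Metrics](https://github.com/rafaelpadilla/Object-Detection-Metrics):
65 | ground-truth files contain `head <x> <y> <w> <h>` lines, and predicted files contain `head <score> <x> <y> <w> <h>` lines. A sketch with made-up values:
66 |
67 | ```
68 | # ground-truth file          # predicted file
69 | head 118 64 24 26            head 0.953 117 63 25 27
70 | head 305 41 22 25            head 0.881 304 40 23 26
71 | ```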
--------------------------------------------------------------------------------
/head_detection/accuracy_evaluation/evaluation_on_brainwash.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import sys
4 | import cv2
5 | import math
6 | import re
7 |
8 | sys.path.append('..')
9 | # change the config as you need
10 | from config_farm import configuration_10_160_17L_4scales_v1 as cfg
11 | import mxnet
12 | from predict import Predict
13 |
14 |
15 | def generate_gt_files():
16 | txt_file_path = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash/brainwash_test.idl'
17 | gt_file_root = './brainwash_testset_gt_files_for_evaluation'
18 |
19 | if not os.path.exists(gt_file_root):
20 | os.makedirs(gt_file_root)
21 |
22 | fin = open(txt_file_path, 'r')
23 |
24 | counter = 0
25 | for line in fin:
26 | line = line.strip(';\n')
27 | im_path = re.findall('["](.*?)["]', line)[0]
28 |
29 | bbox_str_list = re.findall('[(](.*?)[)]', line)
30 | bbox_list = []
31 | for bbox_str in bbox_str_list:
32 | bbox_str = bbox_str.split(', ')
33 | xmin = int(float(bbox_str[0]))
34 | ymin = int(float(bbox_str[1]))
35 | xmax = int(float(bbox_str[2]))
36 | ymax = int(float(bbox_str[3]))
37 | bbox_list.append((xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
38 |
39 | if len(bbox_list) != 0:
40 | gt_file_name = im_path.replace('/', '_')
41 | gt_file_name = gt_file_name.replace('png', 'txt')
42 | fout = open(os.path.join(gt_file_root, gt_file_name), 'w')
43 | for bbox in bbox_list:
44 | line_str = 'head ' + str(bbox[0]) + ' ' + str(bbox[1]) + ' ' + str(bbox[2]) + ' ' + str(bbox[3])
45 | fout.write(line_str + '\n')
46 | fout.close()
47 | counter += 1
48 | print(counter)
49 | fin.close()
50 |
51 |
52 | def generate_predicted_files():
53 | # set the proper symbol file and model file
54 | symbol_file_path = '../symbol_farm/symbol_10_160_17L_4scales_v1_deploy.json'
55 | model_file_path = '../saved_model/configuration_10_160_17L_4scales_v1_2019-09-20-13-08-26/train_10_160_17L_4scales_v1_iter_800000.params'
56 | my_predictor = Predict(mxnet=mxnet,
57 | symbol_file_path=symbol_file_path,
58 | model_file_path=model_file_path,
59 | ctx=mxnet.gpu(0),
60 | receptive_field_list=cfg.param_receptive_field_list,
61 | receptive_field_stride=cfg.param_receptive_field_stride,
62 | bbox_small_list=cfg.param_bbox_small_list,
63 | bbox_large_list=cfg.param_bbox_large_list,
64 | receptive_field_center_start=cfg.param_receptive_field_center_start,
65 | num_output_scales=cfg.param_num_output_scales)
66 |
67 |     # set the brainwash test annotation file and the image root
68 | txt_file_path = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash/brainwash_test.idl'
69 | image_root = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash'
70 | predicted_file_root = './brainwash_testset_predicted_files_for_evaluation_' + os.path.basename(model_file_path).split('.')[0]
71 |
72 | if not os.path.exists(predicted_file_root):
73 | os.makedirs(predicted_file_root)
74 |
75 | fin = open(txt_file_path, 'r')
76 |
77 | resize_scale = 1
78 | score_threshold = 0.05
79 | NMS_threshold = 0.6
80 | counter = 0
81 |
82 | for line in fin:
83 | line = line.strip(';\n')
84 | im_path = re.findall('["](.*?)["]', line)[0]
85 |
86 | im = cv2.imread(os.path.join(image_root, im_path), cv2.IMREAD_COLOR)
87 |
88 | bboxes = my_predictor.predict(im, resize_scale=resize_scale, score_threshold=score_threshold, top_k=10000, NMS_threshold=NMS_threshold)
89 |
90 | # for bbox in bboxes:
91 | # cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 1)
92 | # cv2.imshow('im',im)
93 | # cv2.waitKey()
94 | predicted_file_name = im_path.replace('/', '_')
95 | predicted_file_name = predicted_file_name.replace('png', 'txt')
96 | fout = open(os.path.join(predicted_file_root, predicted_file_name), 'w')
97 | for bbox in bboxes:
98 | fout.write('head %.03f %d %d %d %d' % (bbox[4] if bbox[4] <= 1 else 1, math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1])) + '\n')
99 | fout.close()
100 | counter += 1
101 | print('[%d] is processed.' % counter)
102 |
103 |
104 | if __name__ == '__main__':
105 | # generate_gt_files()
106 | generate_predicted_files()
107 |
--------------------------------------------------------------------------------
/head_detection/accuracy_evaluation/predict.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import sys
3 | import os
4 | import numpy
5 | import cv2
6 |
7 |
8 | # empty data batch class for dynamically attached properties
9 | class DataBatch:
10 | pass
11 |
12 |
13 | def NMS(boxes, overlap_threshold):
14 | '''
15 |
16 | :param boxes: numpy nx5, n is the number of boxes, 0:4->x1, y1, x2, y2, 4->score
17 | :param overlap_threshold:
18 | :return:
19 | '''
20 | if boxes.shape[0] == 0:
21 | return boxes
22 |
23 |     # if the bounding box coordinates are integers, convert them to floats --
24 |     # this is important since we'll be doing a bunch of divisions
25 | if boxes.dtype != numpy.float32:
26 | boxes = boxes.astype(numpy.float32)
27 |
28 | # initialize the list of picked indexes
29 | pick = []
30 | # grab the coordinates of the bounding boxes
31 | x1 = boxes[:, 0]
32 | y1 = boxes[:, 1]
33 | x2 = boxes[:, 2]
34 | y2 = boxes[:, 3]
35 | sc = boxes[:, 4]
36 | widths = x2 - x1
37 | heights = y2 - y1
38 |
39 |     # compute the area of the bounding boxes and sort the indices
40 |     # by score in ascending order
41 |     area = heights * widths
42 |     idxs = numpy.argsort(sc)  # ascending by score
43 |
44 | # keep looping while some indexes still remain in the indexes list
45 | while len(idxs) > 0:
46 | # grab the last index in the indexes list and add the
47 | # index value to the list of picked indexes
48 | last = len(idxs) - 1
49 | i = idxs[last]
50 | pick.append(i)
51 |
52 |         # compute the intersection with the remaining boxes
53 | xx1 = numpy.maximum(x1[i], x1[idxs[:last]])
54 | yy1 = numpy.maximum(y1[i], y1[idxs[:last]])
55 | xx2 = numpy.minimum(x2[i], x2[idxs[:last]])
56 | yy2 = numpy.minimum(y2[i], y2[idxs[:last]])
57 |
58 |         # compute the width and height of the intersection box
59 | w = numpy.maximum(0, xx2 - xx1 + 1)
60 | h = numpy.maximum(0, yy2 - yy1 + 1)
61 |
62 |         # compute the ratio of intersection over the area of each remaining box
63 | overlap = (w * h) / area[idxs[:last]]
64 |
65 |         # delete the current index and all indexes whose overlap exceeds the threshold
66 | idxs = numpy.delete(idxs, numpy.concatenate(([last], numpy.where(overlap > overlap_threshold)[0])))
67 |
68 |     # return only the bounding boxes that were picked
69 |     # (scores are kept in the 5th column)
70 | return boxes[pick]
71 |
72 |
73 | class Predict(object):
74 |
75 | def __init__(self,
76 | mxnet,
77 | symbol_file_path,
78 | model_file_path,
79 | ctx,
80 | receptive_field_list,
81 | receptive_field_stride,
82 | bbox_small_list,
83 | bbox_large_list,
84 | receptive_field_center_start,
85 | num_output_scales
86 | ):
87 | self.mxnet = mxnet
88 | self.symbol_file_path = symbol_file_path
89 | self.model_file_path = model_file_path
90 | self.ctx = ctx
91 |
92 | self.receptive_field_list = receptive_field_list
93 | self.receptive_field_stride = receptive_field_stride
94 | self.bbox_small_list = bbox_small_list
95 | self.bbox_large_list = bbox_large_list
96 | self.receptive_field_center_start = receptive_field_center_start
97 | self.num_output_scales = num_output_scales
98 | self.constant = [i / 2.0 for i in self.receptive_field_list]
99 | self.input_height = 480
100 | self.input_width = 640
101 | self.__load_model()
102 |
103 | def __load_model(self):
104 | # load symbol and parameters
105 | print('----> load symbol file: %s\n----> load model file: %s' % (self.symbol_file_path, self.model_file_path))
106 | if not os.path.exists(self.symbol_file_path):
107 | print('The symbol file does not exist!!!!')
108 | sys.exit(1)
109 | if not os.path.exists(self.model_file_path):
110 | print('The model file does not exist!!!!')
111 | sys.exit(1)
112 | self.symbol_net = self.mxnet.symbol.load(self.symbol_file_path)
113 | data_name = 'data'
114 | data_name_shape = (data_name, (1, 3, self.input_height, self.input_width))
115 | self.module = self.mxnet.module.Module(symbol=self.symbol_net,
116 | data_names=[data_name],
117 | label_names=None,
118 | context=self.ctx,
119 | work_load_list=None)
120 | self.module.bind(data_shapes=[data_name_shape],
121 | for_training=False)
122 |
123 | save_dict = self.mxnet.nd.load(self.model_file_path)
124 | self.arg_name_arrays = dict()
125 | self.arg_name_arrays['data'] = self.mxnet.nd.zeros((1, 3, self.input_height, self.input_width), self.ctx)
126 | self.aux_name_arrays = {}
127 | for k, v in save_dict.items():
128 | tp, name = k.split(':', 1)
129 | if tp == 'arg':
130 | self.arg_name_arrays.update({name: v.as_in_context(self.ctx)})
131 | if tp == 'aux':
132 | self.aux_name_arrays.update({name: v.as_in_context(self.ctx)})
133 | self.module.init_params(arg_params=self.arg_name_arrays,
134 | aux_params=self.aux_name_arrays,
135 | allow_missing=True)
136 | print('----> Model is loaded successfully.')
137 |
138 | def predict(self, image, resize_scale=1, score_threshold=0.8, top_k=100, NMS_threshold=0.3, NMS_flag=True, skip_scale_branch_list=[]):
139 |
140 | if image.ndim != 3 or image.shape[2] != 3:
141 |             print('Only 3-channel color images are supported.')
142 | return None
143 |
144 | bbox_collection = []
145 |
146 | shorter_side = min(image.shape[:2])
147 | if shorter_side * resize_scale < 128:
148 | resize_scale = float(128) / shorter_side
149 |
150 | input_image = cv2.resize(image, (0, 0), fx=resize_scale, fy=resize_scale)
151 |
152 | input_image = input_image.astype(dtype=numpy.float32)
153 | input_image = input_image[:, :, :, numpy.newaxis]
154 | input_image = input_image.transpose([3, 2, 0, 1])
155 |
156 | data_batch = DataBatch()
157 | data_batch.data = [self.mxnet.ndarray.array(input_image, self.ctx)]
158 |
159 | self.module.forward(data_batch=data_batch, is_train=False)
160 | results = self.module.get_outputs()
161 | outputs = []
162 | for output in results:
163 | outputs.append(output.asnumpy())
164 |
165 | for i in range(self.num_output_scales):
166 | if i in skip_scale_branch_list:
167 | continue
168 |
169 | score_map = numpy.squeeze(outputs[i * 2], (0, 1))
170 |
171 | # score_map_show = score_map * 255
172 | # score_map_show[score_map_show < 0] = 0
173 | # score_map_show[score_map_show > 255] = 255
174 | # cv2.imshow('score_map' + str(i), cv2.resize(score_map_show.astype(dtype=numpy.uint8), (0, 0), fx=2, fy=2))
175 | # cv2.waitKey()
176 |
177 | bbox_map = numpy.squeeze(outputs[i * 2 + 1], 0)
178 |
179 | RF_center_Xs = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * x for x in range(score_map.shape[1])])
180 | RF_center_Xs_mat = numpy.tile(RF_center_Xs, [score_map.shape[0], 1])
181 | RF_center_Ys = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * y for y in range(score_map.shape[0])])
182 | RF_center_Ys_mat = numpy.tile(RF_center_Ys, [score_map.shape[1], 1]).T
183 |
184 | x_lt_mat = RF_center_Xs_mat - bbox_map[0, :, :] * self.constant[i]
185 | y_lt_mat = RF_center_Ys_mat - bbox_map[1, :, :] * self.constant[i]
186 | x_rb_mat = RF_center_Xs_mat - bbox_map[2, :, :] * self.constant[i]
187 | y_rb_mat = RF_center_Ys_mat - bbox_map[3, :, :] * self.constant[i]
188 |
189 | x_lt_mat = x_lt_mat / resize_scale
190 | x_lt_mat[x_lt_mat < 0] = 0
191 | y_lt_mat = y_lt_mat / resize_scale
192 | y_lt_mat[y_lt_mat < 0] = 0
193 | x_rb_mat = x_rb_mat / resize_scale
194 | x_rb_mat[x_rb_mat > image.shape[1]] = image.shape[1]
195 | y_rb_mat = y_rb_mat / resize_scale
196 | y_rb_mat[y_rb_mat > image.shape[0]] = image.shape[0]
197 |
198 | select_index = numpy.where(score_map > score_threshold)
199 | for idx in range(select_index[0].size):
200 | bbox_collection.append((x_lt_mat[select_index[0][idx], select_index[1][idx]],
201 | y_lt_mat[select_index[0][idx], select_index[1][idx]],
202 | x_rb_mat[select_index[0][idx], select_index[1][idx]],
203 | y_rb_mat[select_index[0][idx], select_index[1][idx]],
204 | score_map[select_index[0][idx], select_index[1][idx]]))
205 |
206 | # NMS
207 | bbox_collection = sorted(bbox_collection, key=lambda item: item[-1], reverse=True)
208 | if len(bbox_collection) > top_k:
209 | bbox_collection = bbox_collection[0:top_k]
210 | bbox_collection_numpy = numpy.array(bbox_collection, dtype=numpy.float32)
211 |
212 | if NMS_flag:
213 | final_bboxes = NMS(bbox_collection_numpy, NMS_threshold)
214 | final_bboxes_ = []
215 | for i in range(final_bboxes.shape[0]):
216 | final_bboxes_.append((final_bboxes[i, 0], final_bboxes[i, 1], final_bboxes[i, 2], final_bboxes[i, 3], final_bboxes[i, 4]))
217 |
218 | return final_bboxes_
219 | else:
220 | return bbox_collection_numpy
221 |
222 |
223 | def run_prediction_pickle():
224 | from config_farm import configuration_10_160_17L_4scales_v1 as cfg
225 | import mxnet
226 |
227 | data_pickle_file_path = '../data_provider_farm/data_folder/data_list_brainwash_test.pkl'
228 | from data_provider_farm.pickle_provider import PickleProvider
229 | pickle_provider = PickleProvider(data_pickle_file_path)
230 | positive_index = pickle_provider.positive_index
231 | negative_index = pickle_provider.negative_index
232 | all_index = positive_index
233 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index)))
234 | # import random
235 | # random.shuffle(all_index)
236 |
237 | symbol_file_path = '../symbol_farm/symbol_10_160_17L_4scales_v1_deploy.json'
238 | model_file_path = '../saved_model/configuration_10_160_17L_4scales_v1_2019-09-20-13-08-26/train_10_160_17L_4scales_v1_iter_800000.params'
239 | my_predictor = Predict(mxnet=mxnet,
240 | symbol_file_path=symbol_file_path,
241 | model_file_path=model_file_path,
242 | ctx=mxnet.gpu(0),
243 | receptive_field_list=cfg.param_receptive_field_list,
244 | receptive_field_stride=cfg.param_receptive_field_stride,
245 | bbox_small_list=cfg.param_bbox_small_list,
246 | bbox_large_list=cfg.param_bbox_large_list,
247 | receptive_field_center_start=cfg.param_receptive_field_center_start,
248 | num_output_scales=cfg.param_num_output_scales)
249 |
250 | for idx in all_index:
251 | im, _, bboxes_gt = pickle_provider.read_by_index(idx)
252 |
253 | bboxes = my_predictor.predict(im, resize_scale=1, score_threshold=0.5, top_k=10000, NMS_threshold=0.6)
254 | for bbox in bboxes:
255 |         cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
256 |
257 | cv2.imshow('im', im)
258 | key = cv2.waitKey()
259 | # if key & 0xFF == ord('s'):
260 | # cv2.imwrite('./test_images/' + str(idx) + '.jpg', im)
261 |
262 |
263 | if __name__ == '__main__':
264 | run_prediction_pickle()
265 |
266 |
--------------------------------------------------------------------------------
/head_detection/accuracy_evaluation/test_images/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/2.jpg
--------------------------------------------------------------------------------
/head_detection/accuracy_evaluation/test_images/247.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/247.jpg
--------------------------------------------------------------------------------
/head_detection/accuracy_evaluation/test_images/322.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/322.jpg
--------------------------------------------------------------------------------
/head_detection/accuracy_evaluation/test_images/342.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/342.jpg
--------------------------------------------------------------------------------
/head_detection/accuracy_evaluation/test_images/377.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/377.jpg
--------------------------------------------------------------------------------
/head_detection/accuracy_evaluation/test_images/411.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/411.jpg
--------------------------------------------------------------------------------
/head_detection/accuracy_evaluation/test_images/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/5.jpg
--------------------------------------------------------------------------------
/head_detection/accuracy_evaluation/test_images/7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/7.jpg
--------------------------------------------------------------------------------
/head_detection/accuracy_evaluation/test_images/72.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/72.jpg
--------------------------------------------------------------------------------
/head_detection/config_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/config_farm/__init__.py
--------------------------------------------------------------------------------
/head_detection/data_iterator_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/data_iterator_farm/__init__.py
--------------------------------------------------------------------------------
/head_detection/data_provider_farm/pickle_provider.py:
--------------------------------------------------------------------------------
1 | '''
2 | This provider accepts an adapter, saves the dataset to a pickle file, and loads the whole dataset into memory for data iterators
3 | '''
4 |
5 | import cv2
6 | import numpy
7 | import pickle
8 |
9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_provider import ProviderBaseclass
10 | from .text_list_adapter import TextListAdapter
11 |
12 |
13 | class PickleProvider(ProviderBaseclass):
14 | """
15 | This class provides methods to save and read data.
16 | By default, images are compressed using JPG format.
17 |     If data_adapter is not None, the provider writes data; otherwise it reads data
18 | """
19 |
20 | def __init__(self,
21 | pickle_file_path,
22 | encode_quality=90,
23 | data_adapter=None):
24 | ProviderBaseclass.__init__(self)
25 |
26 | if data_adapter: # write data
27 |
28 | self.data_adapter = data_adapter
29 | self.data = {}
30 | self.counter = 0
31 | self.pickle_file_path = pickle_file_path
32 |
33 | else: # read data
34 |
35 | self.data = pickle.load(open(pickle_file_path, 'rb'))
36 |             # get positive and negative indices
37 | self._positive_index = []
38 | self._negative_index = []
39 | for k, v in self.data.items():
40 | if v[1] == 0: # negative
41 | self._negative_index.append(k)
42 | else: # positive
43 | self._positive_index.append(k)
44 |
45 | self.compression_mode = '.jpg'
46 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality]
47 |
48 | @property
49 | def positive_index(self):
50 | return self._positive_index
51 |
52 | @property
53 | def negative_index(self):
54 | return self._negative_index
55 |
56 | def write(self):
57 |
58 | for data_item in self.data_adapter.get_one():
59 |
60 | temp_sample = []
61 | im, bboxes = data_item
62 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params)
63 | if buf is None or buf.size == 0:
64 |                 print('buf is invalid.')
65 |                 continue
66 |             if not ret:
67 |                 print('An error occurred during image encoding.')
68 | continue
69 | temp_sample.append(buf)
70 |
71 |             if isinstance(bboxes, str):  # negative sample
72 | temp_sample.append(0)
73 | temp_sample.append(int(bboxes))
74 | else:
75 | temp_sample.append(1)
76 | temp_sample.append(bboxes)
77 |
78 | self.data[self.counter] = temp_sample
79 | print('Successfully save the %d-th data item.' % self.counter)
80 | self.counter += 1
81 |
82 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
83 |
84 | def read_by_index(self, index):
85 | im_buf, flag, bboxes = self.data[index]
86 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR)
87 | return im, flag, bboxes
88 |
89 |
90 | def write_file():
91 | data_list_file_path = './data_folder/data_list_brainwash_test.txt'
92 | adapter = TextListAdapter(data_list_file_path)
93 |
94 | pickle_file_path = './data_folder/data_list_brainwash_test.pkl'
95 | encode_quality = 90
96 | packer = PickleProvider(pickle_file_path, encode_quality, adapter)
97 | packer.write()
98 |
99 |
100 | def read_file():
101 | pickle_file_path = './data_folder/data_list_brainwash_test.pkl'
102 |
103 | provider = PickleProvider(pickle_file_path)
104 | positive_index = provider.positive_index
105 | negative_index = provider.negative_index
106 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index)))
107 | # all_index = positive_index+negative_index
108 | import random
109 | random.shuffle(positive_index)
110 |
111 | for i, index in enumerate(positive_index):
112 | im, flag, bboxes_numpy = provider.read_by_index(index)
113 | if isinstance(bboxes_numpy, numpy.ndarray):
114 | for n in range(bboxes_numpy.shape[0]):
115 |                 cv2.rectangle(im, (int(bboxes_numpy[n, 0]), int(bboxes_numpy[n, 1])),
116 |                               (int(bboxes_numpy[n, 0] + bboxes_numpy[n, 2]), int(bboxes_numpy[n, 1] + bboxes_numpy[n, 3])), (0, 255, 0), 1)
117 | cv2.imshow('im', im)
118 | cv2.waitKey()
119 |
120 |
121 | if __name__ == '__main__':
122 | # write_file()
123 | read_file()
124 |
--------------------------------------------------------------------------------
/head_detection/data_provider_farm/reformat_brainwash.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import os
3 | import cv2
4 | import json
5 | import math
6 | import re
7 |
8 | '''Reformat brainwash .idl annotations (lines like `"path/img.png": (x1, y1, x2, y2), (...);`,
9 | coordinates illustrative) into the plain text list format consumed by text_list_adapter.py.'''
10 |
11 |
12 | def generate_data_list():
13 | txt_file_path = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash/brainwash_test.idl'
14 | image_root = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash'
15 |
16 | list_file_path = './data_folder/data_list_brainwash_test.txt'
17 | if not os.path.exists(os.path.dirname(list_file_path)):
18 | os.makedirs(os.path.dirname(list_file_path))
19 | fin = open(txt_file_path, 'r')
20 | fout = open(list_file_path, 'w')
21 |
22 | counter = 0
23 | for line in fin:
24 | line = line.strip(';\n')
25 | im_path = re.findall('["](.*?)["]', line)[0]
26 | im_path = os.path.join(image_root, im_path)
27 | if not os.path.exists(im_path):
28 |             print('image file does not exist: %s' % im_path)
29 | continue
30 | bbox_str_list = re.findall('[(](.*?)[)]', line)
31 | bbox_list = []
32 | for bbox_str in bbox_str_list:
33 | bbox_str = bbox_str.split(', ')
34 | xmin = int(float(bbox_str[0]))
35 | ymin = int(float(bbox_str[1]))
36 | xmax = int(float(bbox_str[2]))
37 | ymax = int(float(bbox_str[3]))
38 | bbox_list.append((xmin, ymin, xmax-xmin+1, ymax-ymin+1))
39 |
40 | if len(bbox_list) == 0:
41 | line_str = im_path+',0,0'
42 | fout.write(line_str+'\n')
43 | else:
44 | line_str = im_path+',1,'+str(len(bbox_list))
45 | for bbox in bbox_list:
46 | line_str += ','+str(bbox[0])+','+str(bbox[1])+','+str(bbox[2])+','+str(bbox[3])
47 | fout.write(line_str + '\n')
48 | counter += 1
49 | print(counter)
50 |
51 | fout.close()
52 | fin.close()
53 |
54 |
55 | def show_image():
56 | list_file_path = './data_folder/data_list_brainwash_test.txt'
57 |
58 | fin = open(list_file_path, 'r')
59 | lines = fin.readlines()
60 | fin.close()
61 |
62 | import random
63 | random.shuffle(lines)
64 | for line in lines:
65 | line = line.strip('\n').split(',')
66 |
67 | im = cv2.imread(line[0])
68 |
69 | bboxes = []
70 | num_bboxes = int(line[2])
71 | for i in range(num_bboxes):
72 | xmin = int(line[3 + i * 4])
73 | ymin = int(line[4 + i * 4])
74 | width = int(line[5 + i * 4])
75 | height = int(line[6 + i * 4])
76 | bboxes.append((xmin, ymin, xmin + width - 1, ymin + height - 1))
77 |
78 | for bbox in bboxes:
79 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 2)
80 |
81 | cv2.imshow('im', im)
82 | cv2.waitKey()
83 |
84 |
85 | def dataset_statistics():
86 | list_file_path = './data_folder/data_list_brainwash_test.txt'
87 |
88 | fin = open(list_file_path, 'r')
89 | lines = fin.readlines()
90 | fin.close()
91 |
92 | bin_size = 5
93 | longer_bin_dict = {}
94 | shorter_bin_dict = {}
95 | for line in lines:
96 | line = line.strip('\n').split(',')
97 | num_bboxes = int(line[2])
98 | for i in range(num_bboxes):
99 | width = int(line[5 + i * 4])
100 | height = int(line[6 + i * 4])
101 |
102 | longer_side = max(width, height)
103 | shorter_side = min(width, height)
104 |
105 | key = int(longer_side / bin_size)
106 | if key in longer_bin_dict:
107 | longer_bin_dict[key] += 1
108 | else:
109 | longer_bin_dict[key] = 1
110 |
111 | key = int(shorter_side / bin_size)
112 | if key in shorter_bin_dict:
113 | shorter_bin_dict[key] += 1
114 | else:
115 | shorter_bin_dict[key] = 1
116 |
117 | print('shorter side based statistics:')
118 | shorter_bin_dict_key_list = sorted(shorter_bin_dict)
119 | for k in shorter_bin_dict_key_list:
120 | v = shorter_bin_dict[k]
121 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v))
122 |
123 | print('longer side based statistics:')
124 | longer_bin_dict_key_list = sorted(longer_bin_dict)
125 | for k in longer_bin_dict_key_list:
126 | v = longer_bin_dict[k]
127 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v))
128 |
129 |
130 | if __name__ == '__main__':
131 | # generate_data_list()
132 | # show_image()
133 | dataset_statistics()
134 |
135 |
--------------------------------------------------------------------------------
/head_detection/data_provider_farm/text_list_adapter.py:
--------------------------------------------------------------------------------
1 | '''
2 | This adapter accepts a text file as input which describes the annotated data.
3 | Each line in the text file is formatted as:
4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]......
5 | '''
6 |
7 | import cv2
8 | import numpy
9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_data_adapter import DataAdapterBaseclass
10 |
11 |
12 | class TextListAdapter(DataAdapterBaseclass):
13 |
14 | def __init__(self, data_list_file_path):
15 |
16 | DataAdapterBaseclass.__init__(self)
17 | fin = open(data_list_file_path, 'r')
18 | self.lines = fin.readlines()
19 | fin.close()
20 | self.line_counter = 0
21 |
22 | def __del__(self):
23 | pass
24 |
25 | def get_one(self):
26 | """
27 |         This function uses 'yield' to return samples one at a time
28 | """
29 | while self.line_counter < len(self.lines):
30 |
31 | line = self.lines[self.line_counter].strip('\n').split(',')
32 |             if line[1] == '1':  # for positive samples, verify that the number of bbox values matches the count field
33 | assert len(line[3:]) == 4 * int(line[2])
34 |
35 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED)
36 |
37 | if line[1] == '0':
38 | yield im, '0'
39 | self.line_counter += 1
40 | continue
41 |
42 | num_bboxes = int(line[2])
43 | bboxes = []
44 | for i in range(num_bboxes):
45 | x = float(line[3 + i * 4])
46 | y = float(line[3 + i * 4 + 1])
47 | width = float(line[3 + i * 4 + 2])
48 | height = float(line[3 + i * 4 + 3])
49 |
50 | bboxes.append([x, y, width, height])
51 |
52 | bboxes = numpy.array(bboxes, dtype=numpy.float32)
53 | yield im, bboxes
54 |
55 | self.line_counter += 1
56 |
57 |
58 | if __name__ == '__main__':
59 | pass
60 |
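61 | # Example of a data list file (hypothetical paths; one positive line with two
62 | # bboxes in x,y,width,height order, and one negative line):
63 | #   /data/images/000001.jpg,1,2,10,20,30,40,50,60,70,80
64 | #   /data/images/000002.jpg,0,0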
--------------------------------------------------------------------------------
/head_detection/inference_speed_evaluation/inference_speed_eval.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import sys
3 |
4 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python')
5 | import mxnet
6 |
7 | eval_with_mxnet_flag = False
8 | symbol_file_path = '/home/heyonghao/projects/ChasingHeadDetection/symbol_farm/symbol_10_160_17L_4scales_v1_deploy.json'
9 | input_shape = (1, 3, 2160, 3840) # (1,3,240,320) (1,3,480,640) (1,3,720,1280) (1,3,1080,1920) (1,3,2160,3840)
10 | real_run_loops = 200
11 |
12 | if eval_with_mxnet_flag:
13 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_mxnet_cudnn import InferenceSpeedEval as InferenceSpeedEvalMXNet
14 |
15 | inferenceSpeedEvalMXNet = InferenceSpeedEvalMXNet(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type='gpu', gpu_index=0)
16 | inferenceSpeedEvalMXNet.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops)
17 |
18 | else:
19 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_tensorrt_cudnn import InferenceSpeedEval as InferenceSpeedEvalTRT
20 |
21 | inferenceSpeedEvalTRT = InferenceSpeedEvalTRT(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape)
22 | inferenceSpeedEvalTRT.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops)
23 |
--------------------------------------------------------------------------------
/head_detection/metric_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/metric_farm/__init__.py
--------------------------------------------------------------------------------
/head_detection/metric_farm/metric_default.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy
4 | import mxnet
5 |
6 |
7 | class Metric:
8 |     # num_scales: the number of detection scales, i.e., the number of (score, bbox) loss pairs to track
9 | def __init__(self, num_scales):
10 | self.sum_metric = [0.0 for i in range(num_scales * 2)]
11 | self.num_update = 0
12 | self.num_scales = num_scales
13 | self.num_nonzero = [1.0 for i in range(num_scales * 2)]
14 | self.scale_factor = 10000
15 |
16 | # it is expected that the shape is num*c*h*w
17 |     def update(self, labels, preds):  # note the order of items in labels; it must match the data iterator's output (see prefetching_dataiter)
18 |
19 | for i in range(self.num_scales):
20 |             mask = labels[i * 2]  # the mask comes first
21 |             label = labels[i * 2 + 1]  # then the label
22 |
23 | score_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=0, end=2).asnumpy()
24 | bbox_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=2, end=6).asnumpy()
25 |
26 | label_bbox = mxnet.ndarray.slice_axis(label, axis=1, begin=2, end=6).asnumpy()
27 |
28 | pred_score = preds[i * 2].asnumpy()
29 | pred_bbox = preds[i * 2 + 1].asnumpy()
30 |
31 | loss_score = numpy.sum(pred_score * score_mask)
32 | loss_bbox = numpy.sum((label_bbox - pred_bbox) ** 2.0)
33 |
34 | self.num_nonzero[i * 2] += numpy.sum(score_mask[:, 0, :, :] > 0.5)
35 | self.num_nonzero[i * 2 + 1] += numpy.sum(bbox_mask > 0.5)
36 | self.sum_metric[i * 2] += loss_score
37 | self.sum_metric[i * 2 + 1] += loss_bbox
38 |
39 | self.num_update += 1
40 |
41 | def get(self):
42 | return_string_list = []
43 | for i in range(self.num_scales):
44 | return_string_list.append('CE_loss_score_' + str(i))
45 | return_string_list.append('SE_loss_bbox_' + str(i))
46 |
47 | return return_string_list, [m / self.num_nonzero[i] * self.scale_factor for i, m in enumerate(self.sum_metric)]
48 |
49 | def reset(self):
50 | self.sum_metric = [0.0 for i in range(self.num_scales * 2)]
51 | self.num_update = 0
52 | self.num_nonzero = [1.0 for i in range(self.num_scales * 2)]
53 |
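54 | # A minimal smoke test with synthetic tensors (assumes 1 scale, batch size 2, 5x5 maps,
55 | # and the label layout used above: labels = [mask, label], channels = 2 score + 4 bbox):
56 | if __name__ == '__main__':
57 |     mask = mxnet.ndarray.ones((2, 6, 5, 5))
58 |     label = mxnet.ndarray.zeros((2, 6, 5, 5))
59 |     preds = [mxnet.ndarray.ones((2, 2, 5, 5)) * 0.5, mxnet.ndarray.zeros((2, 4, 5, 5))]
60 |     metric = Metric(num_scales=1)
61 |     metric.update([mask, label], preds)
62 |     print(metric.get())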
--------------------------------------------------------------------------------
/head_detection/symbol_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/symbol_farm/__init__.py
--------------------------------------------------------------------------------
/head_detection/symbol_farm/symbol_structures.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/symbol_farm/symbol_structures.xlsx
--------------------------------------------------------------------------------
/license_plate_detection/README.md:
--------------------------------------------------------------------------------
1 | ## License Plate (LP) Detection
2 | We use the CCPD dataset introduced by the paper [Towards End-to-End License Plate Detection and Recognition: A Large Dataset and Baseline](https://github.com/detectRecog/CCPD).
3 |
4 | ### Recent Update
5 | * `2019.10.02` model v1 for CCPD dataset is released.
6 |
7 | ### Brief Introduction to Model Version
8 | * v1 - designed for the CCPD dataset, covering the LP scale range [64, 512] with 3 detection branches. Please check
9 | `./symbol_farm/symbol_structures.xlsx` for details.
10 |
11 | ### Inference Latency
12 |
13 | * Platform info: NVIDIA RTX 2080TI, CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0
14 |
15 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160|7680×4320
16 | -------------|-------|-------|--------|---------|---------|---------
17 | v1|0.62ms(1613.18FPS)|1.02ms(978.64FPS)|2.10ms(476.80FPS)|4.21ms(237.32FPS)|15.68ms(63.78FPS)|62.82ms(15.92FPS)
18 |
19 | * Platform info: NVIDIA GTX 1060(laptop), CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0
20 |
21 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160
22 | -------------|-------|-------|--------|---------|---------
23 | v1|0.86ms(1167.71FPS)|1.83ms(546.00FPS)|4.45ms(224.63FPS)|9.68ms(103.27FPS)|37.59ms(26.60FPS)
24 |
25 | > CAUTION: The latency may vary even in the same setting.
26 |
27 | ### Accuracy on CCPD Dataset
28 | We use the latest CCPD dataset, containing 351,974 images (larger than the version described in the paper). Its annotations are embedded in the image file names (see the appendix at the end of this README).
29 | **Since the paper does not provide a train/test split, we randomly select 3/5 of the data for training and use the rest for testing.**
30 | We train v1 on the training set (211,180 images) and evaluate on the test set (140,794 images).
31 |
32 | #### Quantitative Results on Test Set
33 | Average Precision (AP) is used for measuring the accuracy. In detail, we use code [Object-Detection-Metrics](https://github.com/rafaelpadilla/Object-Detection-Metrics)
34 | for calculating the AP metric. The following table presents the results:
35 |
36 | > `The comparison is not fair due to the different training/test splits. This is for reference only!`
37 | >
38 | > `We run inference only once per image at test time, so some extremely large plates fail to be detected.`
39 |
40 | Method|AP
41 | --------|------
42 | RPnet [1]|0.945
43 | v1 (our)|0.989
44 |
45 | >[1] [Towards End-to-End License Plate Detection and Recognition: A Large Dataset and Baseline](http://openaccess.thecvf.com/content_ECCV_2018/papers/Zhenbo_Xu_Towards_End-to-End_License_ECCV_2018_paper.pdf)
46 |
47 | #### Some Qualitative Results on Test Set
48 | > Some challenging cases are presented.
49 |
50 | 
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 
57 |
58 | ### User Instructions
59 | Please refer to [README in face_detection](../face_detection/README.md) for details.
60 |
61 | ### Data Download
62 | Please visit [CCPD](https://github.com/detectRecog/CCPD) to access the data.
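63 |
64 | ### Appendix: Annotation Format
65 | CCPD embeds each annotation in the image file name: fields are separated by '-', and the third field encodes the plate bounding box as `x1&y1_x2&y2`
66 | (top-left and bottom-right corners). See `annotation_from_name()` in `./data_provider_farm/reformat_CCPD.py` for the parsing logic; a schematic fragment with made-up coordinates:
67 |
68 | ```
69 | ...-154&383_386&473-...  ->  top-left (154, 383), bottom-right (386, 473)
70 | ```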
--------------------------------------------------------------------------------
/license_plate_detection/accuracy_evaluation/evaluation_on_CCPD.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import sys
4 | import cv2
5 | import math
6 | import re
7 |
8 | sys.path.append('..')
9 | # change the config as you need
10 | from config_farm import configuration_64_512_16L_3scales_v1 as cfg
11 | import mxnet
12 | from predict import Predict
13 |
14 |
15 | def generate_gt_files():
16 | txt_file_path = '../data_provider_farm/data_folder/data_list_CCPD_test.txt'
17 | gt_file_root = './CCPD_testset_gt_files_for_evaluation'
18 |
19 | if not os.path.exists(gt_file_root):
20 | os.makedirs(gt_file_root)
21 |
22 | fin = open(txt_file_path, 'r')
23 |
24 | counter = 0
25 | for line in fin:
26 | line = line.strip('\n').split(',')
27 | im_path = os.path.basename(line[0])
28 | num_bboxes = int(line[2])
29 | if num_bboxes == 0:
30 | continue
31 | bbox_list = []
32 | for i in range(num_bboxes):
33 | xmin = int(float(line[3+i*4]))
34 | ymin = int(float(line[4+i*4]))
35 | width = int(float(line[5+i*4]))
36 | height = int(float(line[6+i*4]))
37 | bbox_list.append((xmin, ymin, width, height))
38 |
39 | gt_file_name = im_path.replace('jpg', 'txt')
40 |
41 | fout = open(os.path.join(gt_file_root, gt_file_name), 'w')
42 | for bbox in bbox_list:
43 | line_str = 'LP ' + str(bbox[0]) + ' ' + str(bbox[1]) + ' ' + str(bbox[2]) + ' ' + str(bbox[3])
44 | fout.write(line_str + '\n')
45 | fout.close()
46 | counter += 1
47 | print(counter)
48 | fin.close()
49 |
50 |
51 | def generate_predicted_files():
52 | # set the proper symbol file and model file
53 | symbol_file_path = '../symbol_farm/symbol_64_512_16L_3scales_v1_deploy.json'
54 | model_file_path = '../saved_model/configuration_64_512_16L_3scales_v1_2019-09-29-13-41-44/train_64_512_16L_3scales_v1_iter_600000.params'
55 | my_predictor = Predict(mxnet=mxnet,
56 | symbol_file_path=symbol_file_path,
57 | model_file_path=model_file_path,
58 | ctx=mxnet.gpu(0),
59 | receptive_field_list=cfg.param_receptive_field_list,
60 | receptive_field_stride=cfg.param_receptive_field_stride,
61 | bbox_small_list=cfg.param_bbox_small_list,
62 | bbox_large_list=cfg.param_bbox_large_list,
63 | receptive_field_center_start=cfg.param_receptive_field_center_start,
64 | num_output_scales=cfg.param_num_output_scales)
65 |
66 |     # set the CCPD test data list file
67 | txt_file_path = '../data_provider_farm/data_folder/data_list_CCPD_test.txt'
68 | predicted_file_root = './CCPD_testset_predicted_files_for_evaluation_' + os.path.basename(model_file_path).split('.')[0]
69 |
70 | if not os.path.exists(predicted_file_root):
71 | os.makedirs(predicted_file_root)
72 |
73 | fin = open(txt_file_path, 'r')
74 |
75 | resize_scale = 1
76 | score_threshold = 0.2
77 | NMS_threshold = 0.6
78 | counter = 0
79 |
80 | for line in fin:
81 | line = line.strip('\n').split(',')
82 |
83 | im = cv2.imread(line[0], cv2.IMREAD_COLOR)
84 |
85 | bboxes = my_predictor.predict(im, resize_scale=resize_scale, score_threshold=score_threshold, top_k=10000, NMS_threshold=NMS_threshold)
86 |
87 | # for bbox in bboxes:
88 | # cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 1)
89 | # cv2.imshow('im',im)
90 | # cv2.waitKey()
91 | predicted_file_name = os.path.basename(line[0]).replace('jpg', 'txt')
92 | fout = open(os.path.join(predicted_file_root, predicted_file_name), 'w')
93 | for bbox in bboxes:
94 | fout.write('LP %.03f %d %d %d %d' % (bbox[4] if bbox[4] <= 1 else 1, math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1])) + '\n')
95 | fout.close()
96 | counter += 1
97 | print('[%d] is processed.' % counter)
98 |
99 |
100 | if __name__ == '__main__':
101 | # generate_gt_files()
102 | generate_predicted_files()
103 |
--------------------------------------------------------------------------------
/license_plate_detection/accuracy_evaluation/test_images/test1.jpg_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test1.jpg_result.jpg
--------------------------------------------------------------------------------
/license_plate_detection/accuracy_evaluation/test_images/test2.jpg_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test2.jpg_result.jpg
--------------------------------------------------------------------------------
/license_plate_detection/accuracy_evaluation/test_images/test3.jpg_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test3.jpg_result.jpg
--------------------------------------------------------------------------------
/license_plate_detection/accuracy_evaluation/test_images/test4.jpg_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test4.jpg_result.jpg
--------------------------------------------------------------------------------
/license_plate_detection/accuracy_evaluation/test_images/test5.jpg_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test5.jpg_result.jpg
--------------------------------------------------------------------------------
/license_plate_detection/accuracy_evaluation/test_images/test6.jpg_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test6.jpg_result.jpg
--------------------------------------------------------------------------------
/license_plate_detection/accuracy_evaluation/test_images/test7.jpg_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test7.jpg_result.jpg
--------------------------------------------------------------------------------
/license_plate_detection/config_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/config_farm/__init__.py
--------------------------------------------------------------------------------
/license_plate_detection/data_iterator_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/data_iterator_farm/__init__.py
--------------------------------------------------------------------------------
/license_plate_detection/data_provider_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/data_provider_farm/__init__.py
--------------------------------------------------------------------------------
/license_plate_detection/data_provider_farm/pickle_provider.py:
--------------------------------------------------------------------------------
1 | '''
2 | This provider accepts an adapter, saves the dataset to a pickle file, and loads the whole dataset into memory for data iterators
3 | '''
4 |
5 | import cv2
6 | import numpy
7 | import pickle
8 |
9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_provider import ProviderBaseclass
10 | from .text_list_adapter import TextListAdapter
11 |
12 |
13 | class PickleProvider(ProviderBaseclass):
14 | """
15 | This class provides methods to save and read data.
16 | By default, images are compressed using JPG format.
17 |     If data_adapter is not None, the provider writes data; otherwise it reads data
18 | """
19 |
20 | def __init__(self,
21 | pickle_file_path,
22 | encode_quality=90,
23 | data_adapter=None):
24 | ProviderBaseclass.__init__(self)
25 |
26 | if data_adapter: # write data
27 |
28 | self.data_adapter = data_adapter
29 | self.data = {}
30 | self.counter = 0
31 | self.pickle_file_path = pickle_file_path
32 |
33 | else: # read data
34 |
35 | self.data = pickle.load(open(pickle_file_path, 'rb'))
36 |             # get positive and negative indices
37 | self._positive_index = []
38 | self._negative_index = []
39 | for k, v in self.data.items():
40 | if v[1] == 0: # negative
41 | self._negative_index.append(k)
42 | else: # positive
43 | self._positive_index.append(k)
44 |
45 | self.compression_mode = '.jpg'
46 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality]
47 |
48 | @property
49 | def positive_index(self):
50 | return self._positive_index
51 |
52 | @property
53 | def negative_index(self):
54 | return self._negative_index
55 |
56 | def write(self):
57 |
58 | for data_item in self.data_adapter.get_one():
59 |
60 | temp_sample = []
61 | im, bboxes = data_item
62 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params)
63 | if buf is None or buf.size == 0:
64 |                 print('buf is invalid.')
65 |                 continue
66 |             if not ret:
67 |                 print('An error occurred during image encoding.')
68 | continue
69 | temp_sample.append(buf)
70 |
71 |             if isinstance(bboxes, str):  # negative sample
72 | temp_sample.append(0)
73 | temp_sample.append(int(bboxes))
74 | else:
75 | temp_sample.append(1)
76 | temp_sample.append(bboxes)
77 |
78 | self.data[self.counter] = temp_sample
79 | print('Successfully save the %d-th data item.' % self.counter)
80 | self.counter += 1
81 |
82 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
83 |
84 | def read_by_index(self, index):
85 | im_buf, flag, bboxes = self.data[index]
86 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR)
87 | return im, flag, bboxes
88 |
89 |
90 | def write_file():
91 | data_list_file_path = './data_folder/data_list_CCPD_train_debug.txt'
92 | adapter = TextListAdapter(data_list_file_path)
93 |
94 | pickle_file_path = './data_folder/data_list_CCPD_train_debug.pkl'
95 | encode_quality = 90
96 | packer = PickleProvider(pickle_file_path, encode_quality, adapter)
97 | packer.write()
98 |
99 |
100 | def read_file():
101 | pickle_file_path = './data_folder/data_list_CCPD_train_debug.pkl'
102 |
103 | provider = PickleProvider(pickle_file_path)
104 | positive_index = provider.positive_index
105 | negative_index = provider.negative_index
106 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index)))
107 | all_index = positive_index+negative_index
108 | import random
109 | random.shuffle(all_index)
110 |
111 | for i, index in enumerate(all_index):
112 | im, flag, bboxes_numpy = provider.read_by_index(index)
113 | if isinstance(bboxes_numpy, numpy.ndarray):
114 | for n in range(bboxes_numpy.shape[0]):
115 |                 cv2.rectangle(im, (int(bboxes_numpy[n, 0]), int(bboxes_numpy[n, 1])),
116 |                               (int(bboxes_numpy[n, 0] + bboxes_numpy[n, 2]), int(bboxes_numpy[n, 1] + bboxes_numpy[n, 3])), (0, 255, 0), 2)
117 | cv2.imshow('im', im)
118 | cv2.waitKey()
119 |
120 |
121 | if __name__ == '__main__':
122 | # write_file()
123 | read_file()
124 |
--------------------------------------------------------------------------------
/license_plate_detection/data_provider_farm/reformat_CCPD.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import os
3 | import cv2
4 | import json
5 | import math
6 | import random
7 |
8 |
9 | def annotation_from_name(file_name):
10 |     file_name = file_name[:-4]  # strip the '.jpg' extension
11 | name_split = file_name.split('-')
12 |     location = name_split[2]  # the third '-'-separated field holds the bbox as 'x1&y1_x2&y2'
13 | location = location.split('_')
14 | left_top = location[0].split('&')
15 | right_bottom = location[1].split('&')
16 | x1 = int(left_top[0])
17 | y1 = int(left_top[1])
18 | x2 = int(right_bottom[0])
19 | y2 = int(right_bottom[1])
20 |
21 | return (x1, y1, x2-x1+1, y2-y1+1)
22 |
23 |
24 | def generate_data_list():
25 | image_roots = ['/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_base',
26 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_blur',
27 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_challenge',
28 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_db',
29 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_fn',
30 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_rotate',
31 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_tilt',
32 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_weather']
33 |
34 | train_list_file_path = './data_folder/data_list_CCPD_train.txt'
35 | test_list_file_path = './data_folder/data_list_CCPD_test.txt'
36 | if not os.path.exists(os.path.dirname(train_list_file_path)):
37 | os.makedirs(os.path.dirname(train_list_file_path))
38 | fout_train = open(train_list_file_path, 'w')
39 | fout_test = open(test_list_file_path, 'w')
40 |
41 | train_proportion = 0.6
42 | train_counter = 0
43 | test_counter = 0
44 | for root in image_roots:
45 | file_name_list = [name for name in os.listdir(root) if name.endswith('.jpg')]
46 | random.shuffle(file_name_list)
47 |
48 | file_name_list_train = file_name_list[:int(len(file_name_list)*train_proportion)]
49 | file_name_list_test = file_name_list[int(len(file_name_list)*train_proportion):]
50 |
51 | for file_name in file_name_list_train:
52 | location_annotation = annotation_from_name(file_name)
53 | line = os.path.join(root, file_name)+',1,1,'+str(location_annotation[0])+','+str(location_annotation[1])+','+str(location_annotation[2])+','+str(location_annotation[3])
54 | fout_train.write(line+'\n')
55 | train_counter += 1
56 | print(train_counter)
57 |
58 | for file_name in file_name_list_test:
59 | location_annotation = annotation_from_name(file_name)
60 | line = os.path.join(root, file_name)+',1,1,'+str(location_annotation[0])+','+str(location_annotation[1])+','+str(location_annotation[2])+','+str(location_annotation[3])
61 | fout_test.write(line+'\n')
62 | test_counter += 1
63 | print(test_counter)
64 |
65 | fout_train.close()
66 | fout_test.close()
67 |
68 |
69 | def show_image():
70 | list_file_path = './data_folder/data_list_CCPD_train.txt'
71 |
72 | fin = open(list_file_path, 'r')
73 | lines = fin.readlines()
74 | fin.close()
75 |
76 | import random
77 | random.shuffle(lines)
78 | for line in lines:
79 | line = line.strip('\n').split(',')
80 |
81 | im = cv2.imread(line[0])
82 |
83 | bboxes = []
84 | num_bboxes = int(line[2])
85 | for i in range(num_bboxes):
86 | xmin = int(line[3 + i * 4])
87 | ymin = int(line[4 + i * 4])
88 | width = int(line[5 + i * 4])
89 | height = int(line[6 + i * 4])
90 | bboxes.append((xmin, ymin, xmin + width - 1, ymin + height - 1))
91 |
92 | for bbox in bboxes:
93 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 2)
94 |
95 | cv2.imshow('im', im)
96 | cv2.waitKey()
97 |
98 |
99 | def dataset_statistics():
100 | list_file_path = './data_folder/data_list_CCPD_train.txt'
101 |
102 | fin = open(list_file_path, 'r')
103 | lines = fin.readlines()
104 | fin.close()
105 |
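# histogram the bbox side lengths into 8-pixel bins, tracking the longer and the
# shorter side of each box separately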
106 | bin_size = 8
107 | longer_bin_dict = {}
108 | shorter_bin_dict = {}
109 | counter_pos = 0
110 | counter_neg = 0
111 | for line in lines:
112 | line = line.strip('\n').split(',')
113 | if line[1] == '0':
114 | counter_neg += 1
115 | continue
116 | else:
117 | counter_pos += 1
118 | num_bboxes = int(line[2])
119 | for i in range(num_bboxes):
120 | width = int(line[5 + i * 4])
121 | height = int(line[6 + i * 4])
122 |
123 | longer_side = max(width, height)
124 | shorter_side = min(width, height)
125 |
126 | key = int(longer_side / bin_size)
127 | if key in longer_bin_dict:
128 | longer_bin_dict[key] += 1
129 | else:
130 | longer_bin_dict[key] = 1
131 |
132 | key = int(shorter_side / bin_size)
133 | if key in shorter_bin_dict:
134 | shorter_bin_dict[key] += 1
135 | else:
136 | shorter_bin_dict[key] = 1
137 |
138 | total_lp = 0
139 | print('shorter side based statistics:')
140 | shorter_bin_dict_key_list = sorted(shorter_bin_dict)
141 | for k in shorter_bin_dict_key_list:
142 | v = shorter_bin_dict[k]
143 | total_lp += v
144 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v))
145 |
146 | print('longer side based statistics:')
147 | longer_bin_dict_key_list = sorted(longer_bin_dict)
148 | for k in longer_bin_dict_key_list:
149 | v = longer_bin_dict[k]
150 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v))
151 |
152 | print('num pos: %d, num neg: %d' % (counter_pos, counter_neg))
153 | print('total LP: %d' % total_lp)
154 |
155 |
156 | if __name__ == '__main__':
157 | # test_name2anno()
158 | # generate_data_list()
159 | # show_image()
160 | dataset_statistics()
161 |
--------------------------------------------------------------------------------
/license_plate_detection/data_provider_farm/text_list_adapter.py:
--------------------------------------------------------------------------------
1 | '''
2 | This adapter accepts a text file that describes the annotated data.
3 | Each line is formatted as:
4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]......
5 | '''
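# Example lines (hypothetical paths and values):
#   positive: /data/CCPD2019/ccpd_base/sample.jpg,1,1,154,383,233,91
#   negative: /data/CCPD2019/background/sample.jpg,0,0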
6 |
7 | import cv2
8 | import numpy
9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_data_adapter import DataAdapterBaseclass
10 | import random
11 |
12 |
13 | class TextListAdapter(DataAdapterBaseclass):
14 |
15 | def __init__(self, data_list_file_path):
16 |
17 | DataAdapterBaseclass.__init__(self)
18 | fin = open(data_list_file_path, 'r')
19 | self.lines = fin.readlines()
20 | fin.close()
21 | self.line_counter = 0
22 |
23 | def __del__(self):
24 | pass
25 |
26 | def get_one(self):
27 | """
28 | This function uses 'yield' to return samples one at a time
29 | """
30 | while self.line_counter < len(self.lines):
31 |
32 | line = self.lines[self.line_counter].strip('\n').split(',')
33 | if line[1] == '1':  # for positive samples, verify the number of bboxes matches the declared count
34 | assert len(line[3:]) == 4 * int(line[2])
35 |
36 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED)
37 |
38 | if line[1] == '0':
39 | yield im, '0'
40 | self.line_counter += 1
41 | continue
42 |
43 | num_bboxes = int(line[2])
44 | bboxes = []
45 | for i in range(num_bboxes):
46 | x = float(line[3 + i * 4])
47 | y = float(line[3 + i * 4 + 1])
48 | width = float(line[3 + i * 4 + 2])
49 | height = float(line[3 + i * 4 + 3])
50 |
51 | bboxes.append([x, y, width, height])
52 |
53 | bboxes = numpy.array(bboxes, dtype=numpy.float32)
54 | yield im, bboxes
55 |
56 | # generate negative samples
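# with probability 0.25 per side, take the strip strictly to the left/right of or
# above/below all annotated plates and yield it as an extra negative sample;
# crops narrower/shorter than 100 pixels are discarded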
57 | left = numpy.min(bboxes[:, 0])
58 | top = numpy.min(bboxes[:, 1])
59 | right = numpy.max(bboxes[:, 0] + bboxes[:, 2])
60 | bottom = numpy.max(bboxes[:, 1] + bboxes[:, 3])
61 | if random.random() < 0.25:
62 | im_crop = im[:, :int(left), :].copy()
63 | if im_crop.shape[1] > 100:
64 | yield im_crop, '0'
65 | if random.random() < 0.25:
66 | im_crop = im[:, int(right):, :].copy()
67 | if im_crop.shape[1] > 100:
68 | yield im_crop, '0'
69 | if random.random() < 0.25:
70 | im_crop = im[:int(top), :, :].copy()
71 | if im_crop.shape[0] > 100:
72 | yield im_crop, '0'
73 | if random.random() < 0.25:
74 | im_crop = im[int(bottom):, :, :].copy()
75 | if im_crop.shape[0] > 100:
76 | yield im_crop, '0'
77 |
78 | self.line_counter += 1
79 |
80 |
81 | if __name__ == '__main__':
82 | pass
83 |
--------------------------------------------------------------------------------
/license_plate_detection/inference_speed_evaluation/inference_speed_eval.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import sys
3 |
4 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python')
5 | import mxnet
6 |
7 | eval_with_mxnet_flag = False
8 | symbol_file_path = '/home/heyonghao/projects/ChasingLicensePlateDetection/symbol_farm/symbol_64_512_16L_3scales_v1_deploy.json'
9 | input_shape = (1, 3, 2160, 3840)  # (1,3,240,320) (1,3,480,640) (1,3,720,1280) (1,3,1080,1920) (1,3,2160,3840)
10 | real_run_loops = 200
11 |
12 | if eval_with_mxnet_flag:
13 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_mxnet_cudnn import InferenceSpeedEval as InferenceSpeedEvalMXNet
14 |
15 | inferenceSpeedEvalMXNet = InferenceSpeedEvalMXNet(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type='gpu', gpu_index=0)
16 | inferenceSpeedEvalMXNet.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops)
17 |
18 | else:
19 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_tensorrt_cudnn import InferenceSpeedEval as InferenceSpeedEvalTRT
20 |
21 | inferenceSpeedEvalTRT = InferenceSpeedEvalTRT(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape)
22 | inferenceSpeedEvalTRT.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops)
23 |
--------------------------------------------------------------------------------
/license_plate_detection/metric_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/metric_farm/__init__.py
--------------------------------------------------------------------------------
/license_plate_detection/metric_farm/metric_default.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy
4 | import mxnet
5 |
6 |
7 | class Metric:
8 | # num_scales tells how many losses are fed in, i.e., the number of scales
9 | def __init__(self, num_scales):
10 | self.sum_metric = [0.0 for i in range(num_scales * 2)]
11 | self.num_update = 0
12 | self.num_scales = num_scales
13 | self.num_nonzero = [1.0 for i in range(num_scales * 2)]
14 | self.scale_factor = 10000
15 |
16 | # it is expected that the shape is num*c*h*w
17 | def update(self, labels, preds):  # mind the order of items in labels; refer to prefetching_dataiter
18 |
19 | for i in range(self.num_scales):
20 | mask = labels[i * 2]  # mask comes first
21 | label = labels[i * 2 + 1]  # label comes second
22 |
23 | score_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=0, end=2).asnumpy()
24 | bbox_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=2, end=6).asnumpy()
25 |
26 | label_bbox = mxnet.ndarray.slice_axis(label, axis=1, begin=2, end=6).asnumpy()
27 |
28 | pred_score = preds[i * 2].asnumpy()
29 | pred_bbox = preds[i * 2 + 1].asnumpy()
30 |
31 | loss_score = numpy.sum(pred_score * score_mask)
32 | loss_bbox = numpy.sum((label_bbox - pred_bbox) ** 2.0)
33 |
34 | self.num_nonzero[i * 2] += numpy.sum(score_mask[:, 0, :, :] > 0.5)
35 | self.num_nonzero[i * 2 + 1] += numpy.sum(bbox_mask > 0.5)
36 | self.sum_metric[i * 2] += loss_score
37 | self.sum_metric[i * 2 + 1] += loss_bbox
38 |
39 | self.num_update += 1
40 |
41 | def get(self):
42 | return_string_list = []
43 | for i in range(self.num_scales):
44 | return_string_list.append('CE_loss_score_' + str(i))
45 | return_string_list.append('SE_loss_bbox_' + str(i))
46 |
47 | return return_string_list, [m / self.num_nonzero[i] * self.scale_factor for i, m in enumerate(self.sum_metric)]
48 |
49 | def reset(self):
50 | self.sum_metric = [0.0 for i in range(self.num_scales * 2)]
51 | self.num_update = 0
52 | self.num_nonzero = [1.0 for i in range(self.num_scales * 2)]
53 |
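A minimal usage sketch for this metric (the import path and all tensor contents are hypothetical; with all-zero inputs every reported value is 0):

import mxnet
from metric_farm.metric_default import Metric  # hypothetical import path

num_scales = 3  # matches the 3-scale v1 configuration
metric = Metric(num_scales)

n, h, w = 4, 16, 16  # arbitrary batch size and feature-map size
labels, preds = [], []
for _ in range(num_scales):
    labels.append(mxnet.ndarray.zeros((n, 6, h, w)))  # mask: 2 score + 4 bbox channels
    labels.append(mxnet.ndarray.zeros((n, 6, h, w)))  # label: same channel layout
    preds.append(mxnet.ndarray.zeros((n, 2, h, w)))   # predicted scores
    preds.append(mxnet.ndarray.zeros((n, 4, h, w)))   # predicted bbox offsets

metric.update(labels, preds)
names, values = metric.get()
print(list(zip(names, values)))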
--------------------------------------------------------------------------------
/license_plate_detection/symbol_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/symbol_farm/__init__.py
--------------------------------------------------------------------------------
/license_plate_detection/symbol_farm/symbol_structures.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/symbol_farm/symbol_structures.xlsx
--------------------------------------------------------------------------------
/pedestrian_detection/README.md:
--------------------------------------------------------------------------------
1 | ## Pedestrian Detection
2 | We plan to use [Caltech Pedestrian Dataset](http://www.vision.caltech.edu/Image_Datasets/CaltechPedestrians/index.html)
3 | with [new annotations](http://www.vision.caltech.edu/Image_Datasets/CaltechPedestrians/index.html),
4 | [CityPersons](https://bitbucket.org/shanshanzhang/citypersons) (a part of [CityScapes](https://www.cityscapes-dataset.com/)) and
5 | [KITTI](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=2d) for benchmarking.
6 |
7 | ### Recent Update
8 | * `2019.09.18` preview version of model v1 for Caltech Pedestrian Dataset is released.
9 |
10 | ### Brief Introduction to Model Version
11 | * v1 - is designed for Caltech Pedestrian Dataset, covering pedestrian scale [30, 320]. It has 4 branches. Please check
12 | `./symbol_farm/symbol_structures.xlsx` for details.
13 |
14 | ### Inference Latency
15 | * Platform info: NVIDIA Jetson NANO, CUDA 10.0, CUDNN 7.5.0, TensorRT 5.1.6
16 |
17 | Model Version|160×140|320×240|640×480|1280×720
18 | -------------|-------|-------|-------|--------
19 | v1|6.90ms(144.83FPS)|11.87ms(84.24FPS)|36.95ms(27.06FPS)|106.23ms(9.41FPS)
20 | v2|-|-|-|-
21 |
22 | * Platform info: NVIDIA Jetson TX2, CUDA 10.0, CUDNN 7.5.0, TensorRT 5.1.6 (power mode: MAXN)
23 |
24 | Model Version|160×140|320×240|640×480|1280×720|1920×1080
25 | -------------|-------|-------|-------|--------|---------
26 | v1|3.63ms(275.43FPS)|6.80ms(147.36FPS)|15.87ms(63.01FPS)|43.33ms(23.08FPS)|93.93ms(10.65FPS)
27 | v2|-|-|-|-|-
28 |
29 |
30 | * Platform info: NVIDIA RTX 2080TI, CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0
31 |
32 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160|7680×4320
33 | -------------|-------|-------|--------|---------|---------|---------
34 | v1|1.01ms(985.71FPS)|1.55ms(644.93FPS)|3.26ms(306.77FPS)|6.50ms(153.76FPS)|24.58ms(40.68FPS)|99.71ms(10.03FPS)
35 | v2|-|-|-|-|-|-
36 |
37 | * Platform info: NVIDIA GTX 1060(laptop), CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0
38 |
39 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160
40 | -------------|-------|-------|--------|---------|---------
41 | v1|1.25ms(800.00FPS)|2.93ms(341.80FPS)|7.46ms(134.08FPS)|16.03ms(62.39FPS)|62.80ms(15.92FPS)
42 | v2|-|-|-|-|-
43 |
44 | > CAUTION: The latency may vary even in the same setting.
45 |
46 | ### Accuracy on Caltech Pedestrian Dataset
47 | After investigating the data, we found that the Caltech Pedestrian Dataset is not well annotated, even with the
48 | new annotations (some pedestrians are missed, boxes are not aligned well, and highly occluded instances are annotated).
49 | The final data used for training: 1559 positive images (at least one pedestrian inside) and 2691 negative images,
50 | with 4786 pedestrians in total; the longer side of bboxes varies from 10 to 500 pixels.
51 |
52 | Download links for packed training and test sets:
53 | * [Baidu Yunpan](https://pan.baidu.com/s/1SvoSeg5thFHDDwZc9gh09A) (pwd:8omv)
54 | * [MS OneDrive](https://1drv.ms/u/s!Av9h0YMgxdaSinO2G1DT-yPWkKc6?e=elsea6)
55 | * [Google Drive](https://drive.google.com/open?id=1ICNAEfLa2YHJvxE6_YZYAA8Cyl1N1kAD)
56 |
57 | #### Quantitative Results on Test Set
58 | The quantitative results are not ready yet; we will release them later.
59 |
60 | #### Some Qualitative Results on Test Set
61 | **(We found that false positives often appear at small scales, probably due to noisy training instances. At large scales, v1 performs well.)**
62 |
63 | 
64 | 
65 | 
66 | 
67 |
68 | > To play with the trained v1 model, please check `./accuracy_evaluation/predict.py`.
69 | ### User Instructions
70 | Please refer to [README in face_detection](../face_detection/README.md) for details.
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/1064.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1064.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/1081.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1081.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/1104.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1104.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/1199.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1199.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/1212.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1212.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/1461.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1461.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/2210.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2210.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/2221.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2221.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/2396.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2396.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/2407.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2407.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/2756.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2756.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/3043.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3043.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/326.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/326.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/3368.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3368.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/3812.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3812.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/3914.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3914.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/3981.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3981.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/3988.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3988.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/accuracy_evaluation/test_images/877.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/877.jpg
--------------------------------------------------------------------------------
/pedestrian_detection/config_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/config_farm/__init__.py
--------------------------------------------------------------------------------
/pedestrian_detection/data_iterator_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/data_iterator_farm/__init__.py
--------------------------------------------------------------------------------
/pedestrian_detection/data_provider_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/data_provider_farm/__init__.py
--------------------------------------------------------------------------------
/pedestrian_detection/data_provider_farm/pickle_provider.py:
--------------------------------------------------------------------------------
1 | '''
2 | This provider accepts an adapter, saves the dataset to a pickle file, and loads the whole dataset into memory for data iterators.
3 | '''
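# Each stored sample is a list of three items: the JPEG-encoded image buffer,
# a pos/neg flag (1/0), and either an Nx4 numpy bbox array (positives) or the
# integer 0 (negatives); samples are keyed by a running integer index.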
4 |
5 | import cv2
6 | import numpy
7 | import pickle
8 |
9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_provider import ProviderBaseclass
10 | from .text_list_adapter import TextListAdapter
11 |
12 |
13 | class PickleProvider(ProviderBaseclass):
14 | """
15 | This class provides methods to save and read data.
16 | By default, images are compressed in JPEG format.
17 | If data_adapter is not None, the provider is in writing mode; otherwise it is in reading mode.
18 | """
19 |
20 | def __init__(self,
21 | pickle_file_path,
22 | encode_quality=90,
23 | data_adapter=None):
24 | ProviderBaseclass.__init__(self)
25 |
26 | if data_adapter: # write data
27 |
28 | self.data_adapter = data_adapter
29 | self.data = {}
30 | self.counter = 0
31 | self.pickle_file_path = pickle_file_path
32 |
33 | else: # read data
34 |
35 | self.data = pickle.load(open(pickle_file_path, 'rb'))
36 | # get positive and negative indices
37 | self._positive_index = []
38 | self._negative_index = []
39 | for k, v in self.data.items():
40 | if v[1] == 0: # negative
41 | self._negative_index.append(k)
42 | else: # positive
43 | self._positive_index.append(k)
44 |
45 | self.compression_mode = '.jpg'
46 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality]
47 |
48 | @property
49 | def positive_index(self):
50 | return self._positive_index
51 |
52 | @property
53 | def negative_index(self):
54 | return self._negative_index
55 |
56 | def write(self):
57 |
58 | for data_item in self.data_adapter.get_one():
59 |
60 | temp_sample = []
61 | im, bboxes = data_item
62 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params)
63 | if not ret:
64 | print('Image encoding failed.')
65 | continue
66 | if buf is None or buf.size == 0:
67 | print('The encoded buffer is empty.')
68 | continue
69 | temp_sample.append(buf)
70 |
71 | if isinstance(bboxes, str):  # negative sample
72 | temp_sample.append(0)
73 | temp_sample.append(int(bboxes))
74 | else:
75 | temp_sample.append(1)
76 | temp_sample.append(bboxes)
77 |
78 | self.data[self.counter] = temp_sample
79 | print('Successfully saved the %d-th data item.' % self.counter)
80 | self.counter += 1
81 |
82 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
83 |
84 | def read_by_index(self, index):
85 | im_buf, flag, bboxes = self.data[index]
86 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR)
87 | return im, flag, bboxes
88 |
89 |
90 | def write_file():
91 | data_list_file_path = './data_folder/data_list_caltech_test.txt'
92 | adapter = TextListAdapter(data_list_file_path)
93 |
94 | pickle_file_path = './data_folder/data_list_caltech_test.pkl'
95 | encode_quality = 90
96 | packer = PickleProvider(pickle_file_path, encode_quality, adapter)
97 | packer.write()
98 |
99 |
100 | def read_file():
101 | pickle_file_path = './data_folder/data_list_caltech_test.pkl'
102 |
103 | provider = PickleProvider(pickle_file_path)
104 | positive_index = provider.positive_index
105 | negative_index = provider.negative_index
106 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index)))
107 | # all_index = positive_index+negative_index
108 | import random
109 | random.shuffle(positive_index)
110 |
111 | for i, index in enumerate(positive_index):
112 | im, flag, bboxes_numpy = provider.read_by_index(index)
113 | if isinstance(bboxes_numpy, numpy.ndarray):
114 | for n in range(bboxes_numpy.shape[0]):
115 | cv2.rectangle(im, (bboxes_numpy[n, 0], bboxes_numpy[n, 1]),
116 | (bboxes_numpy[n, 0] + bboxes_numpy[n, 2], bboxes_numpy[n, 1] + bboxes_numpy[n, 3]), (0, 255, 0), 2)
117 | cv2.imshow('im', im)
118 | cv2.waitKey()
119 |
120 |
121 | if __name__ == '__main__':
122 | # write_file()
123 | read_file()
124 |
--------------------------------------------------------------------------------
/pedestrian_detection/data_provider_farm/reformat_caltech.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import os
3 | import cv2
4 | import json
5 | import math
6 |
7 |
8 | def generate_data_list():
9 | annotation_root = '/media/heyonghao/HYH-4T-WD/public_dataset/Caltech/Caltech_new_annotations/anno_test_1xnew'
10 | image_root = '/media/heyonghao/HYH-4T-WD/public_dataset/Caltech/Caltech_data/extracted_data'
11 |
12 | list_file_path = './data_folder/data_list_caltech_test.txt'
13 | if not os.path.exists(os.path.dirname(list_file_path)):
14 | os.makedirs(os.path.dirname(list_file_path))
15 | fout = open(list_file_path, 'w')
16 |
17 | counter = 0
18 | for parent, dirnames, filenames in os.walk(annotation_root):
19 | for filename in filenames:
20 | if not filename.endswith('.txt'):
21 | continue
22 |
23 | filename_splits = filename[:-4].split('_')
24 | set_name = filename_splits[0]
25 | seq_name = filename_splits[1]
26 | img_name = filename_splits[2]
27 |
28 | img_path = os.path.join(image_root, set_name, seq_name, 'images', img_name)
29 | if not os.path.exists(img_path):
30 | print('The corresponding image does not exist! [%s]' % img_path)
31 | continue
32 |
33 | line = img_path
34 |
35 | fin_anno = open(os.path.join(parent, filename), 'r')
36 |
37 | bbox_list = []
38 | for i, anno in enumerate(fin_anno):
39 | if i == 0:
40 | continue
41 | anno = anno.strip('\n').split(' ')
42 | if anno[0] != 'person':
43 | continue
44 | x = math.floor(float(anno[1]))
45 | y = math.floor(float(anno[2]))
46 | width = math.ceil(float(anno[3]))
47 | height = math.ceil(float(anno[4]))
48 |
49 | width_vis = math.ceil(float(anno[8]))
50 | height_vis = math.ceil(float(anno[9]))
51 |
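# keep the box only if its visible area is at least 20% of the full box;
# heavily occluded annotations are skipped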
52 | if (width_vis*height_vis)/(width*height) < 0.2:
53 | continue
54 |
55 | bbox_list.append((x, y, width, height))
56 | if len(bbox_list) == 0:
57 | line += ',0,0'
58 | fout.write(line + '\n')
59 | else:
60 | bbox_line = ''
61 | for bbox in bbox_list:
62 | bbox_line += ',' + str(bbox[0]) + ',' + str(bbox[1]) + ',' + str(bbox[2]) + ',' + str(bbox[3])
63 | line += ',1,' + str(len(bbox_list)) + bbox_line
64 | fout.write(line + '\n')
65 | counter += 1
66 | print(counter)
67 |
68 | fout.close()
69 |
70 |
71 | def show_image():
72 | list_file_path = './data_folder/data_list_caltech_test.txt'
73 |
74 | fin = open(list_file_path, 'r')
75 | lines = fin.readlines()
76 | fin.close()
77 |
78 | import random
79 | random.shuffle(lines)
80 | for line in lines:
81 | line = line.strip('\n').split(',')
82 |
83 | im = cv2.imread(line[0])
84 |
85 | bboxes = []
86 | num_bboxes = int(line[2])
87 | for i in range(num_bboxes):
88 | xmin = int(line[3 + i * 4])
89 | ymin = int(line[4 + i * 4])
90 | width = int(line[5 + i * 4])
91 | height = int(line[6 + i * 4])
92 | bboxes.append((xmin, ymin, xmin + width - 1, ymin + height - 1))
93 |
94 | for bbox in bboxes:
95 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 2)
96 |
97 | cv2.imshow('im', im)
98 | cv2.waitKey()
99 |
100 |
101 | def dataset_statistics():
102 | list_file_path = './data_folder/data_list_caltech_test.txt'
103 |
104 | fin = open(list_file_path, 'r')
105 | lines = fin.readlines()
106 | fin.close()
107 |
108 | bin_size = 10
109 | longer_bin_dict = {}
110 | shorter_bin_dict = {}
111 | counter_pos = 0
112 | counter_neg = 0
113 | for line in lines:
114 | line = line.strip('\n').split(',')
115 | if line[1] == '0':
116 | counter_neg += 1
117 | continue
118 | else:
119 | counter_pos += 1
120 | num_bboxes = int(line[2])
121 | for i in range(num_bboxes):
122 | width = int(line[5 + i * 4])
123 | height = int(line[6 + i * 4])
124 |
125 | longer_side = max(width, height)
126 | shorter_side = min(width, height)
127 |
128 | key = int(longer_side / bin_size)
129 | if key in longer_bin_dict:
130 | longer_bin_dict[key] += 1
131 | else:
132 | longer_bin_dict[key] = 1
133 |
134 | key = int(shorter_side / bin_size)
135 | if key in shorter_bin_dict:
136 | shorter_bin_dict[key] += 1
137 | else:
138 | shorter_bin_dict[key] = 1
139 |
140 | total_pedestrian = 0
141 | print('shorter side based statistics:')
142 | shorter_bin_dict_key_list = sorted(shorter_bin_dict)
143 | for k in shorter_bin_dict_key_list:
144 | v = shorter_bin_dict[k]
145 | total_pedestrian += v
146 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v))
147 |
148 | print('longer side based statistics:')
149 | longer_bin_dict_key_list = sorted(longer_bin_dict)
150 | for k in longer_bin_dict_key_list:
151 | v = longer_bin_dict[k]
152 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v))
153 |
154 | print('num pos: %d, num neg: %d' % (counter_pos, counter_neg))
155 | print('total pedestrian: %d' % total_pedestrian)
156 |
157 |
158 | if __name__ == '__main__':
159 | # generate_data_list()
160 | show_image()
161 | # dataset_statistics()
162 |
--------------------------------------------------------------------------------
/pedestrian_detection/data_provider_farm/text_list_adapter.py:
--------------------------------------------------------------------------------
1 | '''
2 | This adapter accepts a text file that describes the annotated data.
3 | Each line is formatted as:
4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]......
5 | '''
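# A made-up example: a line with two pedestrians would look like
#   /data/caltech/set06/V000/images/100.jpg,1,2,10,40,35,90,120,50,30,80
# and a negative image is simply listed as <path>,0,0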
6 |
7 | import cv2
8 | import numpy
9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_data_adapter import DataAdapterBaseclass
10 |
11 |
12 | class TextListAdapter(DataAdapterBaseclass):
13 |
14 | def __init__(self, data_list_file_path):
15 |
16 | DataAdapterBaseclass.__init__(self)
17 | fin = open(data_list_file_path, 'r')
18 | self.lines = fin.readlines()
19 | fin.close()
20 | self.line_counter = 0
21 |
22 | def __del__(self):
23 | pass
24 |
25 | def get_one(self):
26 | """
27 | This function uses 'yield' to return samples one at a time
28 | """
29 | while self.line_counter < len(self.lines):
30 |
31 | line = self.lines[self.line_counter].strip('\n').split(',')
32 | if line[1] == '1':  # for positive samples, verify the number of bboxes matches the declared count
33 | assert len(line[3:]) == 4 * int(line[2])
34 |
35 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED)
36 |
37 | if line[1] == '0':
38 | yield im, '0'
39 | self.line_counter += 1
40 | continue
41 |
42 | num_bboxes = int(line[2])
43 | bboxes = []
44 | for i in range(num_bboxes):
45 | x = float(line[3 + i * 4])
46 | y = float(line[3 + i * 4 + 1])
47 | width = float(line[3 + i * 4 + 2])
48 | height = float(line[3 + i * 4 + 3])
49 |
50 | bboxes.append([x, y, width, height])
51 |
52 | bboxes = numpy.array(bboxes, dtype=numpy.float32)
53 | yield im, bboxes
54 |
55 | self.line_counter += 1
56 |
57 |
58 | if __name__ == '__main__':
59 | pass
60 |
--------------------------------------------------------------------------------
/pedestrian_detection/inference_speed_evaluation/inference_speed_eval.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import sys
3 |
4 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python')
5 | import mxnet
6 |
7 | eval_with_mxnet_flag = False
8 | symbol_file_path = '/home/heyonghao/projects/ChasingPedestrainDetection/symbol_farm/symbol_30_320_20L_4scales_v1_deploy.json'
9 | input_shape = (1, 3, 2160, 3840) # (1,3,240,320) (1,3,480,640) (1,3,720,1280) (1,3,1080,1920) (1,3,2160,3840)
10 | real_run_loops = 200
11 |
12 | if eval_with_mxnet_flag:
13 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_mxnet_cudnn import InferenceSpeedEval as InferenceSpeedEvalMXNet
14 |
15 | inferenceSpeedEvalMXNet = InferenceSpeedEvalMXNet(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type='gpu', gpu_index=0)
16 | inferenceSpeedEvalMXNet.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops)
17 |
18 | else:
19 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_tensorrt_cudnn import InferenceSpeedEval as InferenceSpeedEvalTRT
20 |
21 | inferenceSpeedEvalTRT = InferenceSpeedEvalTRT(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape)
22 | inferenceSpeedEvalTRT.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops)
23 |
--------------------------------------------------------------------------------
/pedestrian_detection/metric_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/metric_farm/__init__.py
--------------------------------------------------------------------------------
/pedestrian_detection/metric_farm/metric_default.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy
4 | import mxnet
5 |
6 |
7 | class Metric:
8 | # num_scales tells how many losses are fed in, i.e., the number of scales
9 | def __init__(self, num_scales):
10 | self.sum_metric = [0.0 for i in range(num_scales * 2)]
11 | self.num_update = 0
12 | self.num_scales = num_scales
13 | self.num_nonzero = [1.0 for i in range(num_scales * 2)]
14 | self.scale_factor = 10000
15 |
16 | # it is expected that the shape is num*c*h*w
17 | def update(self, labels, preds):  # mind the order of items in labels; refer to prefetching_dataiter
18 |
19 | for i in range(self.num_scales):
20 | mask = labels[i * 2]  # mask comes first
21 | label = labels[i * 2 + 1]  # label comes second
22 |
23 | score_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=0, end=2).asnumpy()
24 | bbox_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=2, end=6).asnumpy()
25 |
26 | label_bbox = mxnet.ndarray.slice_axis(label, axis=1, begin=2, end=6).asnumpy()
27 |
28 | pred_score = preds[i * 2].asnumpy()
29 | pred_bbox = preds[i * 2 + 1].asnumpy()
30 |
31 | loss_score = numpy.sum(pred_score * score_mask)
32 | loss_bbox = numpy.sum((label_bbox - pred_bbox) ** 2.0)
33 |
34 | self.num_nonzero[i * 2] += numpy.sum(score_mask[:, 0, :, :] > 0.5)
35 | self.num_nonzero[i * 2 + 1] += numpy.sum(bbox_mask > 0.5)
36 | self.sum_metric[i * 2] += loss_score
37 | self.sum_metric[i * 2 + 1] += loss_bbox
38 |
39 | self.num_update += 1
40 |
41 | def get(self):
42 | return_string_list = []
43 | for i in range(self.num_scales):
44 | return_string_list.append('CE_loss_score_' + str(i))
45 | return_string_list.append('SE_loss_bbox_' + str(i))
46 |
47 | return return_string_list, [m / self.num_nonzero[i] * self.scale_factor for i, m in enumerate(self.sum_metric)]
48 |
49 | def reset(self):
50 | self.sum_metric = [0.0 for i in range(self.num_scales * 2)]
51 | self.num_update = 0
52 | self.num_nonzero = [1.0 for i in range(self.num_scales * 2)]
53 |
--------------------------------------------------------------------------------
/pedestrian_detection/symbol_farm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/symbol_farm/__init__.py
--------------------------------------------------------------------------------
/pedestrian_detection/symbol_farm/symbol_structures.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/symbol_farm/symbol_structures.xlsx
--------------------------------------------------------------------------------
/vehicle_detection/README.md:
--------------------------------------------------------------------------------
1 | Coming soon...
--------------------------------------------------------------------------------