├── .gitignore ├── ChasingTrainFramework_GeneralOneClassDetection ├── README.md ├── __init__.py ├── data_iterator_base │ ├── __init__.py │ └── data_batch.py ├── data_provider_base │ ├── __init__.py │ ├── base_data_adapter.py │ ├── base_provider.py │ ├── pickle_provider.py │ └── text_list_adapter.py ├── image_augmentation │ ├── __init__.py │ └── augmentor.py ├── inference_speed_eval │ ├── __init__.py │ ├── inference_speed_eval_with_mxnet_cudnn.py │ └── inference_speed_eval_with_tensorrt_cudnn.py ├── logging_GOCD.py ├── loss_layer_farm │ ├── __init__.py │ ├── cross_entropy_with_focal_loss_for_one_class_detection.py │ ├── cross_entropy_with_hnm_for_one_class_detection.py │ ├── loss.py │ ├── mean_squared_error_with_hnm_for_one_class_detection.py │ └── mean_squared_error_with_ohem_for_one_class_detection.py ├── solver_GOCD.py └── train_GOCD.py ├── LICENSE ├── README.md ├── face_detection ├── README.md ├── accuracy_evaluation │ ├── evaluation_on_fddb.py │ ├── evaluation_on_widerface.py │ └── predict.py ├── config_farm │ ├── __init__.py │ ├── configuration_10_320_20L_5scales_v2.py │ └── configuration_10_560_25L_8scales_v1.py ├── data_iterator_farm │ ├── __init__.py │ ├── multithread_dataiter_for_cross_entropy_v1.py │ └── multithread_dataiter_for_cross_entropy_v2.py ├── data_provider_farm │ ├── __init__.py │ ├── data_folder │ │ └── .gitkeep │ ├── pickle_provider.py │ └── text_list_adapter.py ├── demo │ └── demo.py ├── deploy_tensorrt │ ├── README.md │ ├── debug_image │ │ ├── test1.jpg │ │ ├── test2.jpg │ │ ├── test3.jpg │ │ ├── test5.jpg │ │ └── test6.jpg │ ├── predict_tensorrt.py │ └── to_onnx.py ├── inference_speed_evaluation │ ├── README.md │ └── inference_speed_eval.py ├── metric_farm │ ├── __init__.py │ └── metric_default.py ├── net_farm │ ├── __init__.py │ ├── naivenet.py │ ├── naivenet20_resv2.gv │ ├── naivenet20_resv2.gv.svg │ └── naivenet_structures.xlsx ├── qualitative_results │ ├── v1_qualitative_1.jpg │ ├── v1_qualitative_2.jpg │ ├── v1_qualitative_3.jpg │ ├── v1_qualitative_4.jpg │ └── v1_qualitative_5.jpg └── saved_model │ ├── configuration_10_320_20L_5scales_v2 │ └── .gitkeep │ └── configuration_10_560_25L_8scales_v1 │ └── .gitkeep ├── head_detection ├── README.md ├── accuracy_evaluation │ ├── evaluation_on_brainwash.py │ ├── predict.py │ └── test_images │ │ ├── 2.jpg │ │ ├── 247.jpg │ │ ├── 322.jpg │ │ ├── 342.jpg │ │ ├── 377.jpg │ │ ├── 411.jpg │ │ ├── 5.jpg │ │ ├── 7.jpg │ │ └── 72.jpg ├── config_farm │ ├── __init__.py │ └── configuration_10_160_17L_4scales_v1.py ├── data_iterator_farm │ ├── __init__.py │ └── multithread_dataiter_for_cross_entropy_v1.py ├── data_provider_farm │ ├── pickle_provider.py │ ├── reformat_brainwash.py │ └── text_list_adapter.py ├── inference_speed_evaluation │ └── inference_speed_eval.py ├── metric_farm │ ├── __init__.py │ └── metric_default.py └── symbol_farm │ ├── __init__.py │ ├── symbol_10_160_17L_4scales_v1.py │ ├── symbol_10_160_17L_4scales_v1_deploy.json │ └── symbol_structures.xlsx ├── license_plate_detection ├── README.md ├── accuracy_evaluation │ ├── evaluation_on_CCPD.py │ ├── predict.py │ └── test_images │ │ ├── test1.jpg_result.jpg │ │ ├── test2.jpg_result.jpg │ │ ├── test3.jpg_result.jpg │ │ ├── test4.jpg_result.jpg │ │ ├── test5.jpg_result.jpg │ │ ├── test6.jpg_result.jpg │ │ └── test7.jpg_result.jpg ├── config_farm │ ├── __init__.py │ └── configuration_64_512_16L_3scales_v1.py ├── data_iterator_farm │ ├── __init__.py │ └── multithread_dataiter_for_cross_entropy_v1.py ├── data_provider_farm │ ├── __init__.py │ ├── pickle_provider.py │ ├── 
reformat_CCPD.py │ └── text_list_adapter.py ├── inference_speed_evaluation │ └── inference_speed_eval.py ├── metric_farm │ ├── __init__.py │ └── metric_default.py └── symbol_farm │ ├── __init__.py │ ├── symbol_64_512_16L_3scales_v1.py │ ├── symbol_64_512_16L_3scales_v1_deploy.json │ └── symbol_structures.xlsx ├── pedestrian_detection ├── README.md ├── accuracy_evaluation │ ├── predict.py │ └── test_images │ │ ├── 1064.jpg │ │ ├── 1081.jpg │ │ ├── 1104.jpg │ │ ├── 1199.jpg │ │ ├── 1212.jpg │ │ ├── 1461.jpg │ │ ├── 2210.jpg │ │ ├── 2221.jpg │ │ ├── 2396.jpg │ │ ├── 2407.jpg │ │ ├── 2756.jpg │ │ ├── 3043.jpg │ │ ├── 326.jpg │ │ ├── 3368.jpg │ │ ├── 3812.jpg │ │ ├── 3914.jpg │ │ ├── 3981.jpg │ │ ├── 3988.jpg │ │ └── 877.jpg ├── config_farm │ ├── __init__.py │ └── configuration_30_320_20L_4scales_v1.py ├── data_iterator_farm │ ├── __init__.py │ └── multithread_dataiter_for_cross_entropy_v1.py ├── data_provider_farm │ ├── __init__.py │ ├── pickle_provider.py │ ├── reformat_caltech.py │ └── text_list_adapter.py ├── inference_speed_evaluation │ └── inference_speed_eval.py ├── metric_farm │ ├── __init__.py │ └── metric_default.py └── symbol_farm │ ├── __init__.py │ ├── symbol_30_320_20L_4scales_v1.py │ ├── symbol_30_320_20L_4scales_v1_deploy.json │ └── symbol_structures.xlsx └── vehicle_detection └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # IPython 76 | profile_default/ 77 | ipython_config.py 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # Environments 89 | .env 90 | .venv 91 | env/ 92 | venv/ 93 | ENV/ 94 | env.bak/ 95 | venv.bak/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | .dmypy.json 110 | dmypy.json 111 | 112 | 113 | .idea/ 114 | 115 | # pytorch model 116 | *.pth 117 | 118 | # pkl format dataset 119 | *.pkl 120 | 121 | # mxnet model 122 | *.params 123 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/README.md: -------------------------------------------------------------------------------- 1 | ## ChasingTrainFramework_GeneralOneClassDetection 2 | ChasingTrainFramework_GeneralOneClassDetection is a simple 3 | training wrapper based on the MXNet Module API for general one-class detection. 4 | `Chasing` is just a project codename. 5 | 6 | ### Framework Introduction 7 | * **data_iterator_base** provides utilities for batch iterators. The design of a data 8 | iterator depends on the specific task, so we do not provide a default iterator here. 9 | 10 | * **data_provider_base** reformats and packs raw data. In most cases, all data can be loaded into 11 | memory for fast access. 12 | 13 | * **image_augmentation** provides commonly used augmentations. 14 | 15 | * **inference_speed_eval** provides two ways to evaluate inference speed -- MXNet with cuDNN and TensorRT with cuDNN. 16 | 17 | * **loss_layer_farm** provides customized losses such as cross entropy with hard negative mining and focal loss. 18 | 19 | * **logging_GOCD** is a logging wrapper. 20 | 21 | * **solver_GOCD** executes the training process. 22 | 23 | * **train_GOCD** is the entry point of the framework.
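The components above are wired together through `train_GOCD.start_train`, which builds a `Solver` and runs the training loop. Below is a minimal sketch of a training entry script; `MyNet`, `my_train_dataiter` and `my_train_metric` are hypothetical placeholders for the task-specific network, data iterator and metric (see the `config_farm` scripts of the concrete detection tasks for complete, working configurations):

```python
import torch
from ChasingTrainFramework_GeneralOneClassDetection import train_GOCD
from ChasingTrainFramework_GeneralOneClassDetection.logging_GOCD import init_logging
from ChasingTrainFramework_GeneralOneClassDetection.loss_layer_farm.loss import \
    cross_entropy_with_hnm_for_one_class_detection

init_logging(log_file_path='./log/train.log', log_overwrite_flag=True)

net = MyNet()  # hypothetical task-specific network
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[300000, 400000], gamma=0.1)
loss_criterion = cross_entropy_with_hnm_for_one_class_detection(hnm_ratio=5, num_output_scales=8)

train_GOCD.start_train(
    param_dict={}, task_name='my_detection_task', torch_module=torch, gpu_id_list=[0],
    train_dataiter=my_train_dataiter, train_metric=my_train_metric,
    train_metric_update_frequency=1, num_train_loops=500000,
    val_dataiter=None, val_metric=None, num_val_loops=0, validation_interval=0,
    optimizer=optimizer, lr_scheduler=lr_scheduler, net=net, net_initializer=None,
    loss_criterion=loss_criterion, pretrained_model_param_path=None,
    display_interval=100, save_prefix='./saved_model', model_save_interval=100000,
    start_index=0)
```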
-------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/__init__.py -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_iterator_base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/data_iterator_base/__init__.py -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_iterator_base/data_batch.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | class DataBatch: 5 | def __init__(self, torch_module): 6 | self._data = [] 7 | self._label = [] 8 | self.torch_module = torch_module 9 | 10 | def append_data(self, new_data): 11 | self._data.append(self.__as_tensor(new_data)) 12 | 13 | def append_label(self, new_label): 14 | self._label.append(self.__as_tensor(new_label)) 15 | 16 | def __as_tensor(self, in_data): 17 | return self.torch_module.from_numpy(in_data) 18 | 19 | @property 20 | def data(self): 21 | return self._data 22 | 23 | @property 24 | def label(self): 25 | return self._label 26 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/__init__.py -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/base_data_adapter.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module reads, modifies and returns a single sample. 3 | It is only used in the data packing phase. 4 | """ 5 | 6 | 7 | class DataAdapterBaseclass(object): 8 | 9 | def __init__(self): 10 | pass 11 | 12 | def __del__(self): 13 | pass 14 | 15 | def get_one(self): 16 | """ 17 | return only one sample each time 18 | :return: 19 | """ 20 | raise NotImplementedError() 21 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/base_provider.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module takes an adapter as the data supplier, packs the data and provides it to data iterators 3 | 4 | """ 5 | 6 | 7 | class ProviderBaseclass(object): 8 | """ 9 | This is the base class of data providers (packers). Any concrete provider must inherit this class.
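Concrete providers are expected to implement write() to pack samples into a file and read_by_index() to load a single packed sample back (see PickleProvider below for a working example).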
10 | """ 11 | 12 | def __init__(self): 13 | pass 14 | 15 | def __str__(self): 16 | return self.__class__.__name__ 17 | 18 | def __del__(self): 19 | pass 20 | 21 | def write(self): 22 | """ 23 | Write a single sample to the files 24 | :return: 25 | """ 26 | raise NotImplementedError() 27 | 28 | def read_by_index(self, index): 29 | """ 30 | Read a single sample 31 | :return: 32 | """ 33 | raise NotImplementedError() 34 | 35 | 36 | if __name__ == '__main__': 37 | provider = ProviderBaseclass() 38 | print(provider) 39 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/pickle_provider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This provider accepts an adapter, save dataset in pickle file and load all dataset to memory for data iterators 3 | ''' 4 | 5 | import cv2 6 | import numpy 7 | import pickle 8 | 9 | from .base_provider import ProviderBaseclass 10 | from .text_list_adapter import TextListAdapter 11 | 12 | 13 | class PickleProvider(ProviderBaseclass): 14 | """ 15 | This class provides methods to save and read data. 16 | By default, images are compressed using JPG format. 17 | If data_adapter is not None, it means saving data, or it is reading data 18 | """ 19 | 20 | def __init__(self, 21 | pickle_file_path, 22 | encode_quality=90, 23 | data_adapter=None): 24 | ProviderBaseclass.__init__(self) 25 | 26 | if data_adapter: # write data 27 | 28 | self.data_adapter = data_adapter 29 | self.data = {} 30 | self.counter = 0 31 | self.pickle_file_path = pickle_file_path 32 | 33 | else: # read data 34 | 35 | self.data = pickle.load(open(pickle_file_path, 'rb')) 36 | # get positive and negative indeices 37 | self._positive_index = [] 38 | self._negative_index = [] 39 | for k, v in self.data.items(): 40 | if v[1] == 0: # negative 41 | self._negative_index.append(k) 42 | else: # positive 43 | self._positive_index.append(k) 44 | 45 | self.compression_mode = '.jpg' 46 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality] 47 | 48 | @property 49 | def positive_index(self): 50 | return self._positive_index 51 | 52 | @property 53 | def negative_index(self): 54 | return self._negative_index 55 | 56 | def write(self): 57 | 58 | for data_item in self.data_adapter.get_one(): 59 | 60 | temp_sample = [] 61 | im, bboxes = data_item 62 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params) 63 | if buf is None or buf.size == 0: 64 | print('buf is wrong.') 65 | continue 66 | if not ret: 67 | print('An error is occurred while com:pression.') 68 | continue 69 | temp_sample.append(buf) 70 | 71 | if isinstance(bboxes, str): # 负样本 72 | temp_sample.append(0) 73 | temp_sample.append(int(bboxes)) 74 | else: 75 | temp_sample.append(1) 76 | temp_sample.append(bboxes) 77 | 78 | self.data[self.counter] = temp_sample 79 | print('Successfully save the %d-th data item.' 
% self.counter) 80 | self.counter += 1 81 | 82 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) 83 | 84 | def read_by_index(self, index): 85 | im_buf, flag, bboxes = self.data[index] 86 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR) 87 | return im, flag, bboxes 88 | 89 | 90 | def write_file(): 91 | data_list_file_path = './data_folder/data_list_2019-05-07-14-47-19.txt' 92 | LFPD_adapter = TextListAdapter(data_list_file_path) 93 | 94 | pickle_file_path = './data_folder/data_2019-05-07-14-47-19.pkl' 95 | encode_quality = 90 96 | LFPD_packer = PickleProvider(pickle_file_path, encode_quality, LFPD_adapter) 97 | LFPD_packer.write() 98 | 99 | 100 | def read_file(): 101 | pickle_file_path = './data_folder/data_2019-05-07-14-47-19.pkl' 102 | 103 | LFPD_provider = PickleProvider(pickle_file_path) 104 | positive_index = LFPD_provider.positive_index 105 | negative_index = LFPD_provider.negative_index 106 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index))) 107 | # all_index = positive_index+negative_index 108 | import random 109 | random.shuffle(positive_index) 110 | 111 | for i, index in enumerate(positive_index): 112 | im, flag, bboxes_numpy = LFPD_provider.read_by_index(index) 113 | if isinstance(bboxes_numpy, numpy.ndarray): 114 | for n in range(bboxes_numpy.shape[0]): 115 | # cast to int: cv2.rectangle requires integer pixel coordinates 116 | cv2.rectangle(im, (int(bboxes_numpy[n, 0]), int(bboxes_numpy[n, 1])), (int(bboxes_numpy[n, 0] + bboxes_numpy[n, 2]), int(bboxes_numpy[n, 1] + bboxes_numpy[n, 3])), (0, 255, 0), 1) 117 | cv2.imshow('im', im) 118 | cv2.waitKey() 119 | 120 | 121 | if __name__ == '__main__': 122 | pass 123 | write_file() 124 | # read_file() 125 | # generate_rec_20181202() 126 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/text_list_adapter.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This adapter accepts a text file as input which describes the annotated data. 3 | Each line in the text is formatted as: 4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]......
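For example (hypothetical paths; first a positive sample with 2 bboxes, then a negative sample):
/data/images/pos_0001.jpg,1,2,10,20,30,40,60,80,30,40
/data/images/neg_0001.jpg,0,0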
5 | ''' 6 | 7 | import cv2 8 | import numpy 9 | from .base_data_adapter import DataAdapterBaseclass 10 | 11 | 12 | class TextListAdapter(DataAdapterBaseclass): 13 | 14 | def __init__(self, data_list_file_path): 15 | 16 | DataAdapterBaseclass.__init__(self) 17 | fin = open(data_list_file_path, 'r') 18 | self.lines = fin.readlines() 19 | fin.close() 20 | self.line_counter = 0 21 | 22 | def __del__(self): 23 | pass 24 | 25 | def get_one(self): 26 | """ 27 | This function uses 'yield' to return samples 28 | """ 29 | while self.line_counter < len(self.lines): 30 | 31 | line = self.lines[self.line_counter].strip('\n').split(',') 32 | if line[1] == '1': # for positive samples, check that the number of bboxes matches the declared count 33 | assert len(line[3:]) == 4 * int(line[2]) 34 | 35 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED) 36 | 37 | if line[1] == '0': 38 | yield im, '0' 39 | self.line_counter += 1 40 | continue 41 | 42 | num_bboxes = int(line[2]) 43 | bboxes = [] 44 | for i in range(num_bboxes): 45 | x = float(line[3 + i * 4]) 46 | y = float(line[3 + i * 4 + 1]) 47 | width = float(line[3 + i * 4 + 2]) 48 | height = float(line[3 + i * 4 + 3]) 49 | 50 | bboxes.append([x, y, width, height]) 51 | 52 | bboxes = numpy.array(bboxes, dtype=numpy.float32) 53 | yield im, bboxes 54 | 55 | self.line_counter += 1 56 | 57 | 58 | if __name__ == '__main__': 59 | pass 60 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/image_augmentation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/image_augmentation/__init__.py -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/image_augmentation/augmentor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This module provides many types of image augmentation. One can choose appropriate augmentations for 4 | detection, segmentation and classification. 5 | """ 6 | import cv2 7 | import numpy 8 | import random 9 | 10 | 11 | class Augmentor(object): 12 | """ 13 | All augmentation operations are static methods of this class.
14 | """ 15 | 16 | def __init__(self): 17 | pass 18 | 19 | @staticmethod 20 | def histogram_equalisation(image): 21 | """ 22 | do histogram equlisation for grayscale image 23 | :param image: input image with single channel 8bits 24 | :return: processed image 25 | """ 26 | if image.ndim != 2: 27 | print('Input image is not grayscale!') 28 | return None 29 | if image.dtype != numpy.uint8: 30 | print('Input image is not uint8!') 31 | return None 32 | 33 | result = cv2.equalizeHist(image) 34 | return result 35 | 36 | @staticmethod 37 | def grayscale(image): 38 | """ 39 | convert BGR image to grayscale image 40 | :param image: input image with BGR channels 41 | :return: 42 | """ 43 | if image.ndim != 3: 44 | return None 45 | if image.dtype != numpy.uint8: 46 | print('Input image is not uint8!') 47 | return None 48 | 49 | result = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 50 | return result 51 | 52 | @staticmethod 53 | def inversion(image): 54 | """ 55 | invert the image (255-) 56 | :param image: input image with BGR or grayscale 57 | :return: 58 | """ 59 | if image.dtype != numpy.uint8: 60 | print('Input image is not uint8!') 61 | return None 62 | 63 | result = 255 - image 64 | return result 65 | 66 | @staticmethod 67 | def binarization(image, block_size=5, C=10): 68 | """ 69 | convert input image to binary image 70 | cv2.adaptiveThreshold is used, for detailed information, refer to opencv docs 71 | :param image: 72 | :return: 73 | """ 74 | if image.ndim == 3: 75 | image_grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 76 | else: 77 | image_grayscale = image 78 | 79 | binary_image = cv2.adaptiveThreshold(image_grayscale, 255, cv2.ADAPTIVE_THRESH_MEAN_C, 80 | cv2.THRESH_BINARY, block_size, C) 81 | return binary_image 82 | 83 | @staticmethod 84 | def brightness(image, min_factor=0.5, max_factor=1.5): 85 | ''' 86 | adjust the image brightness 87 | :param image: 88 | :param min_factor: 89 | :param max_factor: 90 | :return: 91 | ''' 92 | if image.dtype != numpy.uint8: 93 | print('Input image is not uint8!') 94 | return None 95 | 96 | factor = numpy.random.uniform(min_factor, max_factor) 97 | result = image * factor 98 | if factor > 1: 99 | result[result > 255] = 255 100 | result = result.astype(numpy.uint8) 101 | return result 102 | 103 | @staticmethod 104 | def saturation(image, min_factor=0.5, max_factor=1.5): 105 | ''' 106 | adjust the image saturation 107 | :param image: 108 | :param min_factor: 109 | :param max_factor: 110 | :return: 111 | ''' 112 | if image.dtype != numpy.uint8: 113 | print('Input image is not uint8!') 114 | return None 115 | 116 | image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 117 | factor = numpy.random.uniform(min_factor, max_factor) 118 | 119 | result = numpy.zeros(image.shape, dtype=numpy.float32) 120 | result[:, :, 0] = image[:, :, 0] * factor + image_gray * (1 - factor) 121 | result[:, :, 1] = image[:, :, 1] * factor + image_gray * (1 - factor) 122 | result[:, :, 2] = image[:, :, 2] * factor + image_gray * (1 - factor) 123 | result[result > 255] = 255 124 | result[result < 0] = 0 125 | result = result.astype(numpy.uint8) 126 | return result 127 | 128 | @staticmethod 129 | def contrast(image, min_factor=0.5, max_factor=1.5): 130 | ''' 131 | adjust the image contrast 132 | :param image: 133 | :param min_factor: 134 | :param max_factor: 135 | :return: 136 | ''' 137 | if image.dtype != numpy.uint8: 138 | print('Input image is not uint8!') 139 | return None 140 | 141 | image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 142 | gray_mean = numpy.mean(image_gray) 143 | 
temp = numpy.ones((image.shape[0], image.shape[1]), dtype=numpy.float32) * gray_mean 144 | factor = numpy.random.uniform(min_factor, max_factor) 145 | 146 | result = numpy.zeros(image.shape, dtype=numpy.float32) 147 | result[:, :, 0] = image[:, :, 0] * factor + temp * (1 - factor) 148 | result[:, :, 1] = image[:, :, 1] * factor + temp * (1 - factor) 149 | result[:, :, 2] = image[:, :, 2] * factor + temp * (1 - factor) 150 | 151 | result[result > 255] = 255 152 | result[result < 0] = 0 153 | result = result.astype(numpy.uint8) 154 | 155 | return result 156 | 157 | @staticmethod 158 | def blur(image, mode='random', kernel_size=3, sigma=1): 159 | """ 160 | 161 | :param image: 162 | :param mode: options 'normalized' 'gaussian' 'median' 163 | :param kernel_size: 164 | :param sigma: used for gaussian blur 165 | :return: 166 | """ 167 | if image.dtype != numpy.uint8: 168 | print('Input image is not uint8!') 169 | return None 170 | 171 | if mode == 'random': 172 | mode = random.choice(['normalized', 'gaussian', 'median']) 173 | 174 | if mode == 'normalized': 175 | result = cv2.blur(image, (kernel_size, kernel_size)) 176 | elif mode == 'gaussian': 177 | result = cv2.GaussianBlur(image, (kernel_size, kernel_size), sigmaX=sigma, sigmaY=sigma) 178 | elif mode == 'median': 179 | result = cv2.medianBlur(image, kernel_size) 180 | else: 181 | print('Blur mode is not supported: %s.' % mode) 182 | result = image 183 | return result 184 | 185 | @staticmethod 186 | def rotation(image, degree=10, mode='crop', scale=1): 187 | """ 188 | 189 | :param image: 190 | :param degree: 191 | :param mode: 'crop'-keep original size, 'fill'-keep full image 192 | :param scale: 193 | :return: 194 | """ 195 | if image.dtype != numpy.uint8: 196 | print('Input image is not uint8!') 197 | return None 198 | 199 | h, w = image.shape[:2] 200 | center_x, center_y = w / 2, h / 2 201 | M = cv2.getRotationMatrix2D((center_x, center_y), degree, scale) 202 | 203 | if mode == 'crop': 204 | new_w, new_h = w, h 205 | else: 206 | cos = numpy.abs(M[0, 0]) 207 | sin = numpy.abs(M[0, 1]) 208 | new_w = int(h * sin + w * cos) 209 | new_h = int(h * cos + w * sin) 210 | M[0, 2] += (new_w / 2) - center_x 211 | M[1, 2] += (new_h / 2) - center_y 212 | 213 | result = cv2.warpAffine(image, M, (new_w, new_h)) 214 | return result 215 | 216 | @staticmethod 217 | def flip(image, orientation='h'): 218 | ''' 219 | 220 | :param image: 221 | :param orientation: 222 | :return: 223 | ''' 224 | if image.dtype != numpy.uint8: 225 | print('Input image is not uint8!') 226 | return None 227 | 228 | if orientation == 'h': 229 | return cv2.flip(image, 1) 230 | elif orientation == 'v': 231 | return cv2.flip(image, 0) 232 | else: 233 | print('Unsupported orientation: %s.' 
% orientation) 234 | return image 235 | 236 | @staticmethod 237 | def resize(image, size_in_pixel=None, size_in_scale=None): 238 | """ 239 | 240 | :param image: 241 | :param size_in_pixel: tuple (width, height) 242 | :param size_in_scale: tuple (width_scale, height_scale) 243 | :return: 244 | """ 245 | if image.dtype != numpy.uint8: 246 | print('Input image is not uint8!') 247 | return None 248 | 249 | if size_in_pixel is not None: 250 | return cv2.resize(image, size_in_pixel) 251 | elif size_in_scale is not None: 252 | return cv2.resize(image, (0, 0), fx=size_in_scale[0], fy=size_in_scale[1]) 253 | else: 254 | print('size_in_pixel and size_in_scale are both None.') 255 | return image 256 | 257 | @staticmethod 258 | def crop(image, x, y, width, height): 259 | """ 260 | 261 | :param image: 262 | :param x: crop area top-left x coordinate 263 | :param y: crop area top-left y coordinate 264 | :param width: crop area width 265 | :param height: crop area height 266 | :return: 267 | """ 268 | if image.dtype != numpy.uint8: 269 | print('Input image is not uint8!') 270 | return None 271 | 272 | if image.ndim == 3: 273 | return image[y:y + height, x:x + width, :] 274 | else: 275 | return image[y:y + height, x:x + width] 276 | 277 | @staticmethod 278 | def random_crop(image, width, height): 279 | """ 280 | 281 | :param image: 282 | :param width: crop area width 283 | :param height: crop area height 284 | :return: 285 | """ 286 | if image.dtype != numpy.uint8: 287 | print('Input image is not uint8!') 288 | return False, image 289 | 290 | w_interval = image.shape[1] - width 291 | h_interval = image.shape[0] - height 292 | 293 | if image.ndim == 3: 294 | result = numpy.zeros((height, width, 3), dtype=numpy.uint8) 295 | else: 296 | result = numpy.zeros((height, width), dtype=numpy.uint8) 297 | 298 | if w_interval >= 0 and h_interval >= 0: 299 | crop_x, crop_y = random.randint(0, w_interval), random.randint(0, h_interval) 300 | if image.ndim == 3: 301 | result = image[crop_y:crop_y + height, crop_x:crop_x + width, :] 302 | else: 303 | result = image[crop_y:crop_y + height, crop_x:crop_x + width] 304 | elif w_interval < 0 and h_interval >= 0: 305 | put_x = -w_interval // 2  # integer division: slice indices must be ints 306 | crop_y = random.randint(0, h_interval) 307 | if image.ndim == 3: 308 | result[:, put_x:put_x + image.shape[1], :] = image[crop_y:crop_y + height, :, :] 309 | else: 310 | result[:, put_x:put_x + image.shape[1]] = image[crop_y:crop_y + height, :] 311 | elif w_interval >= 0 and h_interval < 0: 312 | crop_x = random.randint(0, w_interval) 313 | put_y = -h_interval // 2  # integer division: slice indices must be ints 314 | if image.ndim == 3: 315 | result[put_y:put_y + image.shape[0], :, :] = image[:, crop_x:crop_x + width, :] 316 | else: 317 | result[put_y:put_y + image.shape[0], :] = image[:, crop_x:crop_x + width] 318 | else: 319 | put_x, put_y = -w_interval // 2, -h_interval // 2  # integer division: slice indices must be ints 320 | if image.ndim == 3: 321 | result[put_y:put_y + image.shape[0], put_x:put_x + image.shape[1], :] = image[:, :, :] 322 | else: 323 | result[put_y:put_y + image.shape[0], put_x:put_x + image.shape[1]] = image[:, :] 324 | 325 | return result 326 | 327 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/inference_speed_eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/inference_speed_eval/__init__.py
-------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/inference_speed_eval/inference_speed_eval_with_mxnet_cudnn.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | import time 5 | import logging 6 | 7 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '1' 8 | logging.getLogger().setLevel(logging.INFO) 9 | 10 | 11 | class InferenceSpeedEval(object): 12 | def __init__(self, symbol_file_path, mxnet_module, input_shape, input_name='data', device_type='gpu', gpu_index=0): 13 | ''' 14 | 15 | :param symbol_file_path: symbol file path 16 | :param mxnet_module: mxnet module 17 | :param input_shape: input shape in tuple--(batch_size, num_channel, height, width) 18 | :param input_name: input name defined in symbol, by default 'data' 19 | :param device_type: device type: 'gpu', 'cpu' 20 | :param gpu_index: gpu index 21 | ''' 22 | self.symbol_file_path = symbol_file_path 23 | self.mxnet_module = mxnet_module 24 | self.input_name = input_name 25 | self.input_shape = input_shape 26 | self.device_type = device_type 27 | if self.device_type == 'cpu': # CAUTION: x86 cpu inference needs MXNet with mkldnn, or inference speed will be very slow 28 | self.context = self.mxnet_module.cpu() 29 | elif self.device_type == 'gpu': 30 | self.context = self.mxnet_module.gpu(gpu_index) 31 | else: 32 | logging.error('Unknown device_type: %s .' % self.device_type) 33 | sys.exit(1) 34 | 35 | # load symbol file 36 | if not os.path.exists(self.symbol_file_path): 37 | logging.error('Symbol file: %s does not exist!' % symbol_file_path) 38 | sys.exit(1) 39 | self.symbol_net = self.mxnet_module.symbol.load(self.symbol_file_path) 40 | 41 | # create module 42 | self.module = self.mxnet_module.module.Module(symbol=self.symbol_net, 43 | data_names=[self.input_name], 44 | label_names=None, 45 | context=self.context) 46 | self.module.bind(data_shapes=[(self.input_name, self.input_shape)], for_training=False, grad_req='write') 47 | 48 | self.module.init_params(initializer=self.mxnet_module.initializer.Xavier(), allow_missing=True) 49 | self.module.init_optimizer(kvstore=None) 50 | 51 | def run_speed_eval(self, warm_run_loops=10, real_run_loops=100): 52 | random_input_data = [self.mxnet_module.random.uniform(-1.0, 1.0, shape=self.input_shape, ctx=self.context)] 53 | temp_batch = self.mxnet_module.io.DataBatch(random_input_data, []) 54 | 55 | # basic info of this eval 56 | logging.info('Test symbol file: %s' % self.symbol_file_path) 57 | logging.info('Test device: %s' % self.device_type) 58 | logging.info('Test input shape: %s' % str(self.input_shape)) 59 | 60 | # warm run 61 | for i in range(warm_run_loops): 62 | self.module.forward(temp_batch) 63 | for output in self.module.get_outputs(): 64 | output.asnumpy() 65 | 66 | logging.info('Start real run loops---------------') 67 | tic = time.time() 68 | # real run 69 | for i in range(real_run_loops): 70 | self.module.forward(temp_batch) 71 | for output in self.module.get_outputs(): 72 | output.asnumpy() 73 | 74 | toc = time.time() 75 | 76 | print('Finish %d loops in %.02f ms.
\n[%.02f ms] for each loop \n[%.02f ms] for each image (namely %.02f FPS)' % 77 | (real_run_loops, 78 | (toc - tic) * 1000, 79 | (toc - tic) * 1000 / real_run_loops, 80 | (toc - tic) * 1000 / real_run_loops / self.input_shape[0], 81 | real_run_loops * self.input_shape[0] / (toc - tic))) 82 | 83 | 84 | if __name__ == '__main__': 85 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') # set MXNet python path if needed 86 | import mxnet 87 | 88 | symbol_file_path = '/home/heyonghao/projects/tocreate_LFFD_ICCV2019_FaceDetector/symbol_farm/symbol_10_560_25L_8scales_s5_v2_deploy.json' 89 | input_shape = (1, 3, 720, 1280) # (1, 3, 240, 320) (1, 3, 480, 640) (1, 3, 720, 1280) (1, 3, 1080, 1920) (1, 3, 2160, 3840) (1, 3, 4320, 7680) 90 | device_type = 'gpu' 91 | gpu_index = 0 92 | 93 | speedEval = InferenceSpeedEval(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type=device_type, gpu_index=gpu_index) 94 | speedEval.run_speed_eval(warm_run_loops=10, real_run_loops=500) 95 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/inference_speed_eval/inference_speed_eval_with_tensorrt_cudnn.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | import time 5 | import logging 6 | import numpy 7 | 8 | import pycuda.driver as cuda 9 | import pycuda.autoinit 10 | import tensorrt as trt 11 | 12 | logging.getLogger().setLevel(logging.INFO) 13 | 14 | 15 | # Simple helper data class that's a little nicer to use than a 2-tuple. 16 | class HostDeviceMem(object): 17 | def __init__(self, host_mem, device_mem): 18 | self.host = host_mem 19 | self.device = device_mem 20 | 21 | def __str__(self): 22 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 23 | 24 | def __repr__(self): 25 | return self.__str__() 26 | 27 | 28 | class InferenceSpeedEval(object): 29 | def __init__(self, symbol_file_path, mxnet_module, input_shape, data_mode='fp32'): 30 | 31 | if not os.path.exists(symbol_file_path): 32 | logging.error('symbol file does not exist!') 33 | sys.exit(1) 34 | 35 | if len(input_shape) != 4: 36 | logging.error('input shape should have 4 elements in the order of NCHW.') 37 | sys.exit(1) 38 | 39 | symbol_net = mxnet_module.symbol.load(symbol_file_path) 40 | # create module 41 | module = mxnet_module.module.Module(symbol=symbol_net, 42 | data_names=['data'], 43 | label_names=None, 44 | context=mxnet_module.cpu()) 45 | module.bind(data_shapes=[('data', input_shape)], for_training=False, grad_req='write') 46 | module.init_params(initializer=mxnet_module.initializer.Xavier(), allow_missing=True) 47 | arg_params, aux_params = module.get_params() 48 | net_params = dict() 49 | net_params.update(arg_params) 50 | net_params.update(aux_params) 51 | self.onnx_temp_file = 'temp.onnx' 52 | logging.info('Convert mxnet symbol to onnx...') 53 | mxnet_module.contrib.onnx.export_model(symbol_net, net_params, [input_shape], numpy.float32, self.onnx_temp_file, verbose=False) 54 | 55 | # build engine 56 | trt_logger = trt.Logger(trt.Logger.WARNING) 57 | builder = trt.Builder(trt_logger) 58 | builder.max_batch_size = input_shape[0] 59 | builder.average_find_iterations = 2 60 | builder.max_workspace_size = 2 << 30 61 | 62 | if data_mode == 'fp32': 63 | pass 64 | elif data_mode == 'fp16': 65 | if not builder.platform_has_fast_fp16: 66 | logging.error('fp16 is not supported by this platform!') 67 | sys.exit(1) 68 | 
builder.fp16_mode = True 69 | elif data_mode == 'int8': 70 | logging.error('Currently, not implemented yet.') 71 | sys.exit(1) 72 | if not builder.platform_has_fast_int8: 73 | logging.error('int8 is not supported by this platform!') 74 | sys.exit(1) 75 | builder.int8_mode = True 76 | else: 77 | logging.error('Unknown data_mode: %s' % data_mode) 78 | logging.error('Available choices: \'fp32\'(default), \'fp16\', \'int8\'') 79 | sys.exit(1) 80 | 81 | network = builder.create_network() 82 | parser = trt.OnnxParser(network, trt_logger) 83 | logging.info('Parsing onnx for trt network...') 84 | with open(self.onnx_temp_file, 'rb') as onnx_fin: 85 | parser.parse(onnx_fin.read()) 86 | 87 | num_parser_errors = parser.num_errors 88 | if num_parser_errors != 0: 89 | logging.error('Errors occur while parsing the onnx file!') 90 | for i in range(num_parser_errors): 91 | logging.error('Error %d: %s' % (i, parser.get_error(i).desc())) 92 | sys.exit(1) 93 | 94 | logging.info('Start to build trt engine...(this step may cost much time)') 95 | time_start = time.time() 96 | self.engine = builder.build_cuda_engine(network) 97 | time_end = time.time() 98 | logging.info('Engine building time: %.02f s' % (time_end - time_start)) 99 | 100 | for binding in self.engine: 101 | if self.engine.binding_is_input(binding): 102 | logging.info('Input name: %s, shape: %s' % (binding, str(self.engine.get_binding_shape(binding)))) 103 | 104 | self.executor = self.engine.create_execution_context() 105 | self.max_batch_size = builder.max_batch_size 106 | 107 | def __del__(self): 108 | if os.path.exists(self.onnx_temp_file): 109 | os.remove(self.onnx_temp_file) 110 | 111 | def run_speed_eval(self, warm_run_loops=10, real_run_loops=100): 112 | 113 | def allocate_buffers(engine): 114 | inputs = [] 115 | outputs = [] 116 | bindings = [] 117 | for binding in engine: 118 | size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size 119 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 120 | # Allocate host and device buffers 121 | host_mem = cuda.pagelocked_empty(size, dtype) 122 | device_mem = cuda.mem_alloc(host_mem.nbytes) 123 | # Append the device buffer to device bindings. 124 | bindings.append(int(device_mem)) 125 | # Append to the appropriate list. 126 | if engine.binding_is_input(binding): 127 | inputs.append(HostDeviceMem(host_mem, device_mem)) 128 | else: 129 | outputs.append(HostDeviceMem(host_mem, device_mem)) 130 | return inputs, outputs, bindings 131 | 132 | inputs, outputs, bindings = allocate_buffers(self.engine) 133 | # warm run 134 | for i in range(warm_run_loops): 135 | [cuda.memcpy_htod(inp.device, inp.host) for inp in inputs] 136 | self.executor.execute(batch_size=self.max_batch_size, bindings=bindings) 137 | [cuda.memcpy_dtoh(out.host, out.device) for out in outputs] 138 | 139 | # real run 140 | logging.info('Start real run loop.') 141 | sum_time_data_copy = 0. 142 | sum_time_inference_only = 0. 143 | for i in range(real_run_loops): 144 | time_start = time.time() 145 | [cuda.memcpy_htod(inp.device, inp.host) for inp in inputs] 146 | sum_time_data_copy += time.time() - time_start 147 | 148 | time_start = time.time() 149 | self.executor.execute(batch_size=self.max_batch_size, bindings=bindings) 150 | sum_time_inference_only += time.time() - time_start 151 | 152 | time_start = time.time() 153 | [cuda.memcpy_dtoh(out.host, out.device) for out in outputs] 154 | sum_time_data_copy += time.time() - time_start 155 | 156 | logging.info('Total time (data transfer & inference) elapsed: %.02f ms. 
[%.02f ms] for each image (%.02f FPS)' 157 | % ((sum_time_data_copy + sum_time_inference_only) * 1000, 158 | (sum_time_data_copy + sum_time_inference_only) * 1000 / real_run_loops / self.max_batch_size, 159 | real_run_loops * self.max_batch_size / (sum_time_data_copy + sum_time_inference_only))) 160 | 161 | 162 | if __name__ == '__main__': 163 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') 164 | import mxnet 165 | 166 | symbol_file_path = '/home/heyonghao/projects/tocreate_LFFD_ICCV2019_FaceDetector/symbol_farm/symbol_10_560_25L_8scales_s5_v2_deploy.json' 167 | input_shape = (1, 3, 720, 1280) # (1, 3, 240, 320) (1, 3, 480, 640) (1, 3, 720, 1280) (1, 3, 1080, 1920) (1, 3, 2160, 3840) (1, 3, 4320, 7680) 168 | 169 | speedEval = InferenceSpeedEval(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, data_mode='fp32') 170 | speedEval.run_speed_eval(warm_run_loops=10, real_run_loops=500) 171 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/logging_GOCD.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import logging 3 | import os 4 | import sys 5 | 6 | ''' 7 | logging module 8 | ''' 9 | 10 | 11 | def init_logging(log_file_path=None, log_file_mode='w', log_overwrite_flag=False, log_level=logging.INFO): 12 | # the basic logger always offers console output 13 | console_handler = logging.StreamHandler() 14 | formatter = logging.Formatter('%(asctime)s[%(levelname)s]: %(message)s') 15 | console_handler.setFormatter(formatter) 16 | 17 | logging.getLogger().setLevel(log_level) 18 | logging.getLogger().addHandler(console_handler) 19 | 20 | if not log_file_path or log_file_path == '': 21 | print('No log file is specified. The log information is only displayed in console.') 22 | return 23 | 24 | # check whether the log file already exists 25 | if not os.path.exists(log_file_path): 26 | location_dir = os.path.dirname(log_file_path) 27 | if not os.path.exists(location_dir): 28 | os.makedirs(location_dir) 29 | 30 | file_handler = logging.FileHandler(filename=log_file_path, mode=log_file_mode) 31 | file_handler.setFormatter(formatter) 32 | logging.getLogger().addHandler(file_handler) 33 | else: 34 | if log_overwrite_flag: 35 | print('The file [%s] already exists. It will be handled according to the arg [log_file_mode] (the default is \'w\').' % log_file_path) 36 | file_handler = logging.FileHandler(filename=log_file_path, mode=log_file_mode) 37 | file_handler.setFormatter(formatter) 38 | logging.getLogger().addHandler(file_handler) 39 | else: 40 | print('The file [%s] already exists.
The [log_overwrite_flag] is False, please change the log file name.' % log_file_path) 41 | sys.exit(0) 42 | 43 | 44 | def temp_test(): 45 | log_file = './test.log' 46 | file_mode = 'w' 47 | init_logging(log_file_path=log_file, log_file_mode=file_mode, log_overwrite_flag=True, log_level=logging.DEBUG) 48 | 49 | 50 | if __name__ == '__main__': 51 | temp_test() 52 | logging.info('test info') 53 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/__init__.py -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/cross_entropy_with_focal_loss_for_one_class_detection.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @date : 19-1-23 3 | # @author : MindBreaker 4 | # @module : 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | import logging 9 | 10 | 11 | class focal_loss_for_twoclass(mx.operator.CustomOp): 12 | ''' 13 | 1, softmax is applied to in_data[0] (the raw pred) inside this operator, so the network output must not be pre-softmaxed 14 | 2, this CE operator is only for the two-class case: index 0 indicates pos (foreground) and index 1 indicates neg (background) 15 | ''' 16 | 17 | def __init__(self, alpha=0.25, gamma=2): 18 | super(focal_loss_for_twoclass, self).__init__() 19 | self.alpha = alpha 20 | self.gamma = gamma 21 | 22 | def forward(self, is_train, req, in_data, out_data, aux): 23 | pred = in_data[0] 24 | label = in_data[1] 25 | pred_softmax = mx.ndarray.softmax(pred, axis=1) 26 | pred_log = mx.ndarray.log(pred_softmax) 27 | cross_entropy = - label * pred_log 28 | 29 | self.assign(out_data[0], req[0], cross_entropy) 30 | 31 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 32 | pred = in_data[0] 33 | label = in_data[1] 34 | mask = in_data[2] 35 | 36 | pred_softmax = mx.ndarray.softmax(pred, axis=1) 37 | 38 | # print('pos mean prob:', mx.ndarray.mean(pred_softmax[:, 0, :, :][label[:, 0, :, :] > 0.5]).asnumpy()) 39 | # print('neg mean prob:', mx.ndarray.mean(pred_softmax[:, 1, :, :][label[:, 1, :, :] > 0.5]).asnumpy()) 40 | 41 | # pos_flag = label[:, 0, :, :] > 0.5 42 | # neg_flag = label[:, 1, :, :] > 0.5 43 | 44 | FL_gradient = -self.gamma * mx.ndarray.power(1 - pred_softmax, self.gamma - 1) * mx.ndarray.log(pred_softmax) * pred_softmax + mx.ndarray.power(1 - pred_softmax, self.gamma) 45 | 46 | FL_gradient[:, 0, :, :] *= self.alpha 47 | FL_gradient[:, 1, :, :] *= 1 - self.alpha 48 | 49 | FL_gradient *= (pred_softmax-label) 50 | 51 | FL_gradient /= mx.ndarray.sum(mask).asnumpy()[0] 52 | # print('mean grad:', mx.ndarray.mean(mx.ndarray.abs(FL_gradient)).asnumpy()) 53 | 54 | self.assign(in_grad[0], req[0], FL_gradient) 55 | 56 | 57 | @mx.operator.register("focal_loss_for_twoclass") 58 | class focal_loss_for_twoclass_Prop(mx.operator.CustomOpProp): 59 | def __init__(self): 60 | super(focal_loss_for_twoclass_Prop, self).__init__(need_top_grad=False) 61 | 62 | def list_arguments(self): 63 | return ['pred', 'label', 'mask'] 64 | 65 | def list_outputs(self): 66 | return ['output'] 67 | 68 | def infer_shape(self, in_shape): 69 | data_shape = in_shape[0] 70 | label_shape = in_shape[0] 71 | mask_shape =
in_shape[0] 72 | output_shape = in_shape[0] 73 | return [data_shape, label_shape, mask_shape], [output_shape], [] 74 | 75 | def create_operator(self, ctx, shapes, dtypes): 76 | return focal_loss_for_twoclass() 77 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/cross_entropy_with_hnm_for_one_class_detection.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | class cross_entropy_with_hnm_for_one_class_detection(mx.operator.CustomOp): 5 | 6 | def __init__(self, hnm_ratio): 7 | super(cross_entropy_with_hnm_for_one_class_detection, self).__init__() 8 | self.hnm_ratio = int(hnm_ratio) 9 | 10 | def forward(self, is_train, req, in_data, out_data, aux): 11 | pred = in_data[0] 12 | label = in_data[1] 13 | pred_softmax = mx.ndarray.softmax(pred, axis=1) 14 | pred_log = mx.ndarray.log(pred_softmax) 15 | cross_entropy = - label * pred_log 16 | 17 | self.assign(out_data[0], req[0], cross_entropy) 18 | 19 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 20 | pred = in_data[0] 21 | label = in_data[1] 22 | mask = in_data[2] 23 | 24 | pred_softmax = mx.ndarray.softmax(pred, axis=1) 25 | CE_gradient = pred_softmax - label # Standard CE gradient 26 | loss_mask = mx.ndarray.ones((CE_gradient.shape[0], 1, CE_gradient.shape[2], CE_gradient.shape[3]), ctx=CE_gradient.context) 27 | 28 | if self.hnm_ratio > 0: 29 | pos_flag = (label[:, 0, :, :] > 0.5) 30 | pos_num = mx.ndarray.sum(pos_flag).asnumpy()[0] # get the number of positive samples 31 | 32 | if pos_num > 0: 33 | neg_flag = (label[:, 1, :, :] > 0.5) 34 | neg_num = mx.ndarray.sum(neg_flag).asnumpy()[0] 35 | neg_num_selected = min(int(self.hnm_ratio * pos_num), int(neg_num)) 36 | neg_prob = pred_softmax[:, 1, :, :] * neg_flag # non-negative value 37 | neg_prob_sort = mx.ndarray.sort(neg_prob.reshape((1, -1)), is_ascend=True) 38 | 39 | prob_threshold = neg_prob_sort[0][neg_num_selected].asnumpy()[0] 40 | neg_grad_flag = (neg_prob <= prob_threshold) 41 | loss_mask = mx.ndarray.logical_or(neg_grad_flag, pos_flag) 42 | else: 43 | neg_choice_ratio = 0.1 44 | neg_num_selected = int(pred_softmax[:, 1, :, :].size * neg_choice_ratio) 45 | neg_prob = pred_softmax[:, 1, :, :] 46 | neg_prob_sort = mx.ndarray.sort(neg_prob.reshape((1, -1)), is_ascend=True) 47 | prob_threshold = neg_prob_sort[0][neg_num_selected].asnumpy()[0] 48 | loss_mask = (neg_prob <= prob_threshold) 49 | 50 | for i in range(CE_gradient.shape[1]): 51 | CE_gradient[:, i, :, :] *= loss_mask * mask[:, i, :, :] 52 | 53 | CE_gradient /= mx.ndarray.sum(loss_mask).asnumpy()[0] 54 | 55 | self.assign(in_grad[0], req[0], CE_gradient) 56 | 57 | 58 | @mx.operator.register("cross_entropy_with_hnm_for_one_class_detection") 59 | class cross_entropy_with_hnm_for_one_class_detection_Prop(mx.operator.CustomOpProp): 60 | def __init__(self, hnm_ratio=5): 61 | super(cross_entropy_with_hnm_for_one_class_detection_Prop, self).__init__(need_top_grad=False) 62 | self.hnm_ratio = hnm_ratio 63 | 64 | def list_arguments(self): 65 | return ['pred', 'label', 'mask'] 66 | 67 | def list_outputs(self): 68 | return ['output'] 69 | 70 | def infer_shape(self, in_shape): 71 | data_shape = in_shape[0] 72 | label_shape = in_shape[0] 73 | mask_shape = in_shape[0] 74 | output_shape = in_shape[0] 75 | return [data_shape, label_shape, mask_shape], [output_shape], [] 76 | 77 | def create_operator(self, ctx, shapes, dtypes): 78 | return
cross_entropy_with_hnm_for_one_class_detection(self.hnm_ratio) 79 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/loss.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class cross_entropy_with_hnm_for_one_class_detection2(nn.Module): 8 | def __init__(self, hnm_ratio, num_output_scales): 9 | super(cross_entropy_with_hnm_for_one_class_detection2, self).__init__() 10 | self.hnm_ratio = int(hnm_ratio) 11 | self.num_output_scales = num_output_scales 12 | 13 | def forward(self, outputs, targets): 14 | loss_branch_list = [] 15 | for i in range(self.num_output_scales): 16 | pred_score = outputs[i * 2] 17 | pred_bbox = outputs[i * 2 + 1] 18 | gt_mask = targets[i * 2].cuda() 19 | gt_label = targets[i * 2 + 1].cuda() 20 | 21 | pred_score_softmax = torch.softmax(pred_score, dim=1) 22 | # loss_mask = torch.ones(pred_score_softmax.shape[0], 23 | # 1, 24 | # pred_score_softmax.shape[2], 25 | # pred_score_softmax.shape[3]) 26 | loss_mask = torch.ones(pred_score_softmax.shape, dtype=torch.bool, device=pred_score_softmax.device)  # boolean mask on the same device; overwritten below when HNM is enabled 27 | 28 | if self.hnm_ratio > 0: 29 | # print('gt_label.shape:', gt_label.shape) 30 | # print('gt_label.size():', gt_label.size()) 31 | pos_flag = (gt_label[:, 0, :, :] > 0.5) 32 | pos_num = torch.sum(pos_flag) # get num. of positive examples 33 | 34 | if pos_num > 0: 35 | neg_flag = (gt_label[:, 1, :, :] > 0.5) 36 | neg_num = torch.sum(neg_flag) 37 | neg_num_selected = min(int(self.hnm_ratio * pos_num), int(neg_num)) 38 | # non-negative value 39 | neg_prob = torch.where(neg_flag, pred_score_softmax[:, 1, :, :], \ 40 | torch.zeros_like(pred_score_softmax[:, 1, :, :])) 41 | neg_prob_sort, _ = torch.sort(neg_prob.reshape(1, -1), descending=False) 42 | 43 | prob_threshold = neg_prob_sort[0][neg_num_selected-1] 44 | neg_grad_flag = (neg_prob <= prob_threshold) 45 | loss_mask = torch.cat([pos_flag.unsqueeze(1), neg_grad_flag.unsqueeze(1)], dim=1) 46 | else: 47 | neg_choice_ratio = 0.1 48 | neg_num_selected = int(pred_score_softmax[:, 1, :, :].numel() * neg_choice_ratio) 49 | neg_prob = pred_score_softmax[:, 1, :, :] 50 | neg_prob_sort, _ = torch.sort(neg_prob.reshape(1, -1), descending=False) 51 | prob_threshold = neg_prob_sort[0][neg_num_selected-1] 52 | neg_grad_flag = (neg_prob <= prob_threshold) 53 | loss_mask = torch.cat([pos_flag.unsqueeze(1), neg_grad_flag.unsqueeze(1)], dim=1) 54 | 55 | # cross entropy with mask 56 | pred_score_softmax_masked = pred_score_softmax[loss_mask] 57 | pred_score_log = torch.log(pred_score_softmax_masked) 58 | score_cross_entropy = -gt_label[:, :2, :, :][loss_mask] * pred_score_log 59 | loss_score = torch.sum(score_cross_entropy) / score_cross_entropy.numel() 60 | 61 | mask_bbox = gt_mask[:, 2:6, :, :] 62 | if torch.sum(mask_bbox) == 0: 63 | loss_bbox = torch.zeros_like(loss_score) 64 | else: 65 | predict_bbox = pred_bbox * mask_bbox 66 | label_bbox = gt_label[:, 2:6, :, :] * mask_bbox 67 | loss_bbox = F.mse_loss(predict_bbox, label_bbox, reduction='mean') 68 | # loss_bbox = F.smooth_l1_loss(predict_bbox, label_bbox, reduction='mean') 69 | # loss_bbox = torch.nn.MSELoss(predict_bbox, label_bbox, size_average=True, reduce=True) 70 | # loss_bbox = torch.nn.SmoothL1Loss(predict_bbox, label_bbox, size_average=True, reduce=True) 71 | 72 | loss_branch = loss_score + loss_bbox 73 | loss_branch_list.append(loss_branch) 74 | return loss_branch_list 75 | 76 | 77 |
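# Note on hard negative mining (HNM) in the class below: with hnm_ratio = 5 and,
# say, 8 positive pixels in a batch, at most 5 * 8 = 40 negatives are kept for the
# classification loss -- those with the smallest predicted background probability,
# i.e. the negatives the model is currently most wrong about. All remaining
# negatives are masked out via loss_mask.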
class cross_entropy_with_hnm_for_one_class_detection(nn.Module): 78 | def __init__(self, hnm_ratio, num_output_scales): 79 | super(cross_entropy_with_hnm_for_one_class_detection, self).__init__() 80 | self.hnm_ratio = int(hnm_ratio) 81 | self.num_output_scales = num_output_scales 82 | 83 | def forward(self, outputs, targets): 84 | loss_cls = 0 85 | loss_reg = 0 86 | loss_branch = [] 87 | for i in range(self.num_output_scales): 88 | pred_score = outputs[i * 2] 89 | pred_bbox = outputs[i * 2 + 1] 90 | gt_mask = targets[i * 2].cuda() 91 | gt_label = targets[i * 2 + 1].cuda() 92 | 93 | pred_score_softmax = torch.softmax(pred_score, dim=1) 94 | # loss_mask = torch.ones(pred_score_softmax.shape[0], 95 | # 1, 96 | # pred_score_softmax.shape[2], 97 | # pred_score_softmax.shape[3]) 98 | loss_mask = torch.ones(pred_score_softmax.shape, dtype=torch.bool, device=pred_score_softmax.device)  # boolean mask on the same device; overwritten below when HNM is enabled 99 | 100 | if self.hnm_ratio > 0: 101 | # print('gt_label.shape:', gt_label.shape) 102 | # print('gt_label.size():', gt_label.size()) 103 | pos_flag = (gt_label[:, 0, :, :] > 0.5) 104 | pos_num = torch.sum(pos_flag) # get num. of positive examples 105 | 106 | if pos_num > 0: 107 | neg_flag = (gt_label[:, 1, :, :] > 0.5) 108 | neg_num = torch.sum(neg_flag) 109 | neg_num_selected = min(int(self.hnm_ratio * pos_num), int(neg_num)) 110 | # non-negative value 111 | neg_prob = torch.where(neg_flag, pred_score_softmax[:, 1, :, :], \ 112 | torch.zeros_like(pred_score_softmax[:, 1, :, :])) 113 | neg_prob_sort, _ = torch.sort(neg_prob.reshape(1, -1), descending=False) 114 | 115 | prob_threshold = neg_prob_sort[0][neg_num_selected-1] 116 | neg_grad_flag = (neg_prob <= prob_threshold) 117 | loss_mask = torch.cat([pos_flag.unsqueeze(1), neg_grad_flag.unsqueeze(1)], dim=1) 118 | else: 119 | neg_choice_ratio = 0.1 120 | neg_num_selected = int(pred_score_softmax[:, 1, :, :].numel() * neg_choice_ratio) 121 | neg_prob = pred_score_softmax[:, 1, :, :] 122 | neg_prob_sort, _ = torch.sort(neg_prob.reshape(1, -1), descending=False) 123 | prob_threshold = neg_prob_sort[0][neg_num_selected-1] 124 | neg_grad_flag = (neg_prob <= prob_threshold) 125 | loss_mask = torch.cat([pos_flag.unsqueeze(1), neg_grad_flag.unsqueeze(1)], dim=1) 126 | 127 | # cross entropy with mask 128 | pred_score_softmax_masked = pred_score_softmax[loss_mask] 129 | pred_score_log = torch.log(pred_score_softmax_masked) 130 | score_cross_entropy = -gt_label[:, :2, :, :][loss_mask] * pred_score_log 131 | loss_score = torch.sum(score_cross_entropy) / score_cross_entropy.numel() 132 | 133 | mask_bbox = gt_mask[:, 2:6, :, :] 134 | if torch.sum(mask_bbox) == 0: 135 | loss_bbox = torch.zeros_like(loss_score) 136 | else: 137 | predict_bbox = pred_bbox * mask_bbox 138 | label_bbox = gt_label[:, 2:6, :, :] * mask_bbox 139 | loss_bbox = F.mse_loss(predict_bbox, label_bbox, reduction='sum') / torch.sum(mask_bbox) 140 | # loss_bbox = F.smooth_l1_loss(predict_bbox, label_bbox, reduction='sum') / torch.sum(mask_bbox) 141 | # loss_bbox = torch.nn.MSELoss(predict_bbox, label_bbox, size_average=False, reduce=True) 142 | # loss_bbox = torch.nn.SmoothL1Loss(predict_bbox, label_bbox, size_average=False, reduce=True) 143 | 144 | loss_cls += loss_score 145 | loss_reg += loss_bbox 146 | loss_branch.append(loss_score) 147 | loss_branch.append(loss_bbox) 148 | loss = loss_cls + loss_reg 149 | return loss, loss_branch -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/mean_squared_error_with_hnm_for_one_class_detection.py:
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | squared error with hard negative mining 4 | ''' 5 | import mxnet as mx 6 | 7 | 8 | class mean_squared_error_with_hnm_for_one_class_detection(mx.operator.CustomOp): 9 | def __init__(self, hnm_ratio): 10 | super(mean_squared_error_with_hnm_for_one_class_detection, self).__init__() 11 | self.hnm_ratio = int(hnm_ratio) 12 | 13 | def forward(self, is_train, req, in_data, out_data, aux): 14 | pred = in_data[0] 15 | self.assign(out_data[0], req[0], pred) 16 | 17 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 18 | pred = in_data[0] 19 | label = in_data[1] 20 | loss = pred - label # Standard gradient in MXNET for Regression loss. 21 | if self.hnm_ratio != 0: 22 | pos_flag = (label > 0) 23 | pos_num = mx.ndarray.sum(pos_flag).asnumpy()[0] # get the number of positive samples 24 | if pos_num > 0: 25 | neg_flag = (label < 0.0001) 26 | neg_num = mx.ndarray.sum(neg_flag).asnumpy()[0] 27 | neg_num_selected = min(int(self.hnm_ratio * pos_num), int(neg_num)) 28 | neg_loss = mx.ndarray.abs(loss * neg_flag) # non-negative value 29 | neg_loss_tem = mx.ndarray.sort(neg_loss.reshape((1, -1)), is_ascend=False) 30 | 31 | top_loss_min = neg_loss_tem[0][neg_num_selected].asnumpy()[0] 32 | neg_loss_flag = (neg_loss >= top_loss_min) 33 | loss_mask = mx.ndarray.logical_or(neg_loss_flag, pos_flag) 34 | else: 35 | neg_choice_ratio = 0.1 36 | neg_num_selected = int(loss.size * neg_choice_ratio) 37 | loss_abs = mx.ndarray.abs(loss) 38 | neg_loss_tem = mx.ndarray.sort(loss_abs.reshape((1, -1)), is_ascend=False) 39 | top_loss_min = neg_loss_tem[0][neg_num_selected].asnumpy()[0] 40 | # logging.info('top_loss_min:%0.4f', top_loss_min) 41 | loss_mask = (loss_abs >= top_loss_min) 42 | 43 | # logging.info('remained_num:%d', mx.ndarray.sum(mask).asnumpy()[0]) 44 | 45 | loss *= loss_mask 46 | loss /= loss[0].size 47 | self.assign(in_grad[0], req[0], loss) 48 | 49 | 50 | @mx.operator.register("mean_squared_error_with_hnm_for_one_class_detection") 51 | class mean_squared_error_with_hnm_for_one_class_detection_Prop(mx.operator.CustomOpProp): 52 | def __init__(self, hnm_ratio=10): 53 | super(mean_squared_error_with_hnm_for_one_class_detection_Prop, self).__init__(need_top_grad=False) 54 | self.hnm_ratio = hnm_ratio 55 | 56 | def list_arguments(self): 57 | return ['pred', 'label', 'mask'] 58 | 59 | def list_outputs(self): 60 | return ['output'] 61 | 62 | def infer_shape(self, in_shape): 63 | data_shape = in_shape[0] 64 | label_shape = in_shape[0] 65 | mask_shape = in_shape[0] 66 | output_shape = in_shape[0] 67 | return [data_shape, label_shape, mask_shape], [output_shape], [] 68 | 69 | def create_operator(self, ctx, shapes, dtypes): 70 | return mean_squared_error_with_hnm_for_one_class_detection(self.hnm_ratio) 71 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/mean_squared_error_with_ohem_for_one_class_detection.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | squared error with online hard example mining 4 | ''' 5 | import mxnet as mx 6 | 7 | 8 | class mean_squared_error_with_ohem_for_one_class_detection(mx.operator.CustomOp): 9 | def __init__(self, ohem_ratio): 10 | super(mean_squared_error_with_ohem_for_one_class_detection, self).__init__() 11 | self.ohem_ratio = ohem_ratio 12 | 13 | def forward(self, is_train, req, in_data, out_data, aux): 14
| pred = in_data[0] 15 | self.assign(out_data[0], req[0], pred) 16 | 17 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 18 | pred = out_data[0] 19 | label = in_data[1] 20 | loss = pred - label 21 | 22 | # perform OHEM: keep only the ohem_ratio fraction of samples with the largest absolute error 23 | num_select = int(label.size * self.ohem_ratio) 24 | loss_abs = mx.nd.abs(loss) 25 | loss_sort = mx.nd.sort(loss_abs.reshape((1, -1)), is_ascend=False) 26 | min_threshold = loss_sort[0][num_select].asnumpy()[0] 27 | select_flag = loss_abs >= min_threshold 28 | loss *= select_flag # zero out the gradients of the easy examples 29 | loss /= num_select 30 | 31 | self.assign(in_grad[0], req[0], loss) 32 | 33 | 34 | @mx.operator.register("mean_squared_error_with_ohem_for_one_class_detection") 35 | class mean_squared_error_with_ohem_for_one_class_detection_Prop(mx.operator.CustomOpProp): 36 | def __init__(self, ohem_ratio=0.25): 37 | super(mean_squared_error_with_ohem_for_one_class_detection_Prop, self).__init__(need_top_grad=False) 38 | self.ohem_ratio = ohem_ratio 39 | 40 | def list_arguments(self): 41 | return ['pred', 'label'] 42 | 43 | def list_outputs(self): 44 | return ['output'] 45 | 46 | def infer_shape(self, in_shape): 47 | pred_shape = in_shape[0] 48 | label_shape = in_shape[0] 49 | output_shape = in_shape[0] 50 | return [pred_shape, label_shape], [output_shape], [] 51 | 52 | def create_operator(self, ctx, shapes, dtypes): 53 | return mean_squared_error_with_ohem_for_one_class_detection(self.ohem_ratio) 54 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/solver_GOCD.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import logging 4 | import time 5 | 6 | 7 | class Solver(object): 8 | def __init__(self, 9 | task_name, 10 | torch_module, 11 | trainset_dataiter, 12 | net, 13 | net_initializer, 14 | optimizer, 15 | lr_scheduler, 16 | gpu_id_list, 17 | num_train_loops, 18 | loss_criterion, 19 | train_metric, 20 | display_interval=10, 21 | val_evaluation_interval=100, 22 | valset_dataiter=None, 23 | val_metric=None, 24 | num_val_loops=0, 25 | pretrained_model_param_path=None, 26 | save_prefix=None, 27 | start_index=0, 28 | model_save_interval=None, 29 | train_metric_update_frequency=1): 30 | self.task_name = task_name 31 | self.torch_module = torch_module 32 | self.trainset_dataiter = trainset_dataiter 33 | self.valset_dataiter = valset_dataiter 34 | self.net = net 35 | self.net_initializer = net_initializer 36 | self.gpu_id_list = gpu_id_list 37 | self.optimizer = optimizer 38 | self.lr_scheduler = lr_scheduler 39 | self.num_train_loops = num_train_loops 40 | self.num_val_loops = num_val_loops 41 | self.loss_criterion = loss_criterion 42 | self.train_metric = train_metric 43 | self.val_metric = val_metric 44 | self.display_interval = display_interval 45 | self.val_evaluation_interval = val_evaluation_interval 46 | self.save_prefix = save_prefix 47 | self.start_index = start_index 48 | self.pretrained_model_param_path = pretrained_model_param_path 49 | self.model_save_interval = model_save_interval 50 | 51 | self.train_metric_update_frequency = \ 52 | train_metric_update_frequency if train_metric_update_frequency <= \ 53 | display_interval else display_interval 54 | 55 | def fit(self): 56 | logging.info('Start training on gpu %s.-----------', str(self.gpu_id_list)) 57 | sum_time = 0 58 | for i in range(self.start_index + 1, self.num_train_loops + 1): 59 | start = time.time() 60 | batch = self.trainset_dataiter.next() 61 | images = 
batch.data[0].cuda() 62 | targets = batch.label 63 | 64 | images = (images - 127.5) / 127.5 65 | 66 | outputs = self.net(images) 67 | 68 | loss, loss_branch = self.loss_criterion(outputs, targets) 69 | 70 | # update parameters------------------------------------------------ 71 | self.optimizer.zero_grad() 72 | loss.backward() 73 | self.optimizer.step() 74 | self.lr_scheduler.step() 75 | 76 | # TODO: the train_metric update still needs debugging 77 | # display training process---------------------------------------- 78 | if i % self.train_metric_update_frequency == 0: 79 | self.train_metric.update(loss_branch) 80 | 81 | sum_time += (time.time() - start) 82 | 83 | if i % self.display_interval == 0: 84 | names, values = self.train_metric.get() 85 | 86 | logging.info('Iter[%d] -- Time elapsed: %.1f s. Speed: %.1f images/s.', 87 | i, sum_time, self.display_interval * \ 88 | self.trainset_dataiter.get_batch_size() / sum_time) 89 | for name, value in zip(names, values): 90 | logging.info('%s: --> %.4f', name, value) 91 | logging.info('total loss = %.4f', loss.item() * 10000) 92 | 93 | self.train_metric.reset() 94 | sum_time = 0 95 | 96 | # evaluate the validation set 97 | if i % self.val_evaluation_interval == 0 and self.num_val_loops: 98 | with self.torch_module.no_grad(): # use the injected torch module; torch itself is never imported here 99 | logging.info('Start validating---------------------------') 100 | for val_loop in range(self.num_val_loops): 101 | val_batch = self.valset_dataiter.next() 102 | val_images = val_batch[0].cuda() 103 | val_targets = val_batch[1:].cuda() 104 | 105 | val_outputs = self.net(val_images) 106 | 107 | self.val_metric.update(val_outputs, val_targets) 108 | 109 | names, values = self.val_metric.get() 110 | logging.info('Iter[%d] validation metric -------------', i) 111 | for name, value in zip(names, values): 112 | logging.info('%s: --> %.4f', name, value) 113 | logging.info('End validating ----------------------------') 114 | self.val_metric.reset() 115 | 116 | # save model----------------------------------------------------- 117 | if self.model_save_interval and i % self.model_save_interval == 0: 118 | self.torch_module.save(self.net.state_dict(), 119 | self.save_prefix + '/' + self.task_name + \ 120 | '_{}.pth'.format(self.lr_scheduler._step_count)) -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/train_GOCD.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | 5 | def start_train(param_dict, 6 | task_name, 7 | torch_module, 8 | gpu_id_list, 9 | train_dataiter, 10 | train_metric, 11 | train_metric_update_frequency, 12 | num_train_loops, 13 | val_dataiter, 14 | val_metric, 15 | num_val_loops, 16 | validation_interval, 17 | optimizer, 18 | lr_scheduler, 19 | net, 20 | net_initializer, 21 | loss_criterion, 22 | pretrained_model_param_path, 23 | display_interval, 24 | save_prefix, 25 | model_save_interval, 26 | start_index 27 | ): 28 | 29 | logging.info('PyTorch Version: %s', str(torch_module.__version__)) 30 | logging.info('Training settings:-----------------------------------------------------------------') 31 | for param_name, param_value in param_dict.items(): 32 | logging.info(param_name + ':' + str(param_value)) 33 | logging.info('-----------------------------------------------------------------------------------') 34 | 35 | # init Solver module------------------------------------------------------------------------------------- 36 | from .solver_GOCD import Solver 37 | 38 | solver = Solver( 39 | task_name=task_name, 40 | 
torch_module=torch_module, 41 | trainset_dataiter=train_dataiter, 42 | net=net, 43 | net_initializer=net_initializer, 44 | optimizer=optimizer, 45 | lr_scheduler=lr_scheduler, 46 | gpu_id_list=gpu_id_list, 47 | num_train_loops=num_train_loops, 48 | loss_criterion=loss_criterion, 49 | train_metric=train_metric, 50 | display_interval=display_interval, 51 | val_evaluation_interval=validation_interval, 52 | valset_dataiter=val_dataiter, 53 | val_metric=val_metric, 54 | num_val_loops=num_val_loops, 55 | pretrained_model_param_path=pretrained_model_param_path, 56 | save_prefix=save_prefix, 57 | start_index=start_index, 58 | model_save_interval=model_save_interval, 59 | train_metric_update_frequency=train_metric_update_frequency) 60 | solver.fit() 61 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 becauseofAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A Light and Fast Face Detector for Edge Devices 2 | **This repo is updated frequently; keeping up with the latest code is highly recommended.** 3 | 4 | ## Recent Update 5 | * `2019.10.14` The official PyTorch version of LFFD is online for the first time. For now, the repo is only a preview version. The face detection code for the v2 version is released nightly. 6 | * `2019.10.16` Now the face detection code for the v2 version can be trained normally. The code of the other tasks will be updated soon. 7 | 8 | ## Introduction 9 | This repo is the official PyTorch source code of the paper "[LFFD: A Light and Fast Face Detector for Edge Devices](https://arxiv.org/abs/1904.10633)". Our paper presents a light and fast face detector (**LFFD**) for edge devices. 10 | LFFD carefully balances accuracy and latency, resulting in a small model size and fast inference speed while achieving excellent accuracy. 11 | **Understanding the essence of the receptive field makes detection networks interpretable.** 12 | 13 | In practice, we have deployed it on cloud and edge devices (like the NVIDIA Jetson series and ARM-based embedded systems). The comprehensive performance 14 | of LFFD is robust enough to support our applications. 
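To make the receptive-field (RF) intuition concrete, the following minimal sketch shows how LFFD maps one output-map cell back to a box in the original image, mirroring the decoding logic in `face_detection/accuracy_evaluation/predict.py`. The RF size, stride and center-start values below are illustrative placeholders, not the actual configuration values (those live in `config_farm`):

```
# Toy RF-based bbox decoding, following the formulas in predict.py.
# All numeric values are placeholders; real ones come from the configuration files.
receptive_field = 80   # RF size of one detection branch (placeholder)
stride = 8             # RF stride of that branch (placeholder)
center_start = 7       # image coordinate of the first cell's RF center (placeholder)

def decode_bbox(col, row, reg):
    """Map cell (col, row) of an output map plus its 4 regression values to a box."""
    # each output cell corresponds to one RF center in the input image
    center_x = center_start + stride * col
    center_y = center_start + stride * row
    # the regression outputs are normalized by half of the RF size (predict.py's `constant`)
    half_rf = receptive_field / 2.0
    x1 = center_x - reg[0] * half_rf
    y1 = center_y - reg[1] * half_rf
    x2 = center_x - reg[2] * half_rf
    y2 = center_y - reg[3] * half_rf
    return x1, y1, x2, y2

print(decode_bbox(10, 5, [0.2, 0.3, -0.2, -0.3]))  # -> (79.0, 35.0, 95.0, 59.0)
```

Each output branch repeats this decoding with its own RF size, which is how a single forward pass covers the whole scale range.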
15 | 16 | In fact, our method is **_a general detection framework that is applicable to one-class detection_**, such as face detection, pedestrian detection, 17 | head detection, vehicle detection and so on. In general, an object class whose average ratio of the longer side to the shorter side is 18 | less than 5 is a good fit for our framework. 19 | 20 | Several practical advantages: 21 | 1. large scale coverage, and easy to extend to larger scales by adding more layers without much additional latency. 22 | 2. detect small objects (as small as 10 pixels) in images with extremely large resolution (8K or even larger) in only one inference. 23 | 3. a simple backbone with very common operators makes it easy to deploy anywhere. 24 | 25 | ## Accuracy and Latency 26 | on the way 27 | 28 | ## Getting Started 29 | We re-implement the proposed method using PyTorch. The MXNet version is [here](https://github.com/YonghaoHe/A-Light-and-Fast-Face-Detector-for-Edge-Devices). 30 | 31 | #### Prerequisites (global) 32 | * Python>=3.5 33 | * numpy>=1.16 (lower versions should work as well, but are not tested) 34 | * PyTorch>=1.0.0 ([install guide](https://pytorch.org/get-started/locally/)) 35 | * cv2=3.x (pip3 install opencv-python==3.4.5.20, other versions should work as well, but are not tested) 36 | 37 | > Tips: 38 | * use PyTorch with cuDNN. 39 | * build numpy from source with OpenBLAS. This will improve the training efficiency. 40 | * make sure cv2 links to libjpeg-turbo, not libjpeg. This will improve the JPEG decoding efficiency. 41 | 42 | #### Sub-directory description 43 | * [face_detection](face_detection) contains the code of training, evaluation and inference for LFFD, 44 | the main content of this repo. The trained models of different versions are provided for off-the-shelf deployment. 45 | * [head_detection](head_detection) contains the trained models for head detection. The models are obtained by the 46 | proposed general one class detection framework. 47 | * [pedestrian_detection](pedestrian_detection) contains the trained models for pedestrian detection. The models are obtained by the 48 | proposed general one class detection framework. 49 | * [vehicle_detection](vehicle_detection) contains the trained models for vehicle detection. The models are obtained by the 50 | proposed general one class detection framework. 51 | * [ChasingTrainFramework_GeneralOneClassDetection](ChasingTrainFramework_GeneralOneClassDetection) is a simple 52 | wrapper based on the MXNet Module API for general one class detection. 53 | 54 | #### Installation 55 | 1. Download the repo: 56 | ``` 57 | git clone https://github.com/becauseofAI/lffd-pytorch.git 58 | ``` 59 | 2. Refer to the corresponding sub-project for detailed usage. For now, only the v2 version of [face_detection](face_detection) can be trained; a minimal inference sketch follows. 
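The sketch below is distilled from `face_detection/demo/demo.py` and is meant to be run from inside `face_detection/demo`, as `demo.py` itself is. It assumes the v2 symbol/params files referenced there have been trained or downloaded first; the image path is a placeholder:

```
import sys
sys.path.append('..')  # make config_farm and accuracy_evaluation importable, as demo.py does
import cv2
import mxnet as mx
from config_farm import configuration_10_320_20L_5scales_v2 as cfg
from accuracy_evaluation import predict

predictor = predict.Predict(
    mxnet=mx,
    symbol_file_path='../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json',
    model_file_path='../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params',
    ctx=mx.cpu(),
    receptive_field_list=cfg.param_receptive_field_list,
    receptive_field_stride=cfg.param_receptive_field_stride,
    bbox_small_list=cfg.param_bbox_small_list,
    bbox_large_list=cfg.param_bbox_large_list,
    receptive_field_center_start=cfg.param_receptive_field_center_start,
    num_output_scales=cfg.param_num_output_scales)

im = cv2.imread('test.jpg', cv2.IMREAD_COLOR)  # placeholder image path
# predict() returns (bboxes, inference time in ms) when NMS_flag is True (the default)
bboxes, infer_time = predictor.predict(im, resize_scale=1, score_threshold=0.6,
                                       top_k=10000, NMS_threshold=0.4)
for x1, y1, x2, y2, score in bboxes:
    cv2.rectangle(im, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
cv2.imwrite('test_result.jpg', im)
```

Note that the evaluation/demo utilities shown here still load MXNet models; the PyTorch training pipeline is the part being ported in this repo.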
60 | 61 | ## Citation 62 | If you benefit from our work in your research and products, please kindly cite our paper 63 | ``` 64 | @inproceedings{LFFD, 65 | title={LFFD: A Light and Fast Face Detector for Edge Devices}, 66 | author={He, Yonghao and Xu, Dezhong and Wu, Lifang and Jian, Meng and Xiang, Shiming and Pan, Chunhong}, 67 | booktitle={arXiv:1904.10633}, 68 | year={2019} 69 | } 70 | ``` 71 | 72 | ## To Do List 73 | - [ ] face detection 74 | - [ ] pedestrian detection 75 | - [ ] head detection 76 | - [ ] vehicle detection 77 | - [ ] license plate detection 78 | - [ ] [reconstruction version](https://github.com/becauseofAI/refinanet) 79 | 80 | ## Contact 81 | becauseofAI[1], Yonghao He[2] 82 | 83 | [1]E-mails: helloai777@gmail.com 84 | [2]E-mails: yonghao.he@ia.ac.cn / yonghao.he@aliyun.com 85 | 86 | **If you are interested in this work, any innovative contributions are welcome!!!** 87 | 88 | **Internship is open at NLPR, CASIA all the time. Send me your resumes!** 89 | -------------------------------------------------------------------------------- /face_detection/README.md: -------------------------------------------------------------------------------- 1 | ## Face Detection 2 | This subdir includes the face detection related code. Some descriptions have 3 | already been presented in the repo README.md. 4 | 5 | ### Recent Update 6 | * `2019.10.14` The model v2 can be tried for training nightly. 7 | * `2019.10.16` **The model v2 can be trained normally.** 8 | 9 | ### Brief Introduction to Model Version 10 | * v1 - refer to the paper for details 11 | * v2 - the detection scale is 10-320 (vs 10-560 in v1), the number of layers is 20, 12 | and the backbone is modified for faster inference. Refer to `./net_farm/naivenet_structures.xlsx` for details. 13 | 14 | ### Accuracy 15 | on the way 16 | 17 | ### Inference Latency 18 | on the way 19 | 20 | ### User Instructions 21 | > **Now only for training v2 nightly.** 22 | 23 | First, we introduce the functionality of each subdirectory. 24 | * [net_farm](net_farm). This folder contains net definitions for all model versions. 25 | * [metric_farm](metric_farm). This folder contains the metrics for training monitoring. 26 | * [data_provider_farm](data_provider_farm). This folder contains the code of raw data processing/formatting/packing&unpacking. 27 | * [data_iterator_farm](data_iterator_farm). This folder contains the code of multi-threaded data prefetching. 28 | **This is the most important part, since it describes the essence of LFFD!!!** 29 | * [config_farm](config_farm). This folder contains the configurations of all model versions. The training is started by running the corresponding config python script. 30 | 31 | Second, we present a common procedure for running the code for training (taking v2 as an example). 32 | 33 | 1. prepare the net model `net_farm/naivenet.py` 34 | 2. prepare the training data by using the code in `data_provider_farm`. We provide packed 35 | training data of the WIDERFACE trainset. Please download it from **Data Download**. 36 | 3. adjust the code around line 241 in `data_iterator_farm/multithread_dataiter_for_cross_entropy_v2`. 37 | 4. set the variables in the configuration py script in `config_farm`. 38 | 5. run `python configuration_10_320_20L_5scales_v2.py` in the `config_farm` directory. 39 | 40 | ### Data Download 41 | We have packed the training data of the WIDERFACE train set. In the data, faces smaller than 8 pixels are ignored, and some pure negative 42 | images cropped from the training images are also added. 
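If you prefer to pack your own training data instead of downloading ours, the flow below mirrors `write_file()`/`read_file()` in `data_provider_farm/pickle_provider.py`. The list file and pickle file names are placeholders; each list line follows the format documented in `text_list_adapter.py` (`[image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],...`):

```
import sys
sys.path.append('..')  # repo root, for ChasingTrainFramework_GeneralOneClassDetection
# run from inside face_detection so that data_provider_farm is importable as a package
from data_provider_farm.text_list_adapter import TextListAdapter
from data_provider_farm.pickle_provider import PickleProvider

list_file = './data_provider_farm/data_folder/my_data_list.txt'  # placeholder
pickle_file = './data_provider_farm/data_folder/my_data.pkl'     # placeholder

# pack: iterate the list file and store JPEG-compressed samples in one pickle
adapter = TextListAdapter(list_file)
packer = PickleProvider(pickle_file, encode_quality=90, data_adapter=adapter)
packer.write()

# read back: a PickleProvider built without an adapter loads the pickle for the data iterators
provider = PickleProvider(pickle_file)
print('positives: %d, negatives: %d' % (len(provider.positive_index), len(provider.negative_index)))
im, flag, bboxes = provider.read_by_index(provider.positive_index[0])
```

If you just want the ready-made WIDERFACE package, use the download links below.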
We provide three ways to download the packed data: 43 | * [Baidu Yunpan](https://pan.baidu.com/s/1a8Wk4GNkfPYbKAFSrZzFIQ) (pwd:e7bv) 44 | * [MS OneDrive](https://1drv.ms/u/s!Av9h0YMgxdaSgwiP4nKDasu4m73J?e=v5UfWQ) 45 | * [Google Drive](https://drive.google.com/open?id=1O3nJ6mQKD_sdFpfXmYoK7xnTUg3To7kO) 46 | 47 | After you download the data, you can put it anywhere. Remember to set `param_trainset_pickle_file_path` variable in the configuration file. (we 48 | usually put the data into the folder: `./data_provider_farm/data_folder/`) 49 | -------------------------------------------------------------------------------- /face_detection/accuracy_evaluation/evaluation_on_fddb.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import cv2 5 | import math 6 | import sys 7 | sys.path.append('..') 8 | # change the config as your need 9 | from config_farm import configuration_10_320_20L_5scales_v2 as cfg 10 | import mxnet 11 | from predict import Predict 12 | 13 | # set the proper symbol file and model file 14 | symbol_file_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json' 15 | model_file_path = '../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params' 16 | my_predictor = Predict(mxnet=mxnet, 17 | symbol_file_path=symbol_file_path, 18 | model_file_path=model_file_path, 19 | ctx=mxnet.gpu(0), 20 | receptive_field_list=cfg.param_receptive_field_list, 21 | receptive_field_stride=cfg.param_receptive_field_stride, 22 | bbox_small_list=cfg.param_bbox_small_list, 23 | bbox_large_list=cfg.param_bbox_large_list, 24 | receptive_field_center_start=cfg.param_receptive_field_center_start, 25 | num_output_scales=cfg.param_num_output_scales) 26 | 27 | 28 | # set fddb root, the path should look like XXXX/originalPics 29 | fddb_image_root = 'XXXX/originalPics' 30 | # set the list file path, the path should look like XXXX/FDDB-folds/annotatedList.txt 31 | image_list_file = 'XXXX/FDDB-folds/annotatedList.txt' 32 | result_file_name = './fddb_' + os.path.basename(model_file_path).split('.')[0] + '_result.txt' 33 | fin = open(image_list_file, 'r') 34 | fout = open(result_file_name, 'w') 35 | resize_scale = 1.0 36 | score_threshold = 0.11 37 | NMS_threshold = 0.4 38 | counter = 0 39 | for line in fin: 40 | line = line.strip('\n') 41 | 42 | im = cv2.imread(os.path.join(fddb_image_root, line + '.jpg'), cv2.IMREAD_COLOR) 43 | 44 | bboxes = my_predictor.predict(im, resize_scale=resize_scale, score_threshold=score_threshold, top_k=10000, NMS_threshold=NMS_threshold) 45 | 46 | # for bbox in bboxes: 47 | # cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 1) 48 | # cv2.imshow('im', im) 49 | # cv2.waitKey() 50 | 51 | fout.write(line + '\n') 52 | fout.write(str(len(bboxes)) + '\n') 53 | for bbox in bboxes: 54 | fout.write('%d %d %d %d %.03f' % ( 55 | math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1]), 56 | bbox[4] if bbox[4] <= 1 else 1) + '\n') 57 | counter += 1 58 | print('[%d] %s is processed.' 
% (counter, line)) 59 | fin.close() 60 | fout.close() 61 | 62 | -------------------------------------------------------------------------------- /face_detection/accuracy_evaluation/evaluation_on_widerface.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | import cv2 5 | import math 6 | sys.path.append('..') 7 | # change the config as your need 8 | from config_farm import configuration_10_320_20L_5scales_v2 as cfg 9 | import mxnet 10 | from predict import Predict 11 | 12 | # set the proper symbol file and model file 13 | symbol_file_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json' 14 | model_file_path = '../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params' 15 | my_predictor = Predict(mxnet=mxnet, 16 | symbol_file_path=symbol_file_path, 17 | model_file_path=model_file_path, 18 | ctx=mxnet.gpu(0), 19 | receptive_field_list=cfg.param_receptive_field_list, 20 | receptive_field_stride=cfg.param_receptive_field_stride, 21 | bbox_small_list=cfg.param_bbox_small_list, 22 | bbox_large_list=cfg.param_bbox_large_list, 23 | receptive_field_center_start=cfg.param_receptive_field_center_start, 24 | num_output_scales=cfg.param_num_output_scales) 25 | 26 | # set the val root, the path should look like XXXX/WIDER_val/images 27 | val_image_root = 'XXXX/WIDER_val/images' 28 | val_result_txt_save_root = './widerface_val_' + os.path.basename(model_file_path).split('.')[0] + '_result_txt/' 29 | if not os.path.exists(val_result_txt_save_root): 30 | os.makedirs(val_result_txt_save_root) 31 | 32 | resize_scale = 1 33 | score_threshold = 0.11 34 | NMS_threshold = 0.4 35 | counter = 0 36 | for parent, dir_names, file_names in os.walk(val_image_root): 37 | for file_name in file_names: 38 | if not file_name.lower().endswith('jpg'): 39 | continue 40 | 41 | im = cv2.imread(os.path.join(parent, file_name), cv2.IMREAD_COLOR) 42 | 43 | bboxes = my_predictor.predict(im, resize_scale=resize_scale, score_threshold=score_threshold, top_k=10000, NMS_threshold=NMS_threshold) 44 | 45 | # for bbox in bboxes: 46 | # cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 1) 47 | # cv2.imshow('im',im) 48 | # cv2.waitKey() 49 | 50 | event_name = parent.split('/')[-1] 51 | if not os.path.exists(os.path.join(val_result_txt_save_root, event_name)): 52 | os.makedirs(os.path.join(val_result_txt_save_root, event_name)) 53 | fout = open(os.path.join(val_result_txt_save_root, event_name, file_name.split('.')[0] + '.txt'), 'w') 54 | fout.write(file_name.split('.')[0] + '\n') 55 | fout.write(str(len(bboxes)) + '\n') 56 | for bbox in bboxes: 57 | fout.write('%d %d %d %d %.03f' % (math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1]), bbox[4] if bbox[4] <= 1 else 1) + '\n') 58 | fout.close() 59 | counter += 1 60 | print('[%d] %s is processed.' 
% (counter, file_name)) 61 | 62 | 63 | -------------------------------------------------------------------------------- /face_detection/accuracy_evaluation/predict.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | import numpy 5 | import cv2 6 | import time 7 | 8 | 9 | # empty data batch class for dynamical properties 10 | class DataBatch: 11 | pass 12 | 13 | 14 | def NMS(boxes, overlap_threshold): 15 | ''' 16 | 17 | :param boxes: numpy nx5, n is the number of boxes, 0:4->x1, y1, x2, y2, 4->score 18 | :param overlap_threshold: 19 | :return: 20 | ''' 21 | if boxes.shape[0] == 0: 22 | return boxes 23 | 24 | # if the bounding boxes are integers, convert them to floats -- 25 | # this is important since we'll be doing a bunch of divisions 26 | if boxes.dtype != numpy.float32: 27 | boxes = boxes.astype(numpy.float32) 28 | 29 | # initialize the list of picked indexes 30 | pick = [] 31 | # grab the coordinates of the bounding boxes 32 | x1 = boxes[:, 0] 33 | y1 = boxes[:, 1] 34 | x2 = boxes[:, 2] 35 | y2 = boxes[:, 3] 36 | sc = boxes[:, 4] 37 | widths = x2 - x1 38 | heights = y2 - y1 39 | 40 | # compute the area of the bounding boxes and sort the bounding 41 | # boxes by the bottom-right y-coordinate of the bounding box 42 | area = heights * widths 43 | idxs = numpy.argsort(sc) # sort the scores in ascending order 44 | 45 | # keep looping while some indexes still remain in the indexes list 46 | while len(idxs) > 0: 47 | # grab the last index in the indexes list and add the 48 | # index value to the list of picked indexes 49 | last = len(idxs) - 1 50 | i = idxs[last] 51 | pick.append(i) 52 | 53 | # compare the remaining boxes with the current highest-score box 54 | xx1 = numpy.maximum(x1[i], x1[idxs[:last]]) 55 | yy1 = numpy.maximum(y1[i], y1[idxs[:last]]) 56 | xx2 = numpy.minimum(x2[i], x2[idxs[:last]]) 57 | yy2 = numpy.minimum(y2[i], y2[idxs[:last]]) 58 | 59 | # compute the width and height of the intersection box 60 | w = numpy.maximum(0, xx2 - xx1 + 1) 61 | h = numpy.maximum(0, yy2 - yy1 + 1) 62 | 63 | # compute the ratio of overlap (intersection over the area of each remaining box) 64 | overlap = (w * h) / area[idxs[:last]] 65 | 66 | # delete all indexes from the index list that have overlap greater than the threshold 67 | idxs = numpy.delete(idxs, numpy.concatenate(([last], numpy.where(overlap > overlap_threshold)[0]))) 68 | 69 | # return only the bounding boxes that were picked using the 70 | # integer data type 71 | return boxes[pick] 72 | 73 | 74 | class Predict(object): 75 | 76 | def __init__(self, 77 | mxnet, 78 | symbol_file_path, 79 | model_file_path, 80 | ctx, 81 | receptive_field_list, 82 | receptive_field_stride, 83 | bbox_small_list, 84 | bbox_large_list, 85 | receptive_field_center_start, 86 | num_output_scales 87 | ): 88 | self.mxnet = mxnet 89 | self.symbol_file_path = symbol_file_path 90 | self.model_file_path = model_file_path 91 | self.ctx = ctx 92 | 93 | self.receptive_field_list = receptive_field_list 94 | self.receptive_field_stride = receptive_field_stride 95 | self.bbox_small_list = bbox_small_list 96 | self.bbox_large_list = bbox_large_list 97 | self.receptive_field_center_start = receptive_field_center_start 98 | self.num_output_scales = num_output_scales 99 | self.constant = [i / 2.0 for i in self.receptive_field_list] # half of each RF size; used to de-normalize the bbox regression 100 | self.input_height = 480 101 | self.input_width = 640 102 | self.__load_model() 103 | 104 | def __load_model(self): 105 | # load symbol and parameters 106 | print('----> load symbol file: %s\n----> load model file: %s' % (self.symbol_file_path, self.model_file_path)) 107 | if not 
os.path.exists(self.symbol_file_path): 108 | print('The symbol file does not exist!!!!') 109 | sys.exit(1) 110 | if not os.path.exists(self.model_file_path): 111 | print('The model file does not exist!!!!') 112 | sys.exit(1) 113 | self.symbol_net = self.mxnet.symbol.load(self.symbol_file_path) 114 | data_name = 'data' 115 | data_name_shape = (data_name, (1, 3, self.input_height, self.input_width)) 116 | self.module = self.mxnet.module.Module(symbol=self.symbol_net, 117 | data_names=[data_name], 118 | label_names=None, 119 | context=self.ctx, 120 | work_load_list=None) 121 | self.module.bind(data_shapes=[data_name_shape], 122 | for_training=False) 123 | 124 | save_dict = self.mxnet.nd.load(self.model_file_path) 125 | self.arg_name_arrays = dict() 126 | self.arg_name_arrays['data'] = self.mxnet.nd.zeros((1, 3, self.input_height, self.input_width), self.ctx) 127 | self.aux_name_arrays = {} 128 | for k, v in save_dict.items(): 129 | tp, name = k.split(':', 1) 130 | if tp == 'arg': 131 | self.arg_name_arrays.update({name: v.as_in_context(self.ctx)}) 132 | if tp == 'aux': 133 | self.aux_name_arrays.update({name: v.as_in_context(self.ctx)}) 134 | self.module.init_params(arg_params=self.arg_name_arrays, 135 | aux_params=self.aux_name_arrays, 136 | allow_missing=True) 137 | print('----> Model is loaded successfully.') 138 | 139 | def predict(self, image, resize_scale=1, score_threshold=0.8, top_k=100, NMS_threshold=0.3, NMS_flag=True, skip_scale_branch_list=[]): 140 | 141 | if image.ndim != 3 or image.shape[2] != 3: 142 | print('Only RGB images are supported.') 143 | return None 144 | 145 | bbox_collection = [] 146 | 147 | shorter_side = min(image.shape[:2]) 148 | if shorter_side * resize_scale < 128: 149 | resize_scale = float(128) / shorter_side 150 | 151 | input_image = cv2.resize(image, (0, 0), fx=resize_scale, fy=resize_scale) 152 | 153 | input_image = input_image.astype(dtype=numpy.float32) 154 | input_image = input_image[:, :, :, numpy.newaxis] 155 | input_image = input_image.transpose([3, 2, 0, 1]) 156 | 157 | data_batch = DataBatch() 158 | data_batch.data = [self.mxnet.ndarray.array(input_image, self.ctx)] 159 | 160 | tic = time.time() 161 | self.module.forward(data_batch=data_batch, is_train=False) 162 | results = self.module.get_outputs() 163 | outputs = [] 164 | for output in results: 165 | outputs.append(output.asnumpy()) 166 | toc = time.time() 167 | infer_time = (toc - tic) * 1000 168 | 169 | for i in range(self.num_output_scales): 170 | if i in skip_scale_branch_list: 171 | continue 172 | 173 | score_map = numpy.squeeze(outputs[i * 2], (0, 1)) 174 | 175 | # score_map_show = score_map * 255 176 | # score_map_show[score_map_show < 0] = 0 177 | # score_map_show[score_map_show > 255] = 255 178 | # cv2.imshow('score_map' + str(i), cv2.resize(score_map_show.astype(dtype=numpy.uint8), (0, 0), fx=2, fy=2)) 179 | # cv2.waitKey() 180 | 181 | bbox_map = numpy.squeeze(outputs[i * 2 + 1], 0) 182 | 183 | RF_center_Xs = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * x for x in range(score_map.shape[1])]) 184 | RF_center_Xs_mat = numpy.tile(RF_center_Xs, [score_map.shape[0], 1]) 185 | RF_center_Ys = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * y for y in range(score_map.shape[0])]) 186 | RF_center_Ys_mat = numpy.tile(RF_center_Ys, [score_map.shape[1], 1]).T 187 | 188 | x_lt_mat = RF_center_Xs_mat - bbox_map[0, :, :] * self.constant[i] 189 | y_lt_mat = RF_center_Ys_mat - bbox_map[1, :, :] * self.constant[i] 190 | x_rb_mat 
= RF_center_Xs_mat - bbox_map[2, :, :] * self.constant[i] 191 | y_rb_mat = RF_center_Ys_mat - bbox_map[3, :, :] * self.constant[i] 192 | 193 | x_lt_mat = x_lt_mat / resize_scale 194 | x_lt_mat[x_lt_mat < 0] = 0 195 | y_lt_mat = y_lt_mat / resize_scale 196 | y_lt_mat[y_lt_mat < 0] = 0 197 | x_rb_mat = x_rb_mat / resize_scale 198 | x_rb_mat[x_rb_mat > image.shape[1]] = image.shape[1] 199 | y_rb_mat = y_rb_mat / resize_scale 200 | y_rb_mat[y_rb_mat > image.shape[0]] = image.shape[0] 201 | 202 | select_index = numpy.where(score_map > score_threshold) 203 | for idx in range(select_index[0].size): 204 | bbox_collection.append((x_lt_mat[select_index[0][idx], select_index[1][idx]], 205 | y_lt_mat[select_index[0][idx], select_index[1][idx]], 206 | x_rb_mat[select_index[0][idx], select_index[1][idx]], 207 | y_rb_mat[select_index[0][idx], select_index[1][idx]], 208 | score_map[select_index[0][idx], select_index[1][idx]])) 209 | 210 | # NMS 211 | bbox_collection = sorted(bbox_collection, key=lambda item: item[-1], reverse=True) 212 | if len(bbox_collection) > top_k: 213 | bbox_collection = bbox_collection[0:top_k] 214 | bbox_collection_numpy = numpy.array(bbox_collection, dtype=numpy.float32) 215 | 216 | if NMS_flag: 217 | final_bboxes = NMS(bbox_collection_numpy, NMS_threshold) 218 | final_bboxes_ = [] 219 | for i in range(final_bboxes.shape[0]): 220 | final_bboxes_.append((final_bboxes[i, 0], final_bboxes[i, 1], final_bboxes[i, 2], final_bboxes[i, 3], final_bboxes[i, 4])) 221 | 222 | return final_bboxes_, infer_time 223 | else: 224 | return bbox_collection_numpy, infer_time 225 | 226 | 227 | def run_prediction_folder(): 228 | sys.path.append('..') 229 | from config_farm import configuration_10_560_25L_8scales_v1 as cfg 230 | import mxnet 231 | 232 | debug_folder = '' # fill the folder that contains images 233 | file_name_list = [file_name for file_name in os.listdir(debug_folder) if file_name.lower().endswith('jpg')] 234 | 235 | symbol_file_path = '../symbol_farm/symbol_10_560_25L_8scales_v1_deploy.json' 236 | model_file_path = '../saved_model/configuration_10_560_25L_8scales_v1/train_10_560_25L_8scales_v1_iter_1400000.params' 237 | my_predictor = Predict(mxnet=mxnet, 238 | symbol_file_path=symbol_file_path, 239 | model_file_path=model_file_path, 240 | ctx=mxnet.gpu(0), 241 | receptive_field_list=cfg.param_receptive_field_list, 242 | receptive_field_stride=cfg.param_receptive_field_stride, 243 | bbox_small_list=cfg.param_bbox_small_list, 244 | bbox_large_list=cfg.param_bbox_large_list, 245 | receptive_field_center_start=cfg.param_receptive_field_center_start, 246 | num_output_scales=cfg.param_num_output_scales) 247 | 248 | for file_name in file_name_list: 249 | im = cv2.imread(os.path.join(debug_folder, file_name)) 250 | 251 | bboxes, infer_time = my_predictor.predict(im, resize_scale=1, score_threshold=0.3, top_k=10000, NMS_threshold=0.3, NMS_flag=True, skip_scale_branch_list=[]) # predict() returns (bboxes, inference time in ms) 252 | for bbox in bboxes: 253 | cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2) 254 | 255 | if max(im.shape[:2]) > 1600: 256 | scale = 1600/max(im.shape[:2]) 257 | im = cv2.resize(im, (0, 0), fx=scale, fy=scale) 258 | cv2.imshow('im', im) 259 | cv2.waitKey() 260 | # cv2.imwrite(os.path.join(debug_folder, file_name.replace('.jpg','_result.jpg')), im) 261 | 262 | 263 | if __name__ == '__main__': 264 | run_prediction_folder() 265 | -------------------------------------------------------------------------------- /face_detection/config_farm/__init__.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @date : 18-11-28 3 | # @author : MindBreaker 4 | # @module : -------------------------------------------------------------------------------- /face_detection/data_iterator_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/data_iterator_farm/__init__.py -------------------------------------------------------------------------------- /face_detection/data_provider_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/data_provider_farm/__init__.py -------------------------------------------------------------------------------- /face_detection/data_provider_farm/data_folder/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/data_provider_farm/data_folder/.gitkeep -------------------------------------------------------------------------------- /face_detection/data_provider_farm/pickle_provider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This provider accepts an adapter, saves the dataset in a pickle file, and loads the whole dataset into memory for the data iterators 3 | ''' 4 | 5 | import cv2 6 | import numpy 7 | import pickle 8 | import sys 9 | 10 | sys.path.append('../..') 11 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_provider import ProviderBaseclass 12 | from data_provider_farm.text_list_adapter import TextListAdapter 13 | 14 | 15 | class PickleProvider(ProviderBaseclass): 16 | """ 17 | This class provides methods to save and read data. 18 | By default, images are compressed using JPG format. 
19 | If data_adapter is not None, the provider saves data; otherwise it reads data. 20 | """ 21 | 22 | def __init__(self, 23 | pickle_file_path, 24 | encode_quality=90, 25 | data_adapter=None): 26 | ProviderBaseclass.__init__(self) 27 | 28 | if data_adapter: # write data 29 | 30 | self.data_adapter = data_adapter 31 | self.data = {} 32 | self.counter = 0 33 | self.pickle_file_path = pickle_file_path 34 | 35 | else: # read data 36 | 37 | self.data = pickle.load(open(pickle_file_path, 'rb')) 38 | # get positive and negative indices 39 | self._positive_index = [] 40 | self._negative_index = [] 41 | for k, v in self.data.items(): 42 | if v[1] == 0: # negative 43 | self._negative_index.append(k) 44 | else: # positive 45 | self._positive_index.append(k) 46 | 47 | self.compression_mode = '.jpg' 48 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality] 49 | 50 | @property 51 | def positive_index(self): 52 | return self._positive_index 53 | 54 | @property 55 | def negative_index(self): 56 | return self._negative_index 57 | 58 | def write(self): 59 | 60 | for data_item in self.data_adapter.get_one(): 61 | 62 | temp_sample = [] 63 | im, bboxes = data_item 64 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params) 65 | if buf is None or buf.size == 0: 66 | print('The encoded buffer is empty.') 67 | continue 68 | if not ret: 69 | print('An error occurred during compression.') 70 | continue 71 | temp_sample.append(buf) 72 | 73 | if isinstance(bboxes, str): # negative sample 74 | temp_sample.append(0) 75 | temp_sample.append(int(bboxes)) 76 | else: 77 | temp_sample.append(1) 78 | temp_sample.append(bboxes) 79 | 80 | self.data[self.counter] = temp_sample 81 | print('Successfully save the %d-th data item.' % self.counter) 82 | self.counter += 1 83 | 84 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) 85 | 86 | def read_by_index(self, index): 87 | im_buf, flag, bboxes = self.data[index] 88 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR) 89 | return im, flag, bboxes 90 | 91 | 92 | def write_file(): 93 | data_list_file_path = './data_folder/data_list_2019-05-07-14-47-19.txt' 94 | adapter = TextListAdapter(data_list_file_path) 95 | 96 | pickle_file_path = './data_folder/data_2019-05-07-14-47-19.pkl' 97 | encode_quality = 90 98 | packer = PickleProvider(pickle_file_path, encode_quality, adapter) 99 | packer.write() 100 | 101 | 102 | def read_file(): 103 | pickle_file_path = './data_folder/data_2019-05-07-14-47-19.pkl' 104 | 105 | provider = PickleProvider(pickle_file_path) 106 | positive_index = provider.positive_index 107 | negative_index = provider.negative_index 108 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index))) 109 | # all_index = positive_index+negative_index 110 | import random 111 | random.shuffle(positive_index) 112 | 113 | for i, index in enumerate(positive_index): 114 | im, flag, bboxes_numpy = provider.read_by_index(index) 115 | if isinstance(bboxes_numpy, numpy.ndarray): 116 | for n in range(bboxes_numpy.shape[0]): 117 | cv2.rectangle(im, (bboxes_numpy[n, 0], bboxes_numpy[n, 1]), 118 | (bboxes_numpy[n, 0] + bboxes_numpy[n, 2], bboxes_numpy[n, 1] + bboxes_numpy[n, 3]), (0, 255, 0), 1) 119 | cv2.imshow('im', im) 120 | cv2.waitKey() 121 | 122 | 123 | if __name__ == '__main__': 124 | write_file() 125 | # read_file() 126 | -------------------------------------------------------------------------------- /face_detection/data_provider_farm/text_list_adapter.py: 
-------------------------------------------------------------------------------- 1 | ''' 2 | This adapter accepts a text file as input that describes the annotated data. 3 | Each line in the text is formatted as: 4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]...... 5 | ''' 6 | 7 | import cv2 8 | import numpy 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_data_adapter import DataAdapterBaseclass 10 | 11 | 12 | class TextListAdapter(DataAdapterBaseclass): 13 | 14 | def __init__(self, data_list_file_path): 15 | 16 | DataAdapterBaseclass.__init__(self) 17 | fin = open(data_list_file_path, 'r') 18 | self.lines = fin.readlines() 19 | fin.close() 20 | self.line_counter = 0 21 | 22 | def __del__(self): 23 | pass 24 | 25 | def get_one(self): 26 | """ 27 | This function uses 'yield' to return samples 28 | """ 29 | while self.line_counter < len(self.lines): 30 | 31 | line = self.lines[self.line_counter].strip('\n').split(',') 32 | if line[1] == '1': # pos sample 33 | assert len(line[3:]) == 4 * int(line[2]) 34 | 35 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED) 36 | 37 | if line[1] == '0': 38 | yield im, '0' 39 | self.line_counter += 1 40 | continue 41 | 42 | num_bboxes = int(line[2]) 43 | bboxes = [] 44 | for i in range(num_bboxes): 45 | x = float(line[3 + i * 4]) 46 | y = float(line[3 + i * 4 + 1]) 47 | width = float(line[3 + i * 4 + 2]) 48 | height = float(line[3 + i * 4 + 3]) 49 | 50 | bboxes.append([x, y, width, height]) 51 | 52 | bboxes = numpy.array(bboxes, dtype=numpy.float32) 53 | yield im, bboxes 54 | 55 | self.line_counter += 1 56 | 57 | 58 | if __name__ == '__main__': 59 | pass 60 | -------------------------------------------------------------------------------- /face_detection/demo/demo.py: -------------------------------------------------------------------------------- 1 | """LFFD Demo.""" 2 | import os, sys 3 | import argparse 4 | import cv2 5 | import time 6 | import mxnet as mx 7 | import numpy as np 8 | 9 | sys.path.append("..") 10 | from accuracy_evaluation import predict 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='LFFD Demo.') 15 | parser.add_argument('--version', type=str, default='v2', 16 | help='The version of pretrained model, now support "v1" and "v2".') 17 | parser.add_argument('--mode', type=str, default='image', 18 | help='The format of input data, now support "image" of jpg and "video" of mp4.') 19 | parser.add_argument('--use-gpu', action='store_true', # a boolean flag; type=bool would turn any non-empty string into True 20 | help='Use GPU for inference. Default is cpu.') 21 | parser.add_argument('--data', type=str, default='./data', 22 | help='The path of input and output file.') 23 | args = parser.parse_args() 24 | return args 25 | 26 | 27 | def main(): 28 | args = parse_args() 29 | # context list 30 | if args.use_gpu: 31 | ctx = mx.gpu(0) 32 | else: 33 | ctx = mx.cpu() 34 | 35 | if args.version == 'v1': 36 | from config_farm import configuration_10_560_25L_8scales_v1 as cfg # matches the v1 symbol/params files below 37 | 38 | symbol_file_path = '../symbol_farm/symbol_10_560_25L_8scales_v1_deploy.json' 39 | model_file_path = '../saved_model/configuration_10_560_25L_8scales_v1/train_10_560_25L_8scales_v1_iter_1400000.params' 40 | elif args.version == 'v2': 41 | from config_farm import configuration_10_320_20L_5scales_v2 as cfg 42 | 43 | symbol_file_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json' 44 | model_file_path = '../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params' 45 | else: 46 | raise
TypeError('Unsupported LFFD Version.') 47 | 48 | face_predictor = predict.Predict(mxnet=mx, 49 | symbol_file_path=symbol_file_path, 50 | model_file_path=model_file_path, 51 | ctx=ctx, 52 | receptive_field_list=cfg.param_receptive_field_list, 53 | receptive_field_stride=cfg.param_receptive_field_stride, 54 | bbox_small_list=cfg.param_bbox_small_list, 55 | bbox_large_list=cfg.param_bbox_large_list, 56 | receptive_field_center_start=cfg.param_receptive_field_center_start, 57 | num_output_scales=cfg.param_num_output_scales) 58 | 59 | if args.mode == 'image': 60 | data_folder = args.data 61 | file_name_list = [file_name for file_name in os.listdir(data_folder) \ 62 | if file_name.lower().endswith('jpg')] 63 | 64 | for file_name in file_name_list: 65 | im = cv2.imread(os.path.join(data_folder, file_name)) 66 | 67 | bboxes, infer_time = face_predictor.predict(im, resize_scale=1, score_threshold=0.6, top_k=10000, \ 68 | NMS_threshold=0.4, NMS_flag=True, skip_scale_branch_list=[]) 69 | 70 | for bbox in bboxes: 71 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2) 72 | 73 | # if max(im.shape[:2]) > 1600: 74 | # scale = 1600/max(im.shape[:2]) 75 | # im = cv2.resize(im, (0, 0), fx=scale, fy=scale) 76 | cv2.imshow('im', im) 77 | cv2.waitKey(5000) 78 | cv2.imwrite(os.path.join(data_folder, file_name.replace('.jpg', '_result.png')), im) 79 | elif args.mode == 'video': 80 | # win_name = 'LFFD DEMO' 81 | # cv2.namedWindow(win_name, cv2.WINDOW_NORMAL) 82 | data_folder = args.data 83 | file_name_list = [file_name for file_name in os.listdir(data_folder) \ 84 | if file_name.lower().endswith('mp4')] 85 | for file_name in file_name_list: 86 | out_file = os.path.join(data_folder, file_name.replace('.mp4', '_v2_gpu_result.avi')) 87 | cap = cv2.VideoCapture(os.path.join(data_folder, file_name)) 88 | vid_writer = cv2.VideoWriter(out_file, \ 89 | cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 60, \ 90 | (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), \ 91 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))) 92 | while cv2.waitKey(1) < 0: 93 | ret, frame = cap.read() 94 | if ret: 95 | h, w, c = frame.shape 96 | 97 | if not ret: 98 | print("Done processing of %s" % file_name) 99 | print("Output file is stored as %s" % out_file) 100 | cv2.waitKey(3000) 101 | break 102 | 103 | tic = time.time() 104 | bboxes, infer_time = face_predictor.predict(frame, resize_scale=1, score_threshold=0.6, top_k=10000, \ 105 | NMS_threshold=0.4, NMS_flag=True, skip_scale_branch_list=[]) 106 | toc = time.time() 107 | detect_time = (toc - tic) * 1000 108 | 109 | face_num = 0 110 | for bbox in bboxes: 111 | face_num += 1 112 | cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2) 113 | 114 | computing_platform = 'Computing platform: NVIDIA GPU FP32' 115 | cv2.putText(frame, computing_platform, (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3) 116 | input_resolution = 'Network input resolution: %sx%s' % (w, h) 117 | cv2.putText(frame, input_resolution, (5, 65), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3) 118 | infer_time_info = 'Inference time: %.2f ms' % (infer_time) 119 | cv2.putText(frame, infer_time_info, (5, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3) 120 | infer_speed = 'Inference speed: %.2f FPS' % (1000 / infer_time) 121 | cv2.putText(frame, infer_speed, (5, 135), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3) 122 | face_num_info = 'Face num: %d' % (face_num) 123 | cv2.putText(frame, face_num_info, (5, 170), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3) 124 | 125 | 
vid_writer.write(frame.astype(np.uint8)) 126 | # cv2.imshow(win_name, frame) 127 | 128 | if cv2.waitKey(1) & 0xFF == ord('q'): 129 | break 130 | 131 | cap.release() 132 | cv2.destroyAllWindows() 133 | else: 134 | raise TypeError('Unsupported File Format.') 135 | 136 | 137 | if __name__ == '__main__': 138 | main() 139 | -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/README.md: -------------------------------------------------------------------------------- 1 | ## Deployment with TensorRT 2 | We provide code for deployment with the [TensorRT python API](https://developer.nvidia.com/tensorrt). 3 | In general, once you use an NVIDIA GPU in your applications, 4 | TensorRT is the best choice for deployment, rather than training frameworks like TensorFlow, PyTorch, MXNet, Caffe... 5 | 6 | ### Prerequisites 7 | Refer to [inference_speed_evaluation](../inference_speed_evaluation) for details. 8 | 9 | ### Getting Started 10 | 1. use `to_onnx.py` to generate the onnx model file 11 | 2. run `predict_tensorrt.py` to do inference based on the generated model file 12 | 3. after you fully understand the code, you may refactor and merge it into your own project. 13 | 14 | > In most practical cases, C++ is the primary choice for efficient running. 15 | So you can rewrite the code according to the python code structure. 16 | In the future, we will provide a C++ version. 17 | 18 | ### NVIDIA Jetson NANO&TX2 Deployment Instructions 19 | TBD -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/debug_image/test1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test1.jpg -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/debug_image/test2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test2.jpg -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/debug_image/test3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test3.jpg -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/debug_image/test5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test5.jpg -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/debug_image/test6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test6.jpg -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/to_onnx.py: -------------------------------------------------------------------------------- 1 | 
import logging 2 | import numpy 3 | import sys 4 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') # add mxnet python path if needed 5 | import mxnet 6 | from mxnet.contrib import onnx as onnx_mxnet 7 | from onnx import checker 8 | import onnx 9 | 10 | 11 | def generate_onnx_file(): 12 | logging.basicConfig(level=logging.INFO) 13 | 14 | # set the proper symbol path, param path and onnx path 15 | symbol_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json' 16 | param_path = '../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params' 17 | onnx_path = './onnx_files/v2.onnx' 18 | 19 | net_symbol = mxnet.symbol.load(symbol_path) 20 | net_params_raw = mxnet.nd.load(param_path) 21 | net_params = dict() 22 | for k, v in net_params_raw.items(): 23 | tp, name = k.split(':', 1) 24 | net_params.update({name: v}) 25 | 26 | input_shape = (1, 3, 480, 640) # CAUTION: in TensorRT, the input size cannot be changed dynamically, so you must set it here. 27 | 28 | onnx_mxnet.export_model(net_symbol, net_params, [input_shape], numpy.float32, onnx_path, verbose=True) 29 | 30 | # Load onnx model 31 | model_proto = onnx.load_model(onnx_path) 32 | 33 | # Check if converted ONNX protobuf is valid 34 | checker.check_graph(model_proto.graph) 35 | 36 | 37 | if __name__ == '__main__': 38 | generate_onnx_file() 39 | -------------------------------------------------------------------------------- /face_detection/inference_speed_evaluation/README.md: -------------------------------------------------------------------------------- 1 | ## Inference Speed Evaluation 2 | 3 | ### Update History 4 | * `2019.8.1` inference python code for MXNet-cudnn and TensorRT-cudnn is online. 5 | 6 | ### Additional Prerequisites 7 | * [onnx](https://onnx.ai/) (pip3 install onnx==1.3.0) 8 | * [pycuda](https://developer.nvidia.com/pycuda) (pip3 install pycuda==2019.1.1 or [install guide](https://pypi.org/project/pycuda/)) 9 | * [tensorrt](https://developer.nvidia.com/tensorrt) =5.x (use pip3 to install the corresponding .whl file in the python folder) 10 | 11 | > CAUTION: 12 | > 13 | > Carefully check the version compatibility between CUDA, cuDNN, pycuda, TensorRT and onnx. 14 | 15 | 16 | ### Getting Started 17 | 1. (optional) temporarily add the mxnet python path to the env if mxnet is not globally installed 18 | 2. set `eval_with_mxnet_flag` to True to evaluate with mxnet + cudnn, or to False to evaluate with tensorrt + cudnn (the two cannot run at the same time due to some conflicts) 19 | 3. set `symbol_file_path`, `input_shape` and `real_run_loops` 20 | 4. 
run the script -------------------------------------------------------------------------------- /face_detection/inference_speed_evaluation/inference_speed_eval.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') # append mxnet python path if need 4 | sys.path.append('../..') 5 | import mxnet 6 | 7 | eval_with_mxnet_flag = False 8 | symbol_file_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json' 9 | input_shape = (1, 3, 480, 640) # (1,3,240,320) (1,3,480,640) (1,3,720,1280) (1,3,1080,1920) (1,3,2160,3840) 10 | 11 | if eval_with_mxnet_flag: 12 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_mxnet_cudnn import InferenceSpeedEval as InferenceSpeedEvalMXNet 13 | 14 | inferenceSpeedEvalMXNet = InferenceSpeedEvalMXNet(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type='gpu', gpu_index=0) 15 | inferenceSpeedEvalMXNet.run_speed_eval(warm_run_loops=10, real_run_loops=200) 16 | 17 | else: 18 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_tensorrt_cudnn import InferenceSpeedEval as InferenceSpeedEvalTRT 19 | 20 | inferenceSpeedEvalTRT = InferenceSpeedEvalTRT(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape) 21 | inferenceSpeedEvalTRT.run_speed_eval(warm_run_loops=10, real_run_loops=200) 22 | -------------------------------------------------------------------------------- /face_detection/metric_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/metric_farm/__init__.py -------------------------------------------------------------------------------- /face_detection/metric_farm/metric_default.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy 4 | import torch 5 | 6 | 7 | class Metric: 8 | def __init__(self, num_scales): 9 | self.num_scales = num_scales 10 | self.sum_metric = [0.0 for i in range(num_scales * 2)] 11 | self.num_update = 0 12 | self.multiply_factor = 10000 13 | 14 | def update(self, loss_branch): 15 | for i in range(self.num_scales): 16 | loss_score = loss_branch[i * 2] 17 | loss_bbox = loss_branch[i * 2 + 1] 18 | 19 | self.sum_metric[i * 2] += loss_score 20 | self.sum_metric[i * 2 + 1] += loss_bbox 21 | 22 | self.num_update += 1 23 | 24 | def get(self): 25 | return_string_list = [] 26 | for i in range(self.num_scales): 27 | return_string_list.append('cls_loss_score_' + str(i)) 28 | return_string_list.append('reg_loss_bbox_' + str(i)) 29 | 30 | return return_string_list, [m / self.num_update * self.multiply_factor for i, m in enumerate(self.sum_metric)] 31 | 32 | def reset(self): 33 | self.sum_metric = [0.0 for i in range(self.num_scales * 2)] 34 | self.num_update = 0 35 | -------------------------------------------------------------------------------- /face_detection/net_farm/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @date : 18-8-19 3 | # @author : MindBreaker 4 | # @module : -------------------------------------------------------------------------------- /face_detection/net_farm/naivenet_structures.xlsx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/net_farm/naivenet_structures.xlsx -------------------------------------------------------------------------------- /face_detection/qualitative_results/v1_qualitative_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_1.jpg -------------------------------------------------------------------------------- /face_detection/qualitative_results/v1_qualitative_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_2.jpg -------------------------------------------------------------------------------- /face_detection/qualitative_results/v1_qualitative_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_3.jpg -------------------------------------------------------------------------------- /face_detection/qualitative_results/v1_qualitative_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_4.jpg -------------------------------------------------------------------------------- /face_detection/qualitative_results/v1_qualitative_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_5.jpg -------------------------------------------------------------------------------- /face_detection/saved_model/configuration_10_320_20L_5scales_v2/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/saved_model/configuration_10_320_20L_5scales_v2/.gitkeep -------------------------------------------------------------------------------- /face_detection/saved_model/configuration_10_560_25L_8scales_v1/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/saved_model/configuration_10_560_25L_8scales_v1/.gitkeep -------------------------------------------------------------------------------- /head_detection/README.md: -------------------------------------------------------------------------------- 1 | ## Head Detection 2 | We use the brainwash dataset introduced by paper [End-to-end people detection in crowded scenes](https://arxiv.org/abs/1506.04878). 3 | 4 | ### Recent Update 5 | * `2019.09.23` model v1 for brainwash dataset is released. 6 | * `2019.09.26` brainwash dataset (and packed pkl) is uploaded for downloading. 
7 | 8 | ### Brief Introduction to Model Version 9 | * v1 - designed for the brainwash dataset, covering the head scale range [10, 160]. It has 4 branches. Please check 10 | `./symbol_farm/symbol_structures.xlsx` for details. 11 | 12 | ### Inference Latency 13 | 14 | * Platform info: NVIDIA RTX 2080TI, CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0 15 | 16 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160|7680×4320 17 | -------------|-------|-------|--------|---------|---------|--------- 18 | v1|0.83ms(1198.38FPS)|1.91ms(524.14FPS)|4.83ms(206.92FPS)|10.62ms(94.19FPS)|42.28ms(23.65FPS)|166.81ms(5.99FPS) 19 | 20 | * Platform info: NVIDIA GTX 1060(laptop), CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0 21 | 22 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160 23 | -------------|-------|-------|--------|---------|--------- 24 | v1|1.62ms(618.53FPS)|4.83ms(207.06FPS)|13.67ms(73.18FPS)|30.01ms(33.32FPS)|121.15ms(8.25FPS) 25 | 26 | > CAUTION: The latency may vary even in the same setting. 27 | 28 | ### Accuracy on Brainwash Dataset 29 | We train v1 on the training set (10769 images with 81975 annotated heads) and evaluate on the test set (500 images with 5007 30 | annotated heads). This dataset is relatively simple due to its monotonous scenarios. 31 | 32 | #### Quantitative Results on Test Set 33 | Average Precision (AP) is used to measure the accuracy. Specifically, we use the code from [Object-Detection-Metrics](https://github.com/rafaelpadilla/Object-Detection-Metrics) 34 | to calculate the AP metric. The following table presents the results: 35 | 36 | Method|AP 37 | --------|------ 38 | ReInspect, Lhungarian [1]|0.78 39 | FCHD [2]|0.70 40 | v1 (ours)|0.91 41 | 42 | >[1] [End-to-end people detection in crowded scenes](https://arxiv.org/abs/1506.04878) 43 | > 44 | >[2] [FCHD: Fast and accurate head detection in crowded scenes](https://arxiv.org/abs/1809.08766) 45 | 46 | v1 significantly outperforms the existing methods. 47 | 48 | #### Some Qualitative Results on Test Set 49 | ![image](./accuracy_evaluation/test_images/2.jpg) 50 | ![image](./accuracy_evaluation/test_images/72.jpg) 51 | ![image](./accuracy_evaluation/test_images/322.jpg) 52 | ![image](./accuracy_evaluation/test_images/411.jpg) 53 | 54 | ### User Instructions 55 | Please refer to [README in face_detection](../face_detection/README.md) for details. 56 | 57 | ### Data Download 58 | We provide both the original and the packed data of the brainwash dataset.
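The packed `.pkl` file is produced by `data_provider_farm/pickle_provider.py`: it stores a dict that maps a sample index to `[encoded JPG buffer, pos/neg flag, bboxes]`, where `bboxes` is a numpy array of `[x, y, width, height]` rows for positive samples and an integer placeholder for negative samples. A minimal reading sketch (the file path is an example):

```python
import pickle
import cv2

# load the packed dataset: {index: [encoded JPG buffer, pos/neg flag, bboxes]}
data = pickle.load(open('data_list_brainwash_test.pkl', 'rb'))
im_buf, flag, bboxes = data[0]
# decode the JPG buffer back into a BGR image
im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR)
print(im.shape, flag)
```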
We provide three ways to download the packed data: 59 | * [Baidu Yunpan](https://pan.baidu.com/s/1VdiXHhtw9aNaU1E9PhVwtQ) (pwd:zvma) 60 | * [MS OneDrive] 61 | * [Google Drive] -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/evaluation_on_brainwash.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | import cv2 5 | import math 6 | import re 7 | 8 | sys.path.append('..') 9 | # change the config as you need 10 | from config_farm import configuration_10_160_17L_4scales_v1 as cfg 11 | import mxnet 12 | from predict import Predict 13 | 14 | 15 | def generate_gt_files(): 16 | txt_file_path = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash/brainwash_test.idl' 17 | gt_file_root = './brainwash_testset_gt_files_for_evaluation' 18 | 19 | if not os.path.exists(gt_file_root): 20 | os.makedirs(gt_file_root) 21 | 22 | fin = open(txt_file_path, 'r') 23 | 24 | counter = 0 25 | for line in fin: 26 | line = line.strip(';\n') 27 | im_path = re.findall('["](.*?)["]', line)[0] 28 | 29 | bbox_str_list = re.findall('[(](.*?)[)]', line) 30 | bbox_list = [] 31 | for bbox_str in bbox_str_list: 32 | bbox_str = bbox_str.split(', ') 33 | xmin = int(float(bbox_str[0])) 34 | ymin = int(float(bbox_str[1])) 35 | xmax = int(float(bbox_str[2])) 36 | ymax = int(float(bbox_str[3])) 37 | bbox_list.append((xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)) 38 | 39 | if len(bbox_list) != 0: 40 | gt_file_name = im_path.replace('/', '_') 41 | gt_file_name = gt_file_name.replace('png', 'txt') 42 | fout = open(os.path.join(gt_file_root, gt_file_name), 'w') 43 | for bbox in bbox_list: 44 | line_str = 'head ' + str(bbox[0]) + ' ' + str(bbox[1]) + ' ' + str(bbox[2]) + ' ' + str(bbox[3]) 45 | fout.write(line_str + '\n') 46 | fout.close() 47 | counter += 1 48 | print(counter) 49 | fin.close() 50 | 51 | 52 | def generate_predicted_files(): 53 | # set the proper symbol file and model file 54 | symbol_file_path = '../symbol_farm/symbol_10_160_17L_4scales_v1_deploy.json' 55 | model_file_path = '../saved_model/configuration_10_160_17L_4scales_v1_2019-09-20-13-08-26/train_10_160_17L_4scales_v1_iter_800000.params' 56 | my_predictor = Predict(mxnet=mxnet, 57 | symbol_file_path=symbol_file_path, 58 | model_file_path=model_file_path, 59 | ctx=mxnet.gpu(0), 60 | receptive_field_list=cfg.param_receptive_field_list, 61 | receptive_field_stride=cfg.param_receptive_field_stride, 62 | bbox_small_list=cfg.param_bbox_small_list, 63 | bbox_large_list=cfg.param_bbox_large_list, 64 | receptive_field_center_start=cfg.param_receptive_field_center_start, 65 | num_output_scales=cfg.param_num_output_scales) 66 | 67 | # set the brainwash test annotation file (.idl) and the image root 68 | txt_file_path = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash/brainwash_test.idl' 69 | image_root = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash' 70 | predicted_file_root = './brainwash_testset_predicted_files_for_evaluation_' + os.path.basename(model_file_path).split('.')[0] 71 | 72 | if not os.path.exists(predicted_file_root): 73 | os.makedirs(predicted_file_root) 74 | 75 | fin = open(txt_file_path, 'r') 76 | 77 | resize_scale = 1 78 | score_threshold = 0.05 79 | NMS_threshold = 0.6 80 | counter = 0 81 | 82 | for line in fin: 83 | line = line.strip(';\n') 84 | im_path = re.findall('["](.*?)["]', line)[0] 85 | 86 | im = 
cv2.imread(os.path.join(image_root, im_path), cv2.IMREAD_COLOR) 87 | 88 | bboxes = my_predictor.predict(im, resize_scale=resize_scale, score_threshold=score_threshold, top_k=10000, NMS_threshold=NMS_threshold) 89 | 90 | # for bbox in bboxes: 91 | # cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 1) 92 | # cv2.imshow('im',im) 93 | # cv2.waitKey() 94 | predicted_file_name = im_path.replace('/', '_') 95 | predicted_file_name = predicted_file_name.replace('png', 'txt') 96 | fout = open(os.path.join(predicted_file_root, predicted_file_name), 'w') 97 | for bbox in bboxes: 98 | fout.write('head %.03f %d %d %d %d' % (bbox[4] if bbox[4] <= 1 else 1, math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1])) + '\n') 99 | fout.close() 100 | counter += 1 101 | print('[%d] is processed.' % counter) 102 | 103 | 104 | if __name__ == '__main__': 105 | # generate_gt_files() 106 | generate_predicted_files() 107 | -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/predict.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | import numpy 5 | import cv2 6 | 7 | 8 | # empty data batch class for dynamic properties 9 | class DataBatch: 10 | pass 11 | 12 | 13 | def NMS(boxes, overlap_threshold): 14 | ''' 15 | 16 | :param boxes: numpy nx5, n is the number of boxes, 0:4->x1, y1, x2, y2, 4->score 17 | :param overlap_threshold: 18 | :return: 19 | ''' 20 | if boxes.shape[0] == 0: 21 | return boxes 22 | 23 | # if the bounding boxes are integers, convert them to floats -- 24 | # this is important since we'll be doing a bunch of divisions 25 | if boxes.dtype != numpy.float32: 26 | boxes = boxes.astype(numpy.float32) 27 | 28 | # initialize the list of picked indexes 29 | pick = [] 30 | # grab the coordinates of the bounding boxes 31 | x1 = boxes[:, 0] 32 | y1 = boxes[:, 1] 33 | x2 = boxes[:, 2] 34 | y2 = boxes[:, 3] 35 | sc = boxes[:, 4] 36 | widths = x2 - x1 37 | heights = y2 - y1 38 | 39 | # compute the area of the bounding boxes and sort the bounding 40 | # boxes by their scores 41 | area = heights * widths 42 | idxs = numpy.argsort(sc) # sort scores in ascending order 43 | 44 | # keep looping while some indexes still remain in the indexes list 45 | while len(idxs) > 0: 46 | # grab the last index in the indexes list and add the 47 | # index value to the list of picked indexes 48 | last = len(idxs) - 1 49 | i = idxs[last] 50 | pick.append(i) 51 | 52 | # compute the intersection between the current highest-score box and the remaining boxes 53 | xx1 = numpy.maximum(x1[i], x1[idxs[:last]]) 54 | yy1 = numpy.maximum(y1[i], y1[idxs[:last]]) 55 | xx2 = numpy.minimum(x2[i], x2[idxs[:last]]) 56 | yy2 = numpy.minimum(y2[i], y2[idxs[:last]]) 57 | 58 | # compute the width and height of the intersection box 59 | w = numpy.maximum(0, xx2 - xx1 + 1) 60 | h = numpy.maximum(0, yy2 - yy1 + 1) 61 | 62 | # compute the ratio of overlap 63 | overlap = (w * h) / area[idxs[:last]] 64 | 65 | # delete all indexes from the index list that have overlap above the threshold 66 | idxs = numpy.delete(idxs, numpy.concatenate(([last], numpy.where(overlap > overlap_threshold)[0]))) 67 | 68 | # return only the bounding boxes that were picked 69 | # (scores are kept in the last column) 70 | return boxes[pick] 71 | 72 | 73 | class Predict(object): 74 | 75 | def __init__(self, 76 | mxnet, 77 | symbol_file_path, 78 | model_file_path, 79 | ctx, 80 | receptive_field_list, 81 | receptive_field_stride, 82 | bbox_small_list, 83 | 
bbox_large_list, 84 | receptive_field_center_start, 85 | num_output_scales 86 | ): 87 | self.mxnet = mxnet 88 | self.symbol_file_path = symbol_file_path 89 | self.model_file_path = model_file_path 90 | self.ctx = ctx 91 | 92 | self.receptive_field_list = receptive_field_list 93 | self.receptive_field_stride = receptive_field_stride 94 | self.bbox_small_list = bbox_small_list 95 | self.bbox_large_list = bbox_large_list 96 | self.receptive_field_center_start = receptive_field_center_start 97 | self.num_output_scales = num_output_scales 98 | self.constant = [i / 2.0 for i in self.receptive_field_list] 99 | self.input_height = 480 100 | self.input_width = 640 101 | self.__load_model() 102 | 103 | def __load_model(self): 104 | # load symbol and parameters 105 | print('----> load symbol file: %s\n----> load model file: %s' % (self.symbol_file_path, self.model_file_path)) 106 | if not os.path.exists(self.symbol_file_path): 107 | print('The symbol file does not exist!!!!') 108 | sys.exit(1) 109 | if not os.path.exists(self.model_file_path): 110 | print('The model file does not exist!!!!') 111 | sys.exit(1) 112 | self.symbol_net = self.mxnet.symbol.load(self.symbol_file_path) 113 | data_name = 'data' 114 | data_name_shape = (data_name, (1, 3, self.input_height, self.input_width)) 115 | self.module = self.mxnet.module.Module(symbol=self.symbol_net, 116 | data_names=[data_name], 117 | label_names=None, 118 | context=self.ctx, 119 | work_load_list=None) 120 | self.module.bind(data_shapes=[data_name_shape], 121 | for_training=False) 122 | 123 | save_dict = self.mxnet.nd.load(self.model_file_path) 124 | self.arg_name_arrays = dict() 125 | self.arg_name_arrays['data'] = self.mxnet.nd.zeros((1, 3, self.input_height, self.input_width), self.ctx) 126 | self.aux_name_arrays = {} 127 | for k, v in save_dict.items(): 128 | tp, name = k.split(':', 1) 129 | if tp == 'arg': 130 | self.arg_name_arrays.update({name: v.as_in_context(self.ctx)}) 131 | if tp == 'aux': 132 | self.aux_name_arrays.update({name: v.as_in_context(self.ctx)}) 133 | self.module.init_params(arg_params=self.arg_name_arrays, 134 | aux_params=self.aux_name_arrays, 135 | allow_missing=True) 136 | print('----> Model is loaded successfully.') 137 | 138 | def predict(self, image, resize_scale=1, score_threshold=0.8, top_k=100, NMS_threshold=0.3, NMS_flag=True, skip_scale_branch_list=[]): 139 | 140 | if image.ndim != 3 or image.shape[2] != 3: 141 | print('Only RGB images are supported.') 142 | return None 143 | 144 | bbox_collection = [] 145 | 146 | shorter_side = min(image.shape[:2]) 147 | if shorter_side * resize_scale < 128: 148 | resize_scale = float(128) / shorter_side 149 | 150 | input_image = cv2.resize(image, (0, 0), fx=resize_scale, fy=resize_scale) 151 | 152 | input_image = input_image.astype(dtype=numpy.float32) 153 | input_image = input_image[:, :, :, numpy.newaxis] 154 | input_image = input_image.transpose([3, 2, 0, 1]) 155 | 156 | data_batch = DataBatch() 157 | data_batch.data = [self.mxnet.ndarray.array(input_image, self.ctx)] 158 | 159 | self.module.forward(data_batch=data_batch, is_train=False) 160 | results = self.module.get_outputs() 161 | outputs = [] 162 | for output in results: 163 | outputs.append(output.asnumpy()) 164 | 165 | for i in range(self.num_output_scales): 166 | if i in skip_scale_branch_list: 167 | continue 168 | 169 | score_map = numpy.squeeze(outputs[i * 2], (0, 1)) 170 | 171 | # score_map_show = score_map * 255 172 | # score_map_show[score_map_show < 0] = 0 173 | # score_map_show[score_map_show > 255] = 255 174 | # 
cv2.imshow('score_map' + str(i), cv2.resize(score_map_show.astype(dtype=numpy.uint8), (0, 0), fx=2, fy=2)) 175 | # cv2.waitKey() 176 | 177 | bbox_map = numpy.squeeze(outputs[i * 2 + 1], 0) 178 | 179 | RF_center_Xs = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * x for x in range(score_map.shape[1])]) 180 | RF_center_Xs_mat = numpy.tile(RF_center_Xs, [score_map.shape[0], 1]) 181 | RF_center_Ys = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * y for y in range(score_map.shape[0])]) 182 | RF_center_Ys_mat = numpy.tile(RF_center_Ys, [score_map.shape[1], 1]).T 183 | 184 | x_lt_mat = RF_center_Xs_mat - bbox_map[0, :, :] * self.constant[i] 185 | y_lt_mat = RF_center_Ys_mat - bbox_map[1, :, :] * self.constant[i] 186 | x_rb_mat = RF_center_Xs_mat - bbox_map[2, :, :] * self.constant[i] 187 | y_rb_mat = RF_center_Ys_mat - bbox_map[3, :, :] * self.constant[i] 188 | 189 | x_lt_mat = x_lt_mat / resize_scale 190 | x_lt_mat[x_lt_mat < 0] = 0 191 | y_lt_mat = y_lt_mat / resize_scale 192 | y_lt_mat[y_lt_mat < 0] = 0 193 | x_rb_mat = x_rb_mat / resize_scale 194 | x_rb_mat[x_rb_mat > image.shape[1]] = image.shape[1] 195 | y_rb_mat = y_rb_mat / resize_scale 196 | y_rb_mat[y_rb_mat > image.shape[0]] = image.shape[0] 197 | 198 | select_index = numpy.where(score_map > score_threshold) 199 | for idx in range(select_index[0].size): 200 | bbox_collection.append((x_lt_mat[select_index[0][idx], select_index[1][idx]], 201 | y_lt_mat[select_index[0][idx], select_index[1][idx]], 202 | x_rb_mat[select_index[0][idx], select_index[1][idx]], 203 | y_rb_mat[select_index[0][idx], select_index[1][idx]], 204 | score_map[select_index[0][idx], select_index[1][idx]])) 205 | 206 | # NMS 207 | bbox_collection = sorted(bbox_collection, key=lambda item: item[-1], reverse=True) 208 | if len(bbox_collection) > top_k: 209 | bbox_collection = bbox_collection[0:top_k] 210 | bbox_collection_numpy = numpy.array(bbox_collection, dtype=numpy.float32) 211 | 212 | if NMS_flag: 213 | final_bboxes = NMS(bbox_collection_numpy, NMS_threshold) 214 | final_bboxes_ = [] 215 | for i in range(final_bboxes.shape[0]): 216 | final_bboxes_.append((final_bboxes[i, 0], final_bboxes[i, 1], final_bboxes[i, 2], final_bboxes[i, 3], final_bboxes[i, 4])) 217 | 218 | return final_bboxes_ 219 | else: 220 | return bbox_collection_numpy 221 | 222 | 223 | def run_prediction_pickle(): 224 | from config_farm import configuration_10_160_17L_4scales_v1 as cfg 225 | import mxnet 226 | 227 | data_pickle_file_path = '../data_provider_farm/data_folder/data_list_brainwash_test.pkl' 228 | from data_provider_farm.pickle_provider import PickleProvider 229 | pickle_provider = PickleProvider(data_pickle_file_path) 230 | positive_index = pickle_provider.positive_index 231 | negative_index = pickle_provider.negative_index 232 | all_index = positive_index 233 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index))) 234 | # import random 235 | # random.shuffle(all_index) 236 | 237 | symbol_file_path = '../symbol_farm/symbol_10_160_17L_4scales_v1_deploy.json' 238 | model_file_path = '../saved_model/configuration_10_160_17L_4scales_v1_2019-09-20-13-08-26/train_10_160_17L_4scales_v1_iter_800000.params' 239 | my_predictor = Predict(mxnet=mxnet, 240 | symbol_file_path=symbol_file_path, 241 | model_file_path=model_file_path, 242 | ctx=mxnet.gpu(0), 243 | receptive_field_list=cfg.param_receptive_field_list, 244 | receptive_field_stride=cfg.param_receptive_field_stride, 245 | 
bbox_small_list=cfg.param_bbox_small_list, 246 | bbox_large_list=cfg.param_bbox_large_list, 247 | receptive_field_center_start=cfg.param_receptive_field_center_start, 248 | num_output_scales=cfg.param_num_output_scales) 249 | 250 | for idx in all_index: 251 | im, _, bboxes_gt = pickle_provider.read_by_index(idx) 252 | 253 | bboxes = my_predictor.predict(im, resize_scale=1, score_threshold=0.5, top_k=10000, NMS_threshold=0.6) 254 | for bbox in bboxes: 255 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2) 256 | 257 | cv2.imshow('im', im) 258 | key = cv2.waitKey() 259 | # if key & 0xFF == ord('s'): 260 | # cv2.imwrite('./test_images/' + str(idx) + '.jpg', im) 261 | 262 | 263 | if __name__ == '__main__': 264 | run_prediction_pickle() 265 | 266 | -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/2.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/247.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/247.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/322.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/322.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/342.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/342.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/377.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/377.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/411.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/411.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/5.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/7.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/7.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/72.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/72.jpg -------------------------------------------------------------------------------- /head_detection/config_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/config_farm/__init__.py -------------------------------------------------------------------------------- /head_detection/data_iterator_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/data_iterator_farm/__init__.py -------------------------------------------------------------------------------- /head_detection/data_provider_farm/pickle_provider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This provider accepts an adapter, saves the dataset into a pickle file and loads the whole dataset into memory for data iterators 3 | ''' 4 | 5 | import cv2 6 | import numpy 7 | import pickle 8 | 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_provider import ProviderBaseclass 10 | from .text_list_adapter import TextListAdapter 11 | 12 | 13 | class PickleProvider(ProviderBaseclass): 14 | """ 15 | This class provides methods to save and read data. 16 | By default, images are compressed using JPG format.
17 | If data_adapter is not None, the provider runs in writing mode; otherwise it runs in reading mode 18 | """ 19 | 20 | def __init__(self, 21 | pickle_file_path, 22 | encode_quality=90, 23 | data_adapter=None): 24 | ProviderBaseclass.__init__(self) 25 | 26 | if data_adapter: # write data 27 | 28 | self.data_adapter = data_adapter 29 | self.data = {} 30 | self.counter = 0 31 | self.pickle_file_path = pickle_file_path 32 | 33 | else: # read data 34 | 35 | self.data = pickle.load(open(pickle_file_path, 'rb')) 36 | # get positive and negative indices 37 | self._positive_index = [] 38 | self._negative_index = [] 39 | for k, v in self.data.items(): 40 | if v[1] == 0: # negative 41 | self._negative_index.append(k) 42 | else: # positive 43 | self._positive_index.append(k) 44 | 45 | self.compression_mode = '.jpg' 46 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality] 47 | 48 | @property 49 | def positive_index(self): 50 | return self._positive_index 51 | 52 | @property 53 | def negative_index(self): 54 | return self._negative_index 55 | 56 | def write(self): 57 | 58 | for data_item in self.data_adapter.get_one(): 59 | 60 | temp_sample = [] 61 | im, bboxes = data_item 62 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params) 63 | if buf is None or buf.size == 0: 64 | print('The encoded buffer is invalid.') 65 | continue 66 | if not ret: 67 | print('An error occurred.') 68 | continue 69 | temp_sample.append(buf) 70 | 71 | if isinstance(bboxes, str): # negative sample 72 | temp_sample.append(0) 73 | temp_sample.append(int(bboxes)) 74 | else: 75 | temp_sample.append(1) 76 | temp_sample.append(bboxes) 77 | 78 | self.data[self.counter] = temp_sample 79 | print('Successfully saved the %d-th data item.' % self.counter) 80 | self.counter += 1 81 | 82 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) 83 | 84 | def read_by_index(self, index): 85 | im_buf, flag, bboxes = self.data[index] 86 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR) 87 | return im, flag, bboxes 88 | 89 | 90 | def write_file(): 91 | data_list_file_path = './data_folder/data_list_brainwash_test.txt' 92 | adapter = TextListAdapter(data_list_file_path) 93 | 94 | pickle_file_path = './data_folder/data_list_brainwash_test.pkl' 95 | encode_quality = 90 96 | packer = PickleProvider(pickle_file_path, encode_quality, adapter) 97 | packer.write() 98 | 99 | 100 | def read_file(): 101 | pickle_file_path = './data_folder/data_list_brainwash_test.pkl' 102 | 103 | provider = PickleProvider(pickle_file_path) 104 | positive_index = provider.positive_index 105 | negative_index = provider.negative_index 106 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index))) 107 | # all_index = positive_index+negative_index 108 | import random 109 | random.shuffle(positive_index) 110 | 111 | for i, index in enumerate(positive_index): 112 | im, flag, bboxes_numpy = provider.read_by_index(index) 113 | if isinstance(bboxes_numpy, numpy.ndarray): 114 | for n in range(bboxes_numpy.shape[0]): 115 | cv2.rectangle(im, (bboxes_numpy[n, 0], bboxes_numpy[n, 1]), 116 | (bboxes_numpy[n, 0] + bboxes_numpy[n, 2], bboxes_numpy[n, 1] + bboxes_numpy[n, 3]), (0, 255, 0), 1) 117 | cv2.imshow('im', im) 118 | cv2.waitKey() 119 | 120 | 121 | if __name__ == '__main__': 122 | # write_file() 123 | read_file() 124 | -------------------------------------------------------------------------------- /head_detection/data_provider_farm/reformat_brainwash.py: --------------------------------------------------------------------------------
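The brainwash `.idl` annotation format parsed below (and in `accuracy_evaluation/evaluation_on_brainwash.py`) is inferred from the regex-based parsing: each line holds a quoted image path, zero or more `(x1, y1, x2, y2)` boxes and a trailing `;`. A hypothetical line and the data-list line derived from it by `generate_data_list()` (images without boxes yield a `,0,0` negative line):

```
"brainwash_11_13_2014_images/00000500_640x480.png": (92.0, 21.5, 121.0, 53.5);
/path/to/brainwash/brainwash_11_13_2014_images/00000500_640x480.png,1,1,92,21,30,33
```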
1 | import numpy 2 | import os 3 | import cv2 4 | import json 5 | import math 6 | import re 7 | 8 | ''' 9 | ''' 10 | 11 | 12 | def generate_data_list(): 13 | txt_file_path = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash/brainwash_test.idl' 14 | image_root = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash' 15 | 16 | list_file_path = './data_folder/data_list_brainwash_test.txt' 17 | if not os.path.exists(os.path.dirname(list_file_path)): 18 | os.makedirs(os.path.dirname(list_file_path)) 19 | fin = open(txt_file_path, 'r') 20 | fout = open(list_file_path, 'w') 21 | 22 | counter = 0 23 | for line in fin: 24 | line = line.strip(';\n') 25 | im_path = re.findall('["](.*?)["]', line)[0] 26 | im_path = os.path.join(image_root, im_path) 27 | if not os.path.exists(im_path): 28 | print('image file does not exist: %s' % im_path) 29 | continue 30 | bbox_str_list = re.findall('[(](.*?)[)]', line) 31 | bbox_list = [] 32 | for bbox_str in bbox_str_list: 33 | bbox_str = bbox_str.split(', ') 34 | xmin = int(float(bbox_str[0])) 35 | ymin = int(float(bbox_str[1])) 36 | xmax = int(float(bbox_str[2])) 37 | ymax = int(float(bbox_str[3])) 38 | bbox_list.append((xmin, ymin, xmax-xmin+1, ymax-ymin+1)) 39 | 40 | if len(bbox_list) == 0: 41 | line_str = im_path+',0,0' 42 | fout.write(line_str+'\n') 43 | else: 44 | line_str = im_path+',1,'+str(len(bbox_list)) 45 | for bbox in bbox_list: 46 | line_str += ','+str(bbox[0])+','+str(bbox[1])+','+str(bbox[2])+','+str(bbox[3]) 47 | fout.write(line_str + '\n') 48 | counter += 1 49 | print(counter) 50 | 51 | fout.close() 52 | fin.close() 53 | 54 | 55 | def show_image(): 56 | list_file_path = './data_folder/data_list_brainwash_test.txt' 57 | 58 | fin = open(list_file_path, 'r') 59 | lines = fin.readlines() 60 | fin.close() 61 | 62 | import random 63 | random.shuffle(lines) 64 | for line in lines: 65 | line = line.strip('\n').split(',') 66 | 67 | im = cv2.imread(line[0]) 68 | 69 | bboxes = [] 70 | num_bboxes = int(line[2]) 71 | for i in range(num_bboxes): 72 | xmin = int(line[3 + i * 4]) 73 | ymin = int(line[4 + i * 4]) 74 | width = int(line[5 + i * 4]) 75 | height = int(line[6 + i * 4]) 76 | bboxes.append((xmin, ymin, xmin + width - 1, ymin + height - 1)) 77 | 78 | for bbox in bboxes: 79 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 2) 80 | 81 | cv2.imshow('im', im) 82 | cv2.waitKey() 83 | 84 | 85 | def dataset_statistics(): 86 | list_file_path = './data_folder/data_list_brainwash_test.txt' 87 | 88 | fin = open(list_file_path, 'r') 89 | lines = fin.readlines() 90 | fin.close() 91 | 92 | bin_size = 5 93 | longer_bin_dict = {} 94 | shorter_bin_dict = {} 95 | for line in lines: 96 | line = line.strip('\n').split(',') 97 | num_bboxes = int(line[2]) 98 | for i in range(num_bboxes): 99 | width = int(line[5 + i * 4]) 100 | height = int(line[6 + i * 4]) 101 | 102 | longer_side = max(width, height) 103 | shorter_side = min(width, height) 104 | 105 | key = int(longer_side / bin_size) 106 | if key in longer_bin_dict: 107 | longer_bin_dict[key] += 1 108 | else: 109 | longer_bin_dict[key] = 1 110 | 111 | key = int(shorter_side / bin_size) 112 | if key in shorter_bin_dict: 113 | shorter_bin_dict[key] += 1 114 | else: 115 | shorter_bin_dict[key] = 1 116 | 117 | print('shorter side based statistics:') 118 | shorter_bin_dict_key_list = sorted(shorter_bin_dict) 119 | for k in shorter_bin_dict_key_list: 120 | v = shorter_bin_dict[k] 121 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v)) 122 | 123
| print('longer side based statistics:') 124 | longer_bin_dict_key_list = sorted(longer_bin_dict) 125 | for k in longer_bin_dict_key_list: 126 | v = longer_bin_dict[k] 127 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v)) 128 | 129 | 130 | if __name__ == '__main__': 131 | # generate_data_list() 132 | # show_image() 133 | dataset_statistics() 134 | 135 | -------------------------------------------------------------------------------- /head_detection/data_provider_farm/text_list_adapter.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This adapter accepts a text file as input which describes the annotated data. 3 | Each line in the text is formatted as: 4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]...... 5 | ''' 6 | 7 | import cv2 8 | import numpy 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_data_adapter import DataAdapterBaseclass 10 | 11 | 12 | class TextListAdapter(DataAdapterBaseclass): 13 | 14 | def __init__(self, data_list_file_path): 15 | 16 | DataAdapterBaseclass.__init__(self) 17 | fin = open(data_list_file_path, 'r') 18 | self.lines = fin.readlines() 19 | fin.close() 20 | self.line_counter = 0 21 | 22 | def __del__(self): 23 | pass 24 | 25 | def get_one(self): 26 | """ 27 | This function uses 'yield' to return samples 28 | """ 29 | while self.line_counter < len(self.lines): 30 | 31 | line = self.lines[self.line_counter].strip('\n').split(',') 32 | if line[1] == '1': # for a positive sample, verify that the number of bboxes matches the declared count 33 | assert len(line[3:]) == 4 * int(line[2]) 34 | 35 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED) 36 | 37 | if line[1] == '0': 38 | yield im, '0' 39 | self.line_counter += 1 40 | continue 41 | 42 | num_bboxes = int(line[2]) 43 | bboxes = [] 44 | for i in range(num_bboxes): 45 | x = float(line[3 + i * 4]) 46 | y = float(line[3 + i * 4 + 1]) 47 | width = float(line[3 + i * 4 + 2]) 48 | height = float(line[3 + i * 4 + 3]) 49 | 50 | bboxes.append([x, y, width, height]) 51 | 52 | bboxes = numpy.array(bboxes, dtype=numpy.float32) 53 | yield im, bboxes 54 | 55 | self.line_counter += 1 56 | 57 | 58 | if __name__ == '__main__': 59 | pass 60 | -------------------------------------------------------------------------------- /head_detection/inference_speed_evaluation/inference_speed_eval.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | 4 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') 5 | import mxnet 6 | 7 | eval_with_mxnet_flag = False 8 | symbol_file_path = '/home/heyonghao/projects/ChasingHeadDetection/symbol_farm/symbol_10_160_17L_4scales_v1_deploy.json' 9 | input_shape = (1,3,2160,3840) # (1,3,240,320) (1,3,480,640) (1,3,720,1280) (1,3,1080,1920) (1,3,2160,3840) 10 | real_run_loops = 200 11 | 12 | if eval_with_mxnet_flag: 13 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_mxnet_cudnn import InferenceSpeedEval as InferenceSpeedEvalMXNet 14 | 15 | inferenceSpeedEvalMXNet = InferenceSpeedEvalMXNet(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type='gpu', gpu_index=0) 16 | inferenceSpeedEvalMXNet.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops) 17 | 18 | else: 19 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_tensorrt_cudnn import InferenceSpeedEval as InferenceSpeedEvalTRT 20 | 21 | 
inferenceSpeedEvalTRT = InferenceSpeedEvalTRT(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape) 22 | inferenceSpeedEvalTRT.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops) 23 | -------------------------------------------------------------------------------- /head_detection/metric_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/metric_farm/__init__.py -------------------------------------------------------------------------------- /head_detection/metric_farm/metric_default.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy 4 | import mxnet 5 | 6 | 7 | class Metric: 8 | # the number of losses to be fed in, i.e., the number of scales 9 | def __init__(self, num_scales): 10 | self.sum_metric = [0.0 for i in range(num_scales * 2)] 11 | self.num_update = 0 12 | self.num_scales = num_scales 13 | self.num_nonzero = [1.0 for i in range(num_scales * 2)] 14 | self.scale_factor = 10000 15 | 16 | # it is expected that the shape is num*c*h*w 17 | def update(self, labels, preds): # note the order of items in labels; refer to prefetching_dataiter 18 | 19 | for i in range(self.num_scales): 20 | mask = labels[i * 2] # mask comes first 21 | label = labels[i * 2 + 1] # then the label 22 | 23 | score_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=0, end=2).asnumpy() 24 | bbox_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=2, end=6).asnumpy() 25 | 26 | label_bbox = mxnet.ndarray.slice_axis(label, axis=1, begin=2, end=6).asnumpy() 27 | 28 | pred_score = preds[i * 2].asnumpy() 29 | pred_bbox = preds[i * 2 + 1].asnumpy() 30 | 31 | loss_score = numpy.sum(pred_score * score_mask) 32 | loss_bbox = numpy.sum((label_bbox - pred_bbox) ** 2.0) 33 | 34 | self.num_nonzero[i * 2] += numpy.sum(score_mask[:, 0, :, :] > 0.5) 35 | self.num_nonzero[i * 2 + 1] += numpy.sum(bbox_mask > 0.5) 36 | self.sum_metric[i * 2] += loss_score 37 | self.sum_metric[i * 2 + 1] += loss_bbox 38 | 39 | self.num_update += 1 40 | 41 | def get(self): 42 | return_string_list = [] 43 | for i in range(self.num_scales): 44 | return_string_list.append('CE_loss_score_' + str(i)) 45 | return_string_list.append('SE_loss_bbox_' + str(i)) 46 | 47 | return return_string_list, [m / self.num_nonzero[i] * self.scale_factor for i, m in enumerate(self.sum_metric)] 48 | 49 | def reset(self): 50 | self.sum_metric = [0.0 for i in range(self.num_scales * 2)] 51 | self.num_update = 0 52 | self.num_nonzero = [1.0 for i in range(self.num_scales * 2)] 53 | -------------------------------------------------------------------------------- /head_detection/symbol_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/symbol_farm/__init__.py -------------------------------------------------------------------------------- /head_detection/symbol_farm/symbol_structures.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/symbol_farm/symbol_structures.xlsx -------------------------------------------------------------------------------- /license_plate_detection/README.md: -------------------------------------------------------------------------------- 1 | ## 
License Plate (LP) Detection 2 | We use the CCPD dataset introduced by the paper [Towards End-to-End License Plate Detection and Recognition: A Large Dataset and Baseline](https://github.com/detectRecog/CCPD). 3 | 4 | ### Recent Update 5 | * `2019.10.02` Model v1 for the CCPD dataset is released. 6 | 7 | ### Brief Introduction to Model Version 8 | * v1 - designed for the CCPD dataset, covering the LP scale range [64, 512]. It has 3 branches. Please check 9 | `./symbol_farm/symbol_structures.xlsx` for details. 10 | 11 | ### Inference Latency 12 | 13 | * Platform info: NVIDIA RTX 2080TI, CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0 14 | 15 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160|7680×4320 16 | -------------|-------|-------|--------|---------|---------|--------- 17 | v1|0.62ms(1613.18FPS)|1.02ms(978.64FPS)|2.10ms(476.80FPS)|4.21ms(237.32FPS)|15.68ms(63.78FPS)|62.82ms(15.92FPS) 18 | 19 | * Platform info: NVIDIA GTX 1060(laptop), CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0 20 | 21 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160 22 | -------------|-------|-------|--------|---------|--------- 23 | v1|0.86ms(1167.71FPS)|1.83ms(546.00FPS)|4.45ms(224.63FPS)|9.68ms(103.27FPS)|37.59ms(26.60FPS) 24 | 25 | > CAUTION: The latency may vary even in the same setting. 26 | 27 | ### Accuracy on CCPD Dataset 28 | We use the latest CCPD dataset, containing 351,974 images (it is larger than the version described in the paper). 29 | **Since the train/test split is not provided by the paper, we randomly select 3/5 of the data for training and use the rest for testing.** 30 | We train v1 on the training set (211,180 images) and evaluate on the test set (140,794 images). 31 | 32 | #### Quantitative Results on Test Set 33 | Average Precision (AP) is used to measure the accuracy. Specifically, we use the code from [Object-Detection-Metrics](https://github.com/rafaelpadilla/Object-Detection-Metrics) 34 | to calculate the AP metric. The following table presents the results: 35 | 36 | > `The comparison is not fair due to different training/test splits. This is for reference only!` 37 | > 38 | > `We run inference only once per image at test time, so some extremely large plates fail to be detected.` 39 | 40 | Method|AP 41 | --------|------ 42 | RPnet [1]|0.945 43 | v1 (ours)|0.989 44 | 45 | >[1] [Towards End-to-End License Plate Detection and Recognition: A Large Dataset and Baseline](http://openaccess.thecvf.com/content_ECCV_2018/papers/Zhenbo_Xu_Towards_End-to-End_License_ECCV_2018_paper.pdf) 46 | 47 | #### Some Qualitative Results on Test Set 48 | > Some challenging cases are presented. 49 | 50 | ![image](./accuracy_evaluation/test_images/test1.jpg_result.jpg) 51 | ![image](./accuracy_evaluation/test_images/test2.jpg_result.jpg) 52 | ![image](./accuracy_evaluation/test_images/test3.jpg_result.jpg) 53 | ![image](./accuracy_evaluation/test_images/test4.jpg_result.jpg) 54 | ![image](./accuracy_evaluation/test_images/test5.jpg_result.jpg) 55 | ![image](./accuracy_evaluation/test_images/test6.jpg_result.jpg) 56 | ![image](./accuracy_evaluation/test_images/test7.jpg_result.jpg) 57 | 58 | ### User Instructions 59 | Please refer to [README in face_detection](../face_detection/README.md) for details. 60 | 61 | ### Data Download 62 | Please visit [CCPD](https://github.com/detectRecog/CCPD) to access the data. 
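CCPD embeds each plate's annotation directly in the image file name; the sketch below mirrors `annotation_from_name()` in `data_provider_farm/reformat_CCPD.py`, which reads only the bounding-box field (the example file name is hypothetical):

```python
def bbox_from_ccpd_name(file_name):
    # the third '-'-separated field encodes the plate corners as 'x1&y1_x2&y2'
    location = file_name[:-4].split('-')[2]
    left_top, right_bottom = location.split('_')
    x1, y1 = (int(v) for v in left_top.split('&'))
    x2, y2 = (int(v) for v in right_bottom.split('&'))
    return x1, y1, x2 - x1 + 1, y2 - y1 + 1  # x, y, width, height

print(bbox_from_ccpd_name('025-95_113-154&383_386&473-386&473_177&454_154&383_363&402-0_0_22_27_27_33_16-37-15.jpg'))
# -> (154, 383, 233, 91)
```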
-------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/evaluation_on_CCPD.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | import cv2 5 | import math 6 | import re 7 | 8 | sys.path.append('..') 9 | # change the config as you need 10 | from config_farm import configuration_64_512_16L_3scales_v1 as cfg 11 | import mxnet 12 | from predict import Predict 13 | 14 | 15 | def generate_gt_files(): 16 | txt_file_path = '../data_provider_farm/data_folder/data_list_CCPD_test.txt' 17 | gt_file_root = './CCPD_testset_gt_files_for_evaluation' 18 | 19 | if not os.path.exists(gt_file_root): 20 | os.makedirs(gt_file_root) 21 | 22 | fin = open(txt_file_path, 'r') 23 | 24 | counter = 0 25 | for line in fin: 26 | line = line.strip('\n').split(',') 27 | im_path = os.path.basename(line[0]) 28 | num_bboxes = int(line[2]) 29 | if num_bboxes == 0: 30 | continue 31 | bbox_list = [] 32 | for i in range(num_bboxes): 33 | xmin = int(float(line[3+i*4])) 34 | ymin = int(float(line[4+i*4])) 35 | width = int(float(line[5+i*4])) 36 | height = int(float(line[6+i*4])) 37 | bbox_list.append((xmin, ymin, width, height)) 38 | 39 | gt_file_name = im_path.replace('jpg', 'txt') 40 | 41 | fout = open(os.path.join(gt_file_root, gt_file_name), 'w') 42 | for bbox in bbox_list: 43 | line_str = 'LP ' + str(bbox[0]) + ' ' + str(bbox[1]) + ' ' + str(bbox[2]) + ' ' + str(bbox[3]) 44 | fout.write(line_str + '\n') 45 | fout.close() 46 | counter += 1 47 | print(counter) 48 | fin.close() 49 | 50 | 51 | def generate_predicted_files(): 52 | # set the proper symbol file and model file 53 | symbol_file_path = '../symbol_farm/symbol_64_512_16L_3scales_v1_deploy.json' 54 | model_file_path = '../saved_model/configuration_64_512_16L_3scales_v1_2019-09-29-13-41-44/train_64_512_16L_3scales_v1_iter_600000.params' 55 | my_predictor = Predict(mxnet=mxnet, 56 | symbol_file_path=symbol_file_path, 57 | model_file_path=model_file_path, 58 | ctx=mxnet.gpu(0), 59 | receptive_field_list=cfg.param_receptive_field_list, 60 | receptive_field_stride=cfg.param_receptive_field_stride, 61 | bbox_small_list=cfg.param_bbox_small_list, 62 | bbox_large_list=cfg.param_bbox_large_list, 63 | receptive_field_center_start=cfg.param_receptive_field_center_start, 64 | num_output_scales=cfg.param_num_output_scales) 65 | 66 | # set the test list file generated by data_provider_farm/reformat_CCPD.py 67 | txt_file_path = '../data_provider_farm/data_folder/data_list_CCPD_test.txt' 68 | predicted_file_root = './CCPD_testset_predicted_files_for_evaluation_' + os.path.basename(model_file_path).split('.')[0] 69 | 70 | if not os.path.exists(predicted_file_root): 71 | os.makedirs(predicted_file_root) 72 | 73 | fin = open(txt_file_path, 'r') 74 | 75 | resize_scale = 1 76 | score_threshold = 0.2 77 | NMS_threshold = 0.6 78 | counter = 0 79 | 80 | for line in fin: 81 | line = line.strip('\n').split(',') 82 | 83 | im = cv2.imread(line[0], cv2.IMREAD_COLOR) 84 | 85 | bboxes = my_predictor.predict(im, resize_scale=resize_scale, score_threshold=score_threshold, top_k=10000, NMS_threshold=NMS_threshold) 86 | 87 | # for bbox in bboxes: 88 | # cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 1) 89 | # cv2.imshow('im',im) 90 | # cv2.waitKey() 91 | predicted_file_name = os.path.basename(line[0]).replace('jpg', 'txt') 92 | fout = open(os.path.join(predicted_file_root, predicted_file_name), 'w') 93 | for bbox in bboxes:
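# each detection is written in the layout consumed by the Object-Detection-Metrics
# scripts: 'LP <confidence> <x> <y> <width> <height>'; the score is clipped to 1,
# the top-left corner is floored, and the width/height are rounded up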
94 | fout.write('LP %.03f %d %d %d %d' % (bbox[4] if bbox[4] <= 1 else 1, math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1])) + '\n') 95 | fout.close() 96 | counter += 1 97 | print('[%d] is processed.' % counter) 98 | 99 | 100 | if __name__ == '__main__': 101 | # generate_gt_files() 102 | generate_predicted_files() 103 | -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test1.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test1.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test2.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test2.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test3.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test3.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test4.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test4.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test5.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test5.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test6.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test6.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test7.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test7.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/config_farm/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/config_farm/__init__.py -------------------------------------------------------------------------------- /license_plate_detection/data_iterator_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/data_iterator_farm/__init__.py -------------------------------------------------------------------------------- /license_plate_detection/data_provider_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/data_provider_farm/__init__.py -------------------------------------------------------------------------------- /license_plate_detection/data_provider_farm/pickle_provider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This provider accepts an adapter, saves the dataset into a pickle file and loads the whole dataset into memory for data iterators 3 | ''' 4 | 5 | import cv2 6 | import numpy 7 | import pickle 8 | 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_provider import ProviderBaseclass 10 | from .text_list_adapter import TextListAdapter 11 | 12 | 13 | class PickleProvider(ProviderBaseclass): 14 | """ 15 | This class provides methods to save and read data. 16 | By default, images are compressed using JPG format. 17 | If data_adapter is not None, the provider runs in writing mode; otherwise it runs in reading mode 18 | """ 19 | 20 | def __init__(self, 21 | pickle_file_path, 22 | encode_quality=90, 23 | data_adapter=None): 24 | ProviderBaseclass.__init__(self) 25 | 26 | if data_adapter: # write data 27 | 28 | self.data_adapter = data_adapter 29 | self.data = {} 30 | self.counter = 0 31 | self.pickle_file_path = pickle_file_path 32 | 33 | else: # read data 34 | 35 | self.data = pickle.load(open(pickle_file_path, 'rb')) 36 | # get positive and negative indices 37 | self._positive_index = [] 38 | self._negative_index = [] 39 | for k, v in self.data.items(): 40 | if v[1] == 0: # negative 41 | self._negative_index.append(k) 42 | else: # positive 43 | self._positive_index.append(k) 44 | 45 | self.compression_mode = '.jpg' 46 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality] 47 | 48 | @property 49 | def positive_index(self): 50 | return self._positive_index 51 | 52 | @property 53 | def negative_index(self): 54 | return self._negative_index 55 | 56 | def write(self): 57 | 58 | for data_item in self.data_adapter.get_one(): 59 | 60 | temp_sample = [] 61 | im, bboxes = data_item 62 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params) 63 | if buf is None or buf.size == 0: 64 | print('The encoded buffer is invalid.') 65 | continue 66 | if not ret: 67 | print('An error occurred.') 68 | continue 69 | temp_sample.append(buf) 70 | 71 | if isinstance(bboxes, str): # negative sample 72 | temp_sample.append(0) 73 | temp_sample.append(int(bboxes)) 74 | else: 75 | temp_sample.append(1) 76 | temp_sample.append(bboxes) 77 | 78 | self.data[self.counter] = temp_sample 79 | print('Successfully saved the %d-th data item.'
% self.counter) 80 | self.counter += 1 81 | 82 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) 83 | 84 | def read_by_index(self, index): 85 | im_buf, flag, bboxes = self.data[index] 86 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR) 87 | return im, flag, bboxes 88 | 89 | 90 | def write_file(): 91 | data_list_file_path = './data_folder/data_list_CCPD_train_debug.txt' 92 | adapter = TextListAdapter(data_list_file_path) 93 | 94 | pickle_file_path = './data_folder/data_list_CCPD_train_debug.pkl' 95 | encode_quality = 90 96 | packer = PickleProvider(pickle_file_path, encode_quality, adapter) 97 | packer.write() 98 | 99 | 100 | def read_file(): 101 | pickle_file_path = './data_folder/data_list_CCPD_train_debug.pkl' 102 | 103 | provider = PickleProvider(pickle_file_path) 104 | positive_index = provider.positive_index 105 | negative_index = provider.negative_index 106 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index))) 107 | all_index = positive_index+negative_index 108 | import random 109 | random.shuffle(all_index) 110 | 111 | for i, index in enumerate(all_index): 112 | im, flag, bboxes_numpy = provider.read_by_index(index) 113 | if isinstance(bboxes_numpy, numpy.ndarray): 114 | for n in range(bboxes_numpy.shape[0]): 115 | cv2.rectangle(im, (bboxes_numpy[n, 0], bboxes_numpy[n, 1]), 116 | (bboxes_numpy[n, 0] + bboxes_numpy[n, 2], bboxes_numpy[n, 1] + bboxes_numpy[n, 3]), (0, 255, 0), 2) 117 | cv2.imshow('im', im) 118 | cv2.waitKey() 119 | 120 | 121 | if __name__ == '__main__': 122 | # write_file() 123 | read_file() 124 | -------------------------------------------------------------------------------- /license_plate_detection/data_provider_farm/reformat_CCPD.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import os 3 | import cv2 4 | import json 5 | import math 6 | import random 7 | 8 | 9 | def annotation_from_name(file_name): 10 | file_name = file_name[:-4] 11 | name_split = file_name.split('-') 12 | location = name_split[2] 13 | location = location.split('_') 14 | left_top = location[0].split('&') 15 | right_bottom = location[1].split('&') 16 | x1 = int(left_top[0]) 17 | y1 = int(left_top[1]) 18 | x2 = int(right_bottom[0]) 19 | y2 = int(right_bottom[1]) 20 | 21 | return (x1, y1, x2-x1+1, y2-y1+1) 22 | 23 | 24 | def generate_data_list(): 25 | image_roots = ['/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_base', 26 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_blur', 27 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_challenge', 28 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_db', 29 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_fn', 30 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_rotate', 31 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_tilt', 32 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_weather'] 33 | 34 | train_list_file_path = './data_folder/data_list_CCPD_train.txt' 35 | test_list_file_path = './data_folder/data_list_CCPD_test.txt' 36 | if not os.path.exists(os.path.dirname(train_list_file_path)): 37 | os.makedirs(os.path.dirname(train_list_file_path)) 38 | fout_train = open(train_list_file_path, 'w') 39 | fout_test = open(test_list_file_path, 'w') 40 | 41 | 
train_proportion = 0.6 42 | train_counter = 0 43 | test_counter = 0 44 | for root in image_roots: 45 | file_name_list = [name for name in os.listdir(root) if name.endswith('.jpg')] 46 | random.shuffle(file_name_list) 47 | 48 | file_name_list_train = file_name_list[:int(len(file_name_list)*train_proportion)] 49 | file_name_list_test = file_name_list[int(len(file_name_list)*train_proportion):] 50 | 51 | for file_name in file_name_list_train: 52 | location_annotation = annotation_from_name(file_name) 53 | line = os.path.join(root, file_name)+',1,1,'+str(location_annotation[0])+','+str(location_annotation[1])+','+str(location_annotation[2])+','+str(location_annotation[3]) 54 | fout_train.write(line+'\n') 55 | train_counter += 1 56 | print(train_counter) 57 | 58 | for file_name in file_name_list_test: 59 | location_annotation = annotation_from_name(file_name) 60 | line = os.path.join(root, file_name)+',1,1,'+str(location_annotation[0])+','+str(location_annotation[1])+','+str(location_annotation[2])+','+str(location_annotation[3]) 61 | fout_test.write(line+'\n') 62 | test_counter += 1 63 | print(test_counter) 64 | 65 | fout_train.close() 66 | fout_test.close() 67 | 68 | 69 | def show_image(): 70 | list_file_path = './data_folder/data_list_CCPD_train.txt' 71 | 72 | fin = open(list_file_path, 'r') 73 | lines = fin.readlines() 74 | fin.close() 75 | 76 | import random 77 | random.shuffle(lines) 78 | for line in lines: 79 | line = line.strip('\n').split(',') 80 | 81 | im = cv2.imread(line[0]) 82 | 83 | bboxes = [] 84 | num_bboxes = int(line[2]) 85 | for i in range(num_bboxes): 86 | xmin = int(line[3 + i * 4]) 87 | ymin = int(line[4 + i * 4]) 88 | width = int(line[5 + i * 4]) 89 | height = int(line[6 + i * 4]) 90 | bboxes.append((xmin, ymin, xmin + width - 1, ymin + height - 1)) 91 | 92 | for bbox in bboxes: 93 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 2) 94 | 95 | cv2.imshow('im', im) 96 | cv2.waitKey() 97 | 98 | 99 | def dataset_statistics(): 100 | list_file_path = './data_folder/data_list_CCPD_train.txt' 101 | 102 | fin = open(list_file_path, 'r') 103 | lines = fin.readlines() 104 | fin.close() 105 | 106 | bin_size = 8 107 | longer_bin_dict = {} 108 | shorter_bin_dict = {} 109 | counter_pos = 0 110 | counter_neg = 0 111 | for line in lines: 112 | line = line.strip('\n').split(',') 113 | if line[1] == '0': 114 | counter_neg += 1 115 | continue 116 | else: 117 | counter_pos += 1 118 | num_bboxes = int(line[2]) 119 | for i in range(num_bboxes): 120 | width = int(line[5 + i * 4]) 121 | height = int(line[6 + i * 4]) 122 | 123 | longer_side = max(width, height) 124 | shorter_side = min(width, height) 125 | 126 | key = int(longer_side / bin_size) 127 | if key in longer_bin_dict: 128 | longer_bin_dict[key] += 1 129 | else: 130 | longer_bin_dict[key] = 1 131 | 132 | key = int(shorter_side / bin_size) 133 | if key in shorter_bin_dict: 134 | shorter_bin_dict[key] += 1 135 | else: 136 | shorter_bin_dict[key] = 1 137 | 138 | total_pedestrian = 0 139 | print('shorter side based statistics:') 140 | shorter_bin_dict_key_list = sorted(shorter_bin_dict) 141 | for k in shorter_bin_dict_key_list: 142 | v = shorter_bin_dict[k] 143 | total_pedestrian += v 144 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v)) 145 | 146 | print('longer side based statistics:') 147 | longer_bin_dict_key_list = sorted(longer_bin_dict) 148 | for k in longer_bin_dict_key_list: 149 | v = longer_bin_dict[k] 150 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v)) 151 | 152 | 
print('num pos: %d, num neg: %d' % (counter_pos, counter_neg)) 153 | print('total LP: %d' % total_pedestrian) 154 | 155 | 156 | if __name__ == '__main__': 157 | # test_name2anno() 158 | # generate_data_list() 159 | # show_image() 160 | dataset_statistics() 161 | -------------------------------------------------------------------------------- /license_plate_detection/data_provider_farm/text_list_adapter.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This adapter accepts a text file as input which describes the annotated data. 3 | Each line in the text is formatted as: 4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]...... 5 | ''' 6 | 7 | import cv2 8 | import numpy 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_data_adapter import DataAdapterBaseclass 10 | import random 11 | 12 | 13 | class TextListAdapter(DataAdapterBaseclass): 14 | 15 | def __init__(self, data_list_file_path): 16 | 17 | DataAdapterBaseclass.__init__(self) 18 | fin = open(data_list_file_path, 'r') 19 | self.lines = fin.readlines() 20 | fin.close() 21 | self.line_counter = 0 22 | 23 | def __del__(self): 24 | pass 25 | 26 | def get_one(self): 27 | """ 28 | This function uses 'yield' to return samples 29 | """ 30 | while self.line_counter < len(self.lines): 31 | 32 | line = self.lines[self.line_counter].strip('\n').split(',') 33 | if line[1] == '1': # for positive samples, verify that the number of bboxes matches the declared count 34 | assert len(line[3:]) == 4 * int(line[2]) 35 | 36 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED) 37 | 38 | if line[1] == '0': 39 | yield im, '0' 40 | self.line_counter += 1 41 | continue 42 | 43 | num_bboxes = int(line[2]) 44 | bboxes = [] 45 | for i in range(num_bboxes): 46 | x = float(line[3 + i * 4]) 47 | y = float(line[3 + i * 4 + 1]) 48 | width = float(line[3 + i * 4 + 2]) 49 | height = float(line[3 + i * 4 + 3]) 50 | 51 | bboxes.append([x, y, width, height]) 52 | 53 | bboxes = numpy.array(bboxes, dtype=numpy.float32) 54 | yield im, bboxes 55 | 56 | # generate negative samples by cropping regions outside the bbox extents 57 | left = numpy.min(bboxes[:, 0]) 58 | top = numpy.min(bboxes[:, 1]) 59 | right = numpy.max(bboxes[:, 0] + bboxes[:, 2]) 60 | bottom = numpy.max(bboxes[:, 1] + bboxes[:, 3]) 61 | if random.random() < 0.25: 62 | im_crop = im[:, :int(left), :].copy() 63 | if im_crop.shape[1] > 100: 64 | yield im_crop, '0' 65 | if random.random() < 0.25: 66 | im_crop = im[:, int(right):, :].copy() 67 | if im_crop.shape[1] > 100: 68 | yield im_crop, '0' 69 | if random.random() < 0.25: 70 | im_crop = im[:int(top), :, :].copy() 71 | if im_crop.shape[0] > 100: 72 | yield im_crop, '0' 73 | if random.random() < 0.25: 74 | im_crop = im[int(bottom):, :, :].copy() 75 | if im_crop.shape[0] > 100: 76 | yield im_crop, '0' 77 | 78 | self.line_counter += 1 79 | 80 | 81 | if __name__ == '__main__': 82 | pass 83 | -------------------------------------------------------------------------------- /license_plate_detection/inference_speed_evaluation/inference_speed_eval.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | 4 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') 5 | import mxnet 6 | 7 | eval_with_mxnet_flag = False 8 | symbol_file_path = '/home/heyonghao/projects/ChasingLicensePlateDetection/symbol_farm/symbol_64_512_16L_3scales_v1_deploy.json' 9 | input_shape = (1,3,2160,3840) # (1,3,240,320) (1,3,480,640) (1,3,720,1280) (1,3,1080,1920) (1,3,2160,3840) 10 | real_run_loops = 200 
11 | 12 | if eval_with_mxnet_flag: 13 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_mxnet_cudnn import InferenceSpeedEval as InferenceSpeedEvalMXNet 14 | 15 | inferenceSpeedEvalMXNet = InferenceSpeedEvalMXNet(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type='gpu', gpu_index=0) 16 | inferenceSpeedEvalMXNet.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops) 17 | 18 | else: 19 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_tensorrt_cudnn import InferenceSpeedEval as InferenceSpeedEvalTRT 20 | 21 | inferenceSpeedEvalTRT = InferenceSpeedEvalTRT(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape) 22 | inferenceSpeedEvalTRT.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops) 23 | -------------------------------------------------------------------------------- /license_plate_detection/metric_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/metric_farm/__init__.py -------------------------------------------------------------------------------- /license_plate_detection/metric_farm/metric_default.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy 4 | import mxnet 5 | 6 | 7 | class Metric: 8 | # num_scales is the number of losses to track, i.e., the number of scales 9 | def __init__(self, num_scales): 10 | self.sum_metric = [0.0 for i in range(num_scales * 2)] 11 | self.num_update = 0 12 | self.num_scales = num_scales 13 | self.num_nonzero = [1.0 for i in range(num_scales * 2)] 14 | self.scale_factor = 10000 15 | 16 | # it is expected that the shape is num*c*h*w 17 | def update(self, labels, preds): # note the order of the items in labels; refer to the prefetching dataiter 18 | 19 | for i in range(self.num_scales): 20 | mask = labels[i * 2] # the mask comes first 21 | label = labels[i * 2 + 1] # then the label 22 | 23 | score_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=0, end=2).asnumpy() 24 | bbox_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=2, end=6).asnumpy() 25 | 26 | label_bbox = mxnet.ndarray.slice_axis(label, axis=1, begin=2, end=6).asnumpy() 27 | 28 | pred_score = preds[i * 2].asnumpy() 29 | pred_bbox = preds[i * 2 + 1].asnumpy() 30 | 31 | loss_score = numpy.sum(pred_score * score_mask) 32 | loss_bbox = numpy.sum((label_bbox - pred_bbox) ** 2.0) 33 | 34 | self.num_nonzero[i * 2] += numpy.sum(score_mask[:, 0, :, :] > 0.5) 35 | self.num_nonzero[i * 2 + 1] += numpy.sum(bbox_mask > 0.5) 36 | self.sum_metric[i * 2] += loss_score 37 | self.sum_metric[i * 2 + 1] += loss_bbox 38 | 39 | self.num_update += 1 40 | 41 | def get(self): 42 | return_string_list = [] 43 | for i in range(self.num_scales): 44 | return_string_list.append('CE_loss_score_' + str(i)) 45 | return_string_list.append('SE_loss_bbox_' + str(i)) 46 | 47 | return return_string_list, [m / self.num_nonzero[i] * self.scale_factor for i, m in enumerate(self.sum_metric)] 48 | 49 | def reset(self): 50 | self.sum_metric = [0.0 for i in range(self.num_scales * 2)] 51 | self.num_update = 0 52 | self.num_nonzero = [1.0 for i in range(self.num_scales * 2)] 53 | -------------------------------------------------------------------------------- /license_plate_detection/symbol_farm/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/symbol_farm/__init__.py -------------------------------------------------------------------------------- /license_plate_detection/symbol_farm/symbol_structures.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/symbol_farm/symbol_structures.xlsx -------------------------------------------------------------------------------- /pedestrian_detection/README.md: -------------------------------------------------------------------------------- 1 | ## Pedestrian Detection 2 | We plan to use [Caltech Pedestrian Dataset](http://www.vision.caltech.edu/Image_Datasets/CaltechPedestrians/index.html) 3 | with [new annotations](http://www.vision.caltech.edu/Image_Datasets/CaltechPedestrians/index.html), 4 | [CityPersons](https://bitbucket.org/shanshanzhang/citypersons) (a part of [CityScapes](https://www.cityscapes-dataset.com/)) and 5 | [KITTI](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=2d) for benchmarking. 6 | 7 | ### Recent Update 8 | * `2019.09.18` preview version of model v1 for the Caltech Pedestrian Dataset is released. 9 | 10 | ### Brief Introduction to Model Version 11 | * v1 - is designed for the Caltech Pedestrian Dataset, covering the pedestrian scale range [30, 320]. It has 4 branches. Please check 12 | `./symbol_farm/symbol_structures.xlsx` for details. 13 | 14 | ### Inference Latency 15 | * Platform info: NVIDIA Jetson NANO, CUDA 10.0, CUDNN 7.5.0, TensorRT 5.1.6 16 | 17 | Model Version|160×140|320×240|640×480|1280×720 18 | -------------|-------|-------|-------|-------- 19 | v1|6.90ms(144.83FPS)|11.87ms(84.24FPS)|36.95ms(27.06FPS)|106.23ms(9.41FPS) 20 | v2|-|-|-|- 21 | 22 | * Platform info: NVIDIA Jetson TX2, CUDA 10.0, CUDNN 7.5.0, TensorRT 5.1.6 (power mode: MAXN) 23 | 24 | Model Version|160×140|320×240|640×480|1280×720|1920×1080 25 | -------------|-------|-------|-------|--------|--------- 26 | v1|3.63ms(275.43FPS)|6.80ms(147.36FPS)|15.87ms(63.01FPS)|43.33ms(23.08FPS)|93.93ms(10.65FPS) 27 | v2|-|-|-|-|- 28 | 29 | 30 | * Platform info: NVIDIA RTX 2080TI, CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0 31 | 32 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160|7680×4320 33 | -------------|-------|-------|--------|---------|---------|--------- 34 | v1|1.01ms(985.71FPS)|1.55ms(644.93FPS)|3.26ms(306.77FPS)|6.50ms(153.76FPS)|24.58ms(40.68FPS)|99.71ms(10.03FPS) 35 | v2|-|-|-|-|-|- 36 | 37 | * Platform info: NVIDIA GTX 1060(laptop), CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0 38 | 39 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160 40 | -------------|-------|-------|--------|---------|--------- 41 | v1|1.25ms(800.00FPS)|2.93ms(341.80FPS)|7.46ms(134.08FPS)|16.03ms(62.39FPS)|62.80ms(15.92FPS) 42 | v2|-|-|-|-|- 43 | 44 | > CAUTION: The latency may vary even in the same setting. 45 | 46 | ### Accuracy on Caltech Pedestrian Dataset 47 | After investigating the data, we found that the Caltech Pedestrian Dataset is not well annotated, even with the 48 | new annotations (some pedestrians are not annotated, some bboxes are not aligned well, and highly occluded instances are annotated). The final data used for training: 49 | 1559 pos images (at least one pedestrian inside), 2691 neg images; 4786 pedestrians in total; the longer side of bboxes 50 | varies from 10 pixels to 500 pixels. 
51 | 52 | Download links for packed training and test sets: 53 | * [Baidu Yunpan](https://pan.baidu.com/s/1SvoSeg5thFHDDwZc9gh09A) (pwd:8omv) 54 | * [MS OneDrive](https://1drv.ms/u/s!Av9h0YMgxdaSinO2G1DT-yPWkKc6?e=elsea6) 55 | * [Google Drive](https://drive.google.com/open?id=1ICNAEfLa2YHJvxE6_YZYAA8Cyl1N1kAD) 56 | 57 | #### Quantitative Results on Test Set 58 | Currently, the quantitative results are not ready. We will release them later. 59 | 60 | #### Some Qualitative Results on Test Set 61 | **(we found that false positives often appear at small scales, probably due to noisy training instances. At large scales, v1 performs well.)** 62 | 63 | ![image](./accuracy_evaluation/test_images/1064.jpg) 64 | ![image](./accuracy_evaluation/test_images/1199.jpg) 65 | ![image](./accuracy_evaluation/test_images/1212.jpg) 66 | ![image](./accuracy_evaluation/test_images/3981.jpg) 67 | 68 | > To play with the trained v1 model, please check `./accuracy_evaluation/predict.py`. 69 | ### User Instructions 70 | Please refer to [README in face_detection](../face_detection/README.md) for details. A sketch of the annotation list format shared by all tasks is shown after the test images below. -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/1064.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1064.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/1081.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1081.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/1104.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1104.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/1199.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1199.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/1212.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1212.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/1461.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1461.jpg -------------------------------------------------------------------------------- 
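Note: every detection task in this repo (face, head, license plate, pedestrian) shares the same comma-separated annotation list format, documented in the text_list_adapter files: `[image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],...`. Below is a minimal, self-contained sketch of parsing one such line; `parse_annotation_line` is a hypothetical helper for illustration and is not part of the repo.

```python
# Hypothetical helper illustrating the annotation list format used by the
# text_list_adapter files: path, pos/neg flag, bbox count, then x,y,w,h per bbox.
def parse_annotation_line(line):
    fields = line.strip('\n').split(',')
    image_path = fields[0]
    is_positive = fields[1] == '1'
    bboxes = []
    if is_positive:
        num_bboxes = int(fields[2])
        # same sanity check the adapters perform for positive samples
        assert len(fields[3:]) == 4 * num_bboxes, 'bbox count mismatch'
        for i in range(num_bboxes):
            x, y, w, h = (float(v) for v in fields[3 + i * 4: 7 + i * 4])
            bboxes.append((x, y, w, h))
    return image_path, is_positive, bboxes


# example: one positive sample with a single 40x80 box at (100, 50)
print(parse_annotation_line('/data/img_0001.jpg,1,1,100,50,40,80'))
```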
/pedestrian_detection/accuracy_evaluation/test_images/2210.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2210.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/2221.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2221.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/2396.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2396.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/2407.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2407.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/2756.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2756.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/3043.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3043.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/326.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/326.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/3368.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3368.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/3812.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3812.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/3914.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3914.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/3981.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3981.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/3988.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3988.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/877.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/877.jpg -------------------------------------------------------------------------------- /pedestrian_detection/config_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/config_farm/__init__.py -------------------------------------------------------------------------------- /pedestrian_detection/data_iterator_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/data_iterator_farm/__init__.py -------------------------------------------------------------------------------- /pedestrian_detection/data_provider_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/data_provider_farm/__init__.py -------------------------------------------------------------------------------- /pedestrian_detection/data_provider_farm/pickle_provider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This provider accepts an adapter, saves the dataset in a pickle file and loads the whole dataset into memory for data iterators 3 | ''' 4 | 5 | import cv2 6 | import numpy 7 | import pickle 8 | 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_provider import ProviderBaseclass 10 | from .text_list_adapter import TextListAdapter 11 | 12 | 13 | class PickleProvider(ProviderBaseclass): 14 | """ 15 | This class provides methods to save and read data. 16 | By default, images are compressed using JPG format. 
17 | If data_adapter is not None, the provider saves data; otherwise, it reads data 18 | """ 19 | 20 | def __init__(self, 21 | pickle_file_path, 22 | encode_quality=90, 23 | data_adapter=None): 24 | ProviderBaseclass.__init__(self) 25 | 26 | if data_adapter: # write data 27 | 28 | self.data_adapter = data_adapter 29 | self.data = {} 30 | self.counter = 0 31 | self.pickle_file_path = pickle_file_path 32 | 33 | else: # read data 34 | 35 | self.data = pickle.load(open(pickle_file_path, 'rb')) 36 | # get positive and negative indices 37 | self._positive_index = [] 38 | self._negative_index = [] 39 | for k, v in self.data.items(): 40 | if v[1] == 0: # negative 41 | self._negative_index.append(k) 42 | else: # positive 43 | self._positive_index.append(k) 44 | 45 | self.compression_mode = '.jpg' 46 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality] 47 | 48 | @property 49 | def positive_index(self): 50 | return self._positive_index 51 | 52 | @property 53 | def negative_index(self): 54 | return self._negative_index 55 | 56 | def write(self): 57 | 58 | for data_item in self.data_adapter.get_one(): 59 | 60 | temp_sample = [] 61 | im, bboxes = data_item 62 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params) 63 | if buf is None or buf.size == 0: 64 | print('buf is wrong.') 65 | continue 66 | if not ret: 67 | print('An error occurred.') 68 | continue 69 | temp_sample.append(buf) 70 | 71 | if isinstance(bboxes, str): # negative sample 72 | temp_sample.append(0) 73 | temp_sample.append(int(bboxes)) 74 | else: 75 | temp_sample.append(1) 76 | temp_sample.append(bboxes) 77 | 78 | self.data[self.counter] = temp_sample 79 | print('Successfully save the %d-th data item.' % self.counter) 80 | self.counter += 1 81 | 82 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) 83 | 84 | def read_by_index(self, index): 85 | im_buf, flag, bboxes = self.data[index] 86 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR) 87 | return im, flag, bboxes 88 | 89 | 90 | def write_file(): 91 | data_list_file_path = './data_folder/data_list_caltech_test.txt' 92 | adapter = TextListAdapter(data_list_file_path) 93 | 94 | pickle_file_path = './data_folder/data_list_caltech_test.pkl' 95 | encode_quality = 90 96 | packer = PickleProvider(pickle_file_path, encode_quality, adapter) 97 | packer.write() 98 | 99 | 100 | def read_file(): 101 | pickle_file_path = './data_folder/data_list_caltech_test.pkl' 102 | 103 | provider = PickleProvider(pickle_file_path) 104 | positive_index = provider.positive_index 105 | negative_index = provider.negative_index 106 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index))) 107 | # all_index = positive_index+negative_index 108 | import random 109 | random.shuffle(positive_index) 110 | 111 | for i, index in enumerate(positive_index): 112 | im, flag, bboxes_numpy = provider.read_by_index(index) 113 | if isinstance(bboxes_numpy, numpy.ndarray): 114 | for n in range(bboxes_numpy.shape[0]): 115 | cv2.rectangle(im, (int(bboxes_numpy[n, 0]), int(bboxes_numpy[n, 1])),  # bboxes are stored as float32, so cast to int for cv2.rectangle 116 | (int(bboxes_numpy[n, 0] + bboxes_numpy[n, 2]), int(bboxes_numpy[n, 1] + bboxes_numpy[n, 3])), (0, 255, 0), 2) 117 | cv2.imshow('im', im) 118 | cv2.waitKey() 119 | 120 | 121 | if __name__ == '__main__': 122 | # write_file() 123 | read_file() 124 | -------------------------------------------------------------------------------- /pedestrian_detection/data_provider_farm/reformat_caltech.py: -------------------------------------------------------------------------------- 1 
| import numpy 2 | import os 3 | import cv2 4 | import json 5 | import math 6 | 7 | 8 | def generate_data_list(): 9 | annotation_root = '/media/heyonghao/HYH-4T-WD/public_dataset/Caltech/Caltech_new_annotations/anno_test_1xnew' 10 | image_root = '/media/heyonghao/HYH-4T-WD/public_dataset/Caltech/Caltech_data/extracted_data' 11 | 12 | list_file_path = './data_folder/data_list_caltech_test.txt' 13 | if not os.path.exists(os.path.dirname(list_file_path)): 14 | os.makedirs(os.path.dirname(list_file_path)) 15 | fout = open(list_file_path, 'w') 16 | 17 | counter = 0 18 | for parent, dirnames, filenames in os.walk(annotation_root): 19 | for filename in filenames: 20 | if not filename.endswith('.txt'): 21 | continue 22 | 23 | filename_splits = filename[:-4].split('_') 24 | set_name = filename_splits[0] 25 | seq_name = filename_splits[1] 26 | img_name = filename_splits[2] 27 | 28 | img_path = os.path.join(image_root, set_name, seq_name, 'images', img_name) 29 | if not os.path.exists(img_path): 30 | print('The corresponding image does not exist! [%s]' % img_path) 31 | continue 32 | 33 | line = img_path 34 | 35 | fin_anno = open(os.path.join(parent, filename), 'r') 36 | 37 | bbox_list = [] 38 | for i, anno in enumerate(fin_anno): 39 | if i == 0: 40 | continue 41 | anno = anno.strip('\n').split(' ') 42 | if anno[0] != 'person': 43 | continue 44 | x = math.floor(float(anno[1])) 45 | y = math.floor(float(anno[2])) 46 | width = math.ceil(float(anno[3])) 47 | height = math.ceil(float(anno[4])) 48 | 49 | width_vis = math.ceil(float(anno[8])) 50 | height_vis = math.ceil(float(anno[9])) 51 | 52 | if (width_vis*height_vis)/(width*height) < 0.2: 53 | continue 54 | 55 | bbox_list.append((x, y, width, height)) 56 | if len(bbox_list) == 0: 57 | line += ',0,0' 58 | fout.write(line + '\n') 59 | else: 60 | bbox_line = '' 61 | for bbox in bbox_list: 62 | bbox_line += ',' + str(bbox[0]) + ',' + str(bbox[1]) + ',' + str(bbox[2]) + ',' + str(bbox[3]) 63 | line += ',1,' + str(len(bbox_list)) + bbox_line 64 | fout.write(line + '\n') 65 | counter += 1 66 | print(counter) 67 | 68 | fout.close() 69 | 70 | 71 | def show_image(): 72 | list_file_path = './data_folder/data_list_caltech_test.txt' 73 | 74 | fin = open(list_file_path, 'r') 75 | lines = fin.readlines() 76 | fin.close() 77 | 78 | import random 79 | random.shuffle(lines) 80 | for line in lines: 81 | line = line.strip('\n').split(',') 82 | 83 | im = cv2.imread(line[0]) 84 | 85 | bboxes = [] 86 | num_bboxes = int(line[2]) 87 | for i in range(num_bboxes): 88 | xmin = int(line[3 + i * 4]) 89 | ymin = int(line[4 + i * 4]) 90 | width = int(line[5 + i * 4]) 91 | height = int(line[6 + i * 4]) 92 | bboxes.append((xmin, ymin, xmin + width - 1, ymin + height - 1)) 93 | 94 | for bbox in bboxes: 95 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 2) 96 | 97 | cv2.imshow('im', im) 98 | cv2.waitKey() 99 | 100 | 101 | def dataset_statistics(): 102 | list_file_path = './data_folder/data_list_caltech_test.txt' 103 | 104 | fin = open(list_file_path, 'r') 105 | lines = fin.readlines() 106 | fin.close() 107 | 108 | bin_size = 10 109 | longer_bin_dict = {} 110 | shorter_bin_dict = {} 111 | counter_pos = 0 112 | counter_neg = 0 113 | for line in lines: 114 | line = line.strip('\n').split(',') 115 | if line[1] == '0': 116 | counter_neg += 1 117 | continue 118 | else: 119 | counter_pos += 1 120 | num_bboxes = int(line[2]) 121 | for i in range(num_bboxes): 122 | width = int(line[5 + i * 4]) 123 | height = int(line[6 + i * 4]) 124 | 125 | longer_side = max(width, 
height) 126 | shorter_side = min(width, height) 127 | 128 | key = int(longer_side / bin_size) 129 | if key in longer_bin_dict: 130 | longer_bin_dict[key] += 1 131 | else: 132 | longer_bin_dict[key] = 1 133 | 134 | key = int(shorter_side / bin_size) 135 | if key in shorter_bin_dict: 136 | shorter_bin_dict[key] += 1 137 | else: 138 | shorter_bin_dict[key] = 1 139 | 140 | total_pedestrian = 0 141 | print('shorter side based statistics:') 142 | shorter_bin_dict_key_list = sorted(shorter_bin_dict) 143 | for k in shorter_bin_dict_key_list: 144 | v = shorter_bin_dict[k] 145 | total_pedestrian += v 146 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v)) 147 | 148 | print('longer side based statistics:') 149 | longer_bin_dict_key_list = sorted(longer_bin_dict) 150 | for k in longer_bin_dict_key_list: 151 | v = longer_bin_dict[k] 152 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v)) 153 | 154 | print('num pos: %d, num neg: %d' % (counter_pos, counter_neg)) 155 | print('total pedestrian: %d' % total_pedestrian) 156 | 157 | 158 | if __name__ == '__main__': 159 | # generate_data_list() 160 | show_image() 161 | # dataset_statistics() 162 | -------------------------------------------------------------------------------- /pedestrian_detection/data_provider_farm/text_list_adapter.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This adapter accepts a text file as input which describes the annotated data. 3 | Each line in the text is formatted as: 4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]...... 5 | ''' 6 | 7 | import cv2 8 | import numpy 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_data_adapter import DataAdapterBaseclass 10 | 11 | 12 | class TextListAdapter(DataAdapterBaseclass): 13 | 14 | def __init__(self, data_list_file_path): 15 | 16 | DataAdapterBaseclass.__init__(self) 17 | fin = open(data_list_file_path, 'r') 18 | self.lines = fin.readlines() 19 | fin.close() 20 | self.line_counter = 0 21 | 22 | def __del__(self): 23 | pass 24 | 25 | def get_one(self): 26 | """ 27 | This function uses 'yield' to return samples 28 | """ 29 | while self.line_counter < len(self.lines): 30 | 31 | line = self.lines[self.line_counter].strip('\n').split(',') 32 | if line[1] == '1': # for positive samples, verify that the number of bboxes matches the declared count 33 | assert len(line[3:]) == 4 * int(line[2]) 34 | 35 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED) 36 | 37 | if line[1] == '0': 38 | yield im, '0' 39 | self.line_counter += 1 40 | continue 41 | 42 | num_bboxes = int(line[2]) 43 | bboxes = [] 44 | for i in range(num_bboxes): 45 | x = float(line[3 + i * 4]) 46 | y = float(line[3 + i * 4 + 1]) 47 | width = float(line[3 + i * 4 + 2]) 48 | height = float(line[3 + i * 4 + 3]) 49 | 50 | bboxes.append([x, y, width, height]) 51 | 52 | bboxes = numpy.array(bboxes, dtype=numpy.float32) 53 | yield im, bboxes 54 | 55 | self.line_counter += 1 56 | 57 | 58 | if __name__ == '__main__': 59 | pass 60 | -------------------------------------------------------------------------------- /pedestrian_detection/inference_speed_evaluation/inference_speed_eval.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | 4 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') 5 | import mxnet 6 | 7 | eval_with_mxnet_flag = False 8 | symbol_file_path = '/home/heyonghao/projects/ChasingPedestrainDetection/symbol_farm/symbol_30_320_20L_4scales_v1_deploy.json' 9 | 
input_shape = (1, 3, 2160, 3840) # (1,3,240,320) (1,3,480,640) (1,3,720,1280) (1,3,1080,1920) (1,3,2160,3840) 10 | real_run_loops = 200 11 | 12 | if eval_with_mxnet_flag: 13 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_mxnet_cudnn import InferenceSpeedEval as InferenceSpeedEvalMXNet 14 | 15 | inferenceSpeedEvalMXNet = InferenceSpeedEvalMXNet(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type='gpu', gpu_index=0) 16 | inferenceSpeedEvalMXNet.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops) 17 | 18 | else: 19 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_tensorrt_cudnn import InferenceSpeedEval as InferenceSpeedEvalTRT 20 | 21 | inferenceSpeedEvalTRT = InferenceSpeedEvalTRT(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape) 22 | inferenceSpeedEvalTRT.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops) 23 | -------------------------------------------------------------------------------- /pedestrian_detection/metric_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/metric_farm/__init__.py -------------------------------------------------------------------------------- /pedestrian_detection/metric_farm/metric_default.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy 4 | import mxnet 5 | 6 | 7 | class Metric: 8 | # num_scales is the number of losses to track, i.e., the number of scales 9 | def __init__(self, num_scales): 10 | self.sum_metric = [0.0 for i in range(num_scales * 2)] 11 | self.num_update = 0 12 | self.num_scales = num_scales 13 | self.num_nonzero = [1.0 for i in range(num_scales * 2)] 14 | self.scale_factor = 10000 15 | 16 | # it is expected that the shape is num*c*h*w 17 | def update(self, labels, preds): # note the order of the items in labels; refer to the prefetching dataiter 18 | 19 | for i in range(self.num_scales): 20 | mask = labels[i * 2] # the mask comes first 21 | label = labels[i * 2 + 1] # then the label 22 | 23 | score_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=0, end=2).asnumpy() 24 | bbox_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=2, end=6).asnumpy() 25 | 26 | label_bbox = mxnet.ndarray.slice_axis(label, axis=1, begin=2, end=6).asnumpy() 27 | 28 | pred_score = preds[i * 2].asnumpy() 29 | pred_bbox = preds[i * 2 + 1].asnumpy() 30 | 31 | loss_score = numpy.sum(pred_score * score_mask) 32 | loss_bbox = numpy.sum((label_bbox - pred_bbox) ** 2.0) 33 | 34 | self.num_nonzero[i * 2] += numpy.sum(score_mask[:, 0, :, :] > 0.5) 35 | self.num_nonzero[i * 2 + 1] += numpy.sum(bbox_mask > 0.5) 36 | self.sum_metric[i * 2] += loss_score 37 | self.sum_metric[i * 2 + 1] += loss_bbox 38 | 39 | self.num_update += 1 40 | 41 | def get(self): 42 | return_string_list = [] 43 | for i in range(self.num_scales): 44 | return_string_list.append('CE_loss_score_' + str(i)) 45 | return_string_list.append('SE_loss_bbox_' + str(i)) 46 | 47 | return return_string_list, [m / self.num_nonzero[i] * self.scale_factor for i, m in enumerate(self.sum_metric)] 48 | 49 | def reset(self): 50 | self.sum_metric = [0.0 for i in range(self.num_scales * 2)] 51 | self.num_update = 0 52 | self.num_nonzero = [1.0 for i in range(self.num_scales * 2)] 53 | -------------------------------------------------------------------------------- 
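The Metric class above (duplicated across the detection tasks) accumulates two running losses per scale, a cross-entropy term for scores and a squared-error term for bbox regression, each normalized by the count of non-zero mask entries and multiplied by a scale factor of 10000. A minimal usage sketch with synthetic tensors, assuming `metric_default.py` is importable as a plain module; the shapes are illustrative (6 label channels: 2 score channels followed by 4 bbox channels):

```python
import mxnet

from metric_default import Metric  # assumes this file is on the Python path

num_scales = 2
metric = Metric(num_scales)

# synthetic batch: for each scale, labels hold (mask, label) and preds hold (score, bbox)
labels, preds = [], []
for _ in range(num_scales):
    labels.append(mxnet.ndarray.ones((1, 6, 4, 4)))   # mask: channels 0-1 score, 2-5 bbox
    labels.append(mxnet.ndarray.zeros((1, 6, 4, 4)))  # label: same channel layout
    preds.append(mxnet.ndarray.zeros((1, 2, 4, 4)))   # predicted score map
    preds.append(mxnet.ndarray.zeros((1, 4, 4, 4)))   # predicted bbox map

metric.update(labels, preds)
names, values = metric.get()
for name, value in zip(names, values):
    print('%s: %.4f' % (name, value))
metric.reset()
```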
/pedestrian_detection/symbol_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/symbol_farm/__init__.py -------------------------------------------------------------------------------- /pedestrian_detection/symbol_farm/symbol_structures.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/symbol_farm/symbol_structures.xlsx -------------------------------------------------------------------------------- /vehicle_detection/README.md: -------------------------------------------------------------------------------- 1 | Coming soon... --------------------------------------------------------------------------------
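For completeness, the data pipeline shared by the tasks above chains three pieces: a reformat_* script writes the annotation list, TextListAdapter streams samples from it, and PickleProvider packs them into (and reads them back from) a single pickle file. A minimal round-trip sketch, assuming the pedestrian_detection data_provider_farm modules are importable as plain modules and the list file already exists; the paths are illustrative:

```python
from pickle_provider import PickleProvider      # assumes plain-module imports work here
from text_list_adapter import TextListAdapter

list_path = './data_folder/data_list_caltech_test.txt'  # existing annotation list
pkl_path = './data_folder/data_list_caltech_test.pkl'

# pack: JPG-encode every image (quality 90) and pickle the whole dataset
adapter = TextListAdapter(list_path)
PickleProvider(pkl_path, encode_quality=90, data_adapter=adapter).write()

# unpack: load everything into memory and fetch one positive sample
provider = PickleProvider(pkl_path)
im, flag, bboxes = provider.read_by_index(provider.positive_index[0])
print(im.shape, flag, bboxes)
```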