├── .gitignore ├── ChasingTrainFramework_GeneralOneClassDetection ├── README.md ├── __init__.py ├── data_iterator_base │ ├── __init__.py │ └── data_batch.py ├── data_provider_base │ ├── __init__.py │ ├── base_data_adapter.py │ ├── base_provider.py │ ├── pickle_provider.py │ └── text_list_adapter.py ├── image_augmentation │ ├── __init__.py │ └── augmentor.py ├── inference_speed_eval │ ├── __init__.py │ ├── inference_speed_eval_with_mxnet_cudnn.py │ └── inference_speed_eval_with_tensorrt_cudnn.py ├── logging_GOCD.py ├── loss_layer_farm │ ├── __init__.py │ ├── cross_entropy_with_focal_loss_for_one_class_detection.py │ ├── cross_entropy_with_hnm_for_one_class_detection.py │ ├── loss.py │ ├── mean_squared_error_with_hnm_for_one_class_detection.py │ └── mean_squared_error_with_ohem_for_one_class_detection.py ├── solver_GOCD.py └── train_GOCD.py ├── LICENSE ├── README.md ├── face_detection ├── README.md ├── accuracy_evaluation │ ├── evaluation_on_fddb.py │ ├── evaluation_on_widerface.py │ └── predict.py ├── config_farm │ ├── __init__.py │ ├── configuration_10_320_20L_5scales_v2.py │ └── configuration_10_560_25L_8scales_v1.py ├── data_iterator_farm │ ├── __init__.py │ ├── multithread_dataiter_for_cross_entropy_v1.py │ └── multithread_dataiter_for_cross_entropy_v2.py ├── data_provider_farm │ ├── __init__.py │ ├── data_folder │ │ └── .gitkeep │ ├── pickle_provider.py │ └── text_list_adapter.py ├── demo │ └── demo.py ├── deploy_tensorrt │ ├── README.md │ ├── debug_image │ │ ├── test1.jpg │ │ ├── test2.jpg │ │ ├── test3.jpg │ │ ├── test5.jpg │ │ └── test6.jpg │ ├── predict_tensorrt.py │ └── to_onnx.py ├── inference_speed_evaluation │ ├── README.md │ └── inference_speed_eval.py ├── metric_farm │ ├── __init__.py │ └── metric_default.py ├── net_farm │ ├── __init__.py │ ├── naivenet.py │ ├── naivenet20_resv2.gv │ ├── naivenet20_resv2.gv.svg │ └── naivenet_structures.xlsx ├── qualitative_results │ ├── v1_qualitative_1.jpg │ ├── v1_qualitative_2.jpg │ ├── v1_qualitative_3.jpg │ ├── v1_qualitative_4.jpg │ └── v1_qualitative_5.jpg └── saved_model │ ├── configuration_10_320_20L_5scales_v2 │ └── .gitkeep │ └── configuration_10_560_25L_8scales_v1 │ └── .gitkeep ├── head_detection ├── README.md ├── accuracy_evaluation │ ├── evaluation_on_brainwash.py │ ├── predict.py │ └── test_images │ │ ├── 2.jpg │ │ ├── 247.jpg │ │ ├── 322.jpg │ │ ├── 342.jpg │ │ ├── 377.jpg │ │ ├── 411.jpg │ │ ├── 5.jpg │ │ ├── 7.jpg │ │ └── 72.jpg ├── config_farm │ ├── __init__.py │ └── configuration_10_160_17L_4scales_v1.py ├── data_iterator_farm │ ├── __init__.py │ └── multithread_dataiter_for_cross_entropy_v1.py ├── data_provider_farm │ ├── pickle_provider.py │ ├── reformat_brainwash.py │ └── text_list_adapter.py ├── inference_speed_evaluation │ └── inference_speed_eval.py ├── metric_farm │ ├── __init__.py │ └── metric_default.py └── symbol_farm │ ├── __init__.py │ ├── symbol_10_160_17L_4scales_v1.py │ ├── symbol_10_160_17L_4scales_v1_deploy.json │ └── symbol_structures.xlsx ├── license_plate_detection ├── README.md ├── accuracy_evaluation │ ├── evaluation_on_CCPD.py │ ├── predict.py │ └── test_images │ │ ├── test1.jpg_result.jpg │ │ ├── test2.jpg_result.jpg │ │ ├── test3.jpg_result.jpg │ │ ├── test4.jpg_result.jpg │ │ ├── test5.jpg_result.jpg │ │ ├── test6.jpg_result.jpg │ │ └── test7.jpg_result.jpg ├── config_farm │ ├── __init__.py │ └── configuration_64_512_16L_3scales_v1.py ├── data_iterator_farm │ ├── __init__.py │ └── multithread_dataiter_for_cross_entropy_v1.py ├── data_provider_farm │ ├── __init__.py │ ├── pickle_provider.py │ ├── 
reformat_CCPD.py │ └── text_list_adapter.py ├── inference_speed_evaluation │ └── inference_speed_eval.py ├── metric_farm │ ├── __init__.py │ └── metric_default.py └── symbol_farm │ ├── __init__.py │ ├── symbol_64_512_16L_3scales_v1.py │ ├── symbol_64_512_16L_3scales_v1_deploy.json │ └── symbol_structures.xlsx ├── pedestrian_detection ├── README.md ├── accuracy_evaluation │ ├── predict.py │ └── test_images │ │ ├── 1064.jpg │ │ ├── 1081.jpg │ │ ├── 1104.jpg │ │ ├── 1199.jpg │ │ ├── 1212.jpg │ │ ├── 1461.jpg │ │ ├── 2210.jpg │ │ ├── 2221.jpg │ │ ├── 2396.jpg │ │ ├── 2407.jpg │ │ ├── 2756.jpg │ │ ├── 3043.jpg │ │ ├── 326.jpg │ │ ├── 3368.jpg │ │ ├── 3812.jpg │ │ ├── 3914.jpg │ │ ├── 3981.jpg │ │ ├── 3988.jpg │ │ └── 877.jpg ├── config_farm │ ├── __init__.py │ └── configuration_30_320_20L_4scales_v1.py ├── data_iterator_farm │ ├── __init__.py │ └── multithread_dataiter_for_cross_entropy_v1.py ├── data_provider_farm │ ├── __init__.py │ ├── pickle_provider.py │ ├── reformat_caltech.py │ └── text_list_adapter.py ├── inference_speed_evaluation │ └── inference_speed_eval.py ├── metric_farm │ ├── __init__.py │ └── metric_default.py └── symbol_farm │ ├── __init__.py │ ├── symbol_30_320_20L_4scales_v1.py │ ├── symbol_30_320_20L_4scales_v1_deploy.json │ └── symbol_structures.xlsx └── vehicle_detection └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # IPython 76 | profile_default/ 77 | ipython_config.py 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # Environments 89 | .env 90 | .venv 91 | env/ 92 | venv/ 93 | ENV/ 94 | env.bak/ 95 | venv.bak/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | .dmypy.json 110 | dmypy.json 111 | 112 | 113 | .idea/ 114 | 115 | # pytorch model 116 | *.pth 117 | 118 | # pkl format dataset 119 | *.pkl 120 | 121 | # mxnet model 122 | *.params 123 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/README.md: -------------------------------------------------------------------------------- 1 | ## ChasingTrainFramework_GeneralOneClassDetection 2 | ChasingTrainFramework_GeneralOneClassDetection is a simple 3 | training wrapper based on the MXNet Module API for general one-class detection. 4 | `Chasing` is just a project codename. 5 | 6 | ### Framework Introduction 7 | * **data_iterator_base** provides utilities for batch iterators. The design of a data 8 | iterator depends on the specific task, so we do not provide a default iterator here. 9 | 10 | * **data_provider_base** reformats and packs raw data. In most cases, all data can be loaded into 11 | memory for fast access. 12 | 13 | * **image_augmentation** provides commonly used augmentations. 14 | 15 | * **inference_speed_eval** provides two ways to evaluate inference speed -- MXNet with cuDNN and TensorRT with cuDNN. 16 | 17 | * **loss_layer_farm** provides customized losses such as cross entropy with hard negative mining and focal loss. 18 | 19 | * **logging_GOCD** is a logging wrapper. 20 | 21 | * **solver_GOCD** executes the training process. 22 | 23 | * **train_GOCD** is the entry point of the framework.
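The components above are wired together through `train_GOCD.start_train`, which builds a `Solver` and runs the training loop. Below is a minimal sketch of a training entry script; `MyNet`, `my_train_dataiter` and `my_train_metric` are hypothetical placeholders for the task-specific network, data iterator and metric (see the `config_farm` scripts of the concrete detection tasks for complete, working configurations):

```python
import torch
from ChasingTrainFramework_GeneralOneClassDetection import train_GOCD
from ChasingTrainFramework_GeneralOneClassDetection.logging_GOCD import init_logging
from ChasingTrainFramework_GeneralOneClassDetection.loss_layer_farm.loss import \
    cross_entropy_with_hnm_for_one_class_detection

init_logging(log_file_path='./log/train.log', log_overwrite_flag=True)

net = MyNet()  # hypothetical task-specific network
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[300000, 400000], gamma=0.1)
loss_criterion = cross_entropy_with_hnm_for_one_class_detection(hnm_ratio=5, num_output_scales=8)

train_GOCD.start_train(
    param_dict={}, task_name='my_detection_task', torch_module=torch, gpu_id_list=[0],
    train_dataiter=my_train_dataiter, train_metric=my_train_metric,
    train_metric_update_frequency=1, num_train_loops=500000,
    val_dataiter=None, val_metric=None, num_val_loops=0, validation_interval=0,
    optimizer=optimizer, lr_scheduler=lr_scheduler, net=net, net_initializer=None,
    loss_criterion=loss_criterion, pretrained_model_param_path=None,
    display_interval=100, save_prefix='./saved_model', model_save_interval=100000,
    start_index=0)
```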
-------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/__init__.py -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_iterator_base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/data_iterator_base/__init__.py -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_iterator_base/data_batch.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | class DataBatch: 5 | def __init__(self, torch_module): 6 | self._data = [] 7 | self._label = [] 8 | self.torch_module = torch_module 9 | 10 | def append_data(self, new_data): 11 | self._data.append(self.__as_tensor(new_data)) 12 | 13 | def append_label(self, new_label): 14 | self._label.append(self.__as_tensor(new_label)) 15 | 16 | def __as_tensor(self, in_data): 17 | return self.torch_module.from_numpy(in_data) 18 | 19 | @property 20 | def data(self): 21 | return self._data 22 | 23 | @property 24 | def label(self): 25 | return self._label 26 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/__init__.py -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/base_data_adapter.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module reads, modifies and returns a single sample. 3 | It is only used in the data packing phase. 4 | """ 5 | 6 | 7 | class DataAdapterBaseclass(object): 8 | 9 | def __init__(self): 10 | pass 11 | 12 | def __del__(self): 13 | pass 14 | 15 | def get_one(self): 16 | """ 17 | return only one sample each time 18 | :return: 19 | """ 20 | raise NotImplementedError() 21 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/base_provider.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module takes an adapter as the data supplier, packs the data and provides it to data iterators 3 | 4 | """ 5 | 6 | 7 | class ProviderBaseclass(object): 8 | """ 9 | This is the base class of data providers (packers). Any concrete provider must inherit this class.
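Concrete providers are expected to implement write() to pack samples into a file and read_by_index() to load a single packed sample back (see PickleProvider below for a working example).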
10 | """ 11 | 12 | def __init__(self): 13 | pass 14 | 15 | def __str__(self): 16 | return self.__class__.__name__ 17 | 18 | def __del__(self): 19 | pass 20 | 21 | def write(self): 22 | """ 23 | Write a single sample to the files 24 | :return: 25 | """ 26 | raise NotImplementedError() 27 | 28 | def read_by_index(self, index): 29 | """ 30 | Read a single sample 31 | :return: 32 | """ 33 | raise NotImplementedError() 34 | 35 | 36 | if __name__ == '__main__': 37 | provider = ProviderBaseclass() 38 | print(provider) 39 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/pickle_provider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This provider accepts an adapter, save dataset in pickle file and load all dataset to memory for data iterators 3 | ''' 4 | 5 | import cv2 6 | import numpy 7 | import pickle 8 | 9 | from .base_provider import ProviderBaseclass 10 | from .text_list_adapter import TextListAdapter 11 | 12 | 13 | class PickleProvider(ProviderBaseclass): 14 | """ 15 | This class provides methods to save and read data. 16 | By default, images are compressed using JPG format. 17 | If data_adapter is not None, it means saving data, or it is reading data 18 | """ 19 | 20 | def __init__(self, 21 | pickle_file_path, 22 | encode_quality=90, 23 | data_adapter=None): 24 | ProviderBaseclass.__init__(self) 25 | 26 | if data_adapter: # write data 27 | 28 | self.data_adapter = data_adapter 29 | self.data = {} 30 | self.counter = 0 31 | self.pickle_file_path = pickle_file_path 32 | 33 | else: # read data 34 | 35 | self.data = pickle.load(open(pickle_file_path, 'rb')) 36 | # get positive and negative indeices 37 | self._positive_index = [] 38 | self._negative_index = [] 39 | for k, v in self.data.items(): 40 | if v[1] == 0: # negative 41 | self._negative_index.append(k) 42 | else: # positive 43 | self._positive_index.append(k) 44 | 45 | self.compression_mode = '.jpg' 46 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality] 47 | 48 | @property 49 | def positive_index(self): 50 | return self._positive_index 51 | 52 | @property 53 | def negative_index(self): 54 | return self._negative_index 55 | 56 | def write(self): 57 | 58 | for data_item in self.data_adapter.get_one(): 59 | 60 | temp_sample = [] 61 | im, bboxes = data_item 62 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params) 63 | if buf is None or buf.size == 0: 64 | print('buf is wrong.') 65 | continue 66 | if not ret: 67 | print('An error is occurred while com:pression.') 68 | continue 69 | temp_sample.append(buf) 70 | 71 | if isinstance(bboxes, str): # 负样本 72 | temp_sample.append(0) 73 | temp_sample.append(int(bboxes)) 74 | else: 75 | temp_sample.append(1) 76 | temp_sample.append(bboxes) 77 | 78 | self.data[self.counter] = temp_sample 79 | print('Successfully save the %d-th data item.' 
% self.counter) 80 | self.counter += 1 81 | 82 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) 83 | 84 | def read_by_index(self, index): 85 | im_buf, flag, bboxes = self.data[index] 86 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR) 87 | return im, flag, bboxes 88 | 89 | 90 | def write_file(): 91 | data_list_file_path = './data_folder/data_list_2019-05-07-14-47-19.txt' 92 | LFPD_adapter = TextListAdapter(data_list_file_path) 93 | 94 | pickle_file_path = './data_folder/data_2019-05-07-14-47-19.pkl' 95 | encode_quality = 90 96 | LFPD_packer = PickleProvider(pickle_file_path, encode_quality, LFPD_adapter) 97 | LFPD_packer.write() 98 | 99 | 100 | def read_file(): 101 | pickle_file_path = './data_folder/data_2019-05-07-14-47-19.pkl' 102 | 103 | LFPD_provider = PickleProvider(pickle_file_path) 104 | positive_index = LFPD_provider.positive_index 105 | negative_index = LFPD_provider.negative_index 106 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index))) 107 | # all_index = positive_index+negative_index 108 | import random 109 | random.shuffle(positive_index) 110 | 111 | for i, index in enumerate(positive_index): 112 | im, flag, bboxes_numpy = LFPD_provider.read_by_index(index) 113 | if isinstance(bboxes_numpy, numpy.ndarray): 114 | for n in range(bboxes_numpy.shape[0]): 115 | # cast to int: cv2.rectangle requires integer pixel coordinates 116 | cv2.rectangle(im, (int(bboxes_numpy[n, 0]), int(bboxes_numpy[n, 1])), (int(bboxes_numpy[n, 0] + bboxes_numpy[n, 2]), int(bboxes_numpy[n, 1] + bboxes_numpy[n, 3])), (0, 255, 0), 1) 117 | cv2.imshow('im', im) 118 | cv2.waitKey() 119 | 120 | 121 | if __name__ == '__main__': 122 | pass 123 | write_file() 124 | # read_file() 125 | # generate_rec_20181202() 126 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/data_provider_base/text_list_adapter.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This adapter accepts a text file as input which describes the annotated data. 3 | Each line in the text is formatted as: 4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]......
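For example (hypothetical paths; first a positive sample with 2 bboxes, then a negative sample):
/data/images/pos_0001.jpg,1,2,10,20,30,40,60,80,30,40
/data/images/neg_0001.jpg,0,0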
5 | ''' 6 | 7 | import cv2 8 | import numpy 9 | from .base_data_adapter import DataAdapterBaseclass 10 | 11 | 12 | class TextListAdapter(DataAdapterBaseclass): 13 | 14 | def __init__(self, data_list_file_path): 15 | 16 | DataAdapterBaseclass.__init__(self) 17 | fin = open(data_list_file_path, 'r') 18 | self.lines = fin.readlines() 19 | fin.close() 20 | self.line_counter = 0 21 | 22 | def __del__(self): 23 | pass 24 | 25 | def get_one(self): 26 | """ 27 | This function uses 'yield' to return samples 28 | """ 29 | while self.line_counter < len(self.lines): 30 | 31 | line = self.lines[self.line_counter].strip('\n').split(',') 32 | if line[1] == '1': # for positive samples, check that the number of bboxes matches the declared count 33 | assert len(line[3:]) == 4 * int(line[2]) 34 | 35 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED) 36 | 37 | if line[1] == '0': 38 | yield im, '0' 39 | self.line_counter += 1 40 | continue 41 | 42 | num_bboxes = int(line[2]) 43 | bboxes = [] 44 | for i in range(num_bboxes): 45 | x = float(line[3 + i * 4]) 46 | y = float(line[3 + i * 4 + 1]) 47 | width = float(line[3 + i * 4 + 2]) 48 | height = float(line[3 + i * 4 + 3]) 49 | 50 | bboxes.append([x, y, width, height]) 51 | 52 | bboxes = numpy.array(bboxes, dtype=numpy.float32) 53 | yield im, bboxes 54 | 55 | self.line_counter += 1 56 | 57 | 58 | if __name__ == '__main__': 59 | pass 60 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/image_augmentation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/image_augmentation/__init__.py -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/image_augmentation/augmentor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This module provides many types of image augmentation. One can choose appropriate augmentations for 4 | detection, segmentation and classification. 5 | """ 6 | import cv2 7 | import numpy 8 | import random 9 | 10 | 11 | class Augmentor(object): 12 | """ 13 | All augmentation operations are static methods of this class.
14 | """ 15 | 16 | def __init__(self): 17 | pass 18 | 19 | @staticmethod 20 | def histogram_equalisation(image): 21 | """ 22 | do histogram equlisation for grayscale image 23 | :param image: input image with single channel 8bits 24 | :return: processed image 25 | """ 26 | if image.ndim != 2: 27 | print('Input image is not grayscale!') 28 | return None 29 | if image.dtype != numpy.uint8: 30 | print('Input image is not uint8!') 31 | return None 32 | 33 | result = cv2.equalizeHist(image) 34 | return result 35 | 36 | @staticmethod 37 | def grayscale(image): 38 | """ 39 | convert BGR image to grayscale image 40 | :param image: input image with BGR channels 41 | :return: 42 | """ 43 | if image.ndim != 3: 44 | return None 45 | if image.dtype != numpy.uint8: 46 | print('Input image is not uint8!') 47 | return None 48 | 49 | result = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 50 | return result 51 | 52 | @staticmethod 53 | def inversion(image): 54 | """ 55 | invert the image (255-) 56 | :param image: input image with BGR or grayscale 57 | :return: 58 | """ 59 | if image.dtype != numpy.uint8: 60 | print('Input image is not uint8!') 61 | return None 62 | 63 | result = 255 - image 64 | return result 65 | 66 | @staticmethod 67 | def binarization(image, block_size=5, C=10): 68 | """ 69 | convert input image to binary image 70 | cv2.adaptiveThreshold is used, for detailed information, refer to opencv docs 71 | :param image: 72 | :return: 73 | """ 74 | if image.ndim == 3: 75 | image_grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 76 | else: 77 | image_grayscale = image 78 | 79 | binary_image = cv2.adaptiveThreshold(image_grayscale, 255, cv2.ADAPTIVE_THRESH_MEAN_C, 80 | cv2.THRESH_BINARY, block_size, C) 81 | return binary_image 82 | 83 | @staticmethod 84 | def brightness(image, min_factor=0.5, max_factor=1.5): 85 | ''' 86 | adjust the image brightness 87 | :param image: 88 | :param min_factor: 89 | :param max_factor: 90 | :return: 91 | ''' 92 | if image.dtype != numpy.uint8: 93 | print('Input image is not uint8!') 94 | return None 95 | 96 | factor = numpy.random.uniform(min_factor, max_factor) 97 | result = image * factor 98 | if factor > 1: 99 | result[result > 255] = 255 100 | result = result.astype(numpy.uint8) 101 | return result 102 | 103 | @staticmethod 104 | def saturation(image, min_factor=0.5, max_factor=1.5): 105 | ''' 106 | adjust the image saturation 107 | :param image: 108 | :param min_factor: 109 | :param max_factor: 110 | :return: 111 | ''' 112 | if image.dtype != numpy.uint8: 113 | print('Input image is not uint8!') 114 | return None 115 | 116 | image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 117 | factor = numpy.random.uniform(min_factor, max_factor) 118 | 119 | result = numpy.zeros(image.shape, dtype=numpy.float32) 120 | result[:, :, 0] = image[:, :, 0] * factor + image_gray * (1 - factor) 121 | result[:, :, 1] = image[:, :, 1] * factor + image_gray * (1 - factor) 122 | result[:, :, 2] = image[:, :, 2] * factor + image_gray * (1 - factor) 123 | result[result > 255] = 255 124 | result[result < 0] = 0 125 | result = result.astype(numpy.uint8) 126 | return result 127 | 128 | @staticmethod 129 | def contrast(image, min_factor=0.5, max_factor=1.5): 130 | ''' 131 | adjust the image contrast 132 | :param image: 133 | :param min_factor: 134 | :param max_factor: 135 | :return: 136 | ''' 137 | if image.dtype != numpy.uint8: 138 | print('Input image is not uint8!') 139 | return None 140 | 141 | image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 142 | gray_mean = numpy.mean(image_gray) 143 | 
temp = numpy.ones((image.shape[0], image.shape[1]), dtype=numpy.float32) * gray_mean 144 | factor = numpy.random.uniform(min_factor, max_factor) 145 | 146 | result = numpy.zeros(image.shape, dtype=numpy.float32) 147 | result[:, :, 0] = image[:, :, 0] * factor + temp * (1 - factor) 148 | result[:, :, 1] = image[:, :, 1] * factor + temp * (1 - factor) 149 | result[:, :, 2] = image[:, :, 2] * factor + temp * (1 - factor) 150 | 151 | result[result > 255] = 255 152 | result[result < 0] = 0 153 | result = result.astype(numpy.uint8) 154 | 155 | return result 156 | 157 | @staticmethod 158 | def blur(image, mode='random', kernel_size=3, sigma=1): 159 | """ 160 | 161 | :param image: 162 | :param mode: options 'normalized' 'gaussian' 'median' 163 | :param kernel_size: 164 | :param sigma: used for gaussian blur 165 | :return: 166 | """ 167 | if image.dtype != numpy.uint8: 168 | print('Input image is not uint8!') 169 | return None 170 | 171 | if mode == 'random': 172 | mode = random.choice(['normalized', 'gaussian', 'median']) 173 | 174 | if mode == 'normalized': 175 | result = cv2.blur(image, (kernel_size, kernel_size)) 176 | elif mode == 'gaussian': 177 | result = cv2.GaussianBlur(image, (kernel_size, kernel_size), sigmaX=sigma, sigmaY=sigma) 178 | elif mode == 'median': 179 | result = cv2.medianBlur(image, kernel_size) 180 | else: 181 | print('Blur mode is not supported: %s.' % mode) 182 | result = image 183 | return result 184 | 185 | @staticmethod 186 | def rotation(image, degree=10, mode='crop', scale=1): 187 | """ 188 | 189 | :param image: 190 | :param degree: 191 | :param mode: 'crop'-keep original size, 'fill'-keep full image 192 | :param scale: 193 | :return: 194 | """ 195 | if image.dtype != numpy.uint8: 196 | print('Input image is not uint8!') 197 | return None 198 | 199 | h, w = image.shape[:2] 200 | center_x, center_y = w / 2, h / 2 201 | M = cv2.getRotationMatrix2D((center_x, center_y), degree, scale) 202 | 203 | if mode == 'crop': 204 | new_w, new_h = w, h 205 | else: 206 | cos = numpy.abs(M[0, 0]) 207 | sin = numpy.abs(M[0, 1]) 208 | new_w = int(h * sin + w * cos) 209 | new_h = int(h * cos + w * sin) 210 | M[0, 2] += (new_w / 2) - center_x 211 | M[1, 2] += (new_h / 2) - center_y 212 | 213 | result = cv2.warpAffine(image, M, (new_w, new_h)) 214 | return result 215 | 216 | @staticmethod 217 | def flip(image, orientation='h'): 218 | ''' 219 | 220 | :param image: 221 | :param orientation: 222 | :return: 223 | ''' 224 | if image.dtype != numpy.uint8: 225 | print('Input image is not uint8!') 226 | return None 227 | 228 | if orientation == 'h': 229 | return cv2.flip(image, 1) 230 | elif orientation == 'v': 231 | return cv2.flip(image, 0) 232 | else: 233 | print('Unsupported orientation: %s.' 
% orientation) 234 | return image 235 | 236 | @staticmethod 237 | def resize(image, size_in_pixel=None, size_in_scale=None): 238 | """ 239 | 240 | :param image: 241 | :param size_in_pixel: tuple (width, height) 242 | :param size_in_scale: tuple (width_scale, height_scale) 243 | :return: 244 | """ 245 | if image.dtype != numpy.uint8: 246 | print('Input image is not uint8!') 247 | return None 248 | 249 | if size_in_pixel is not None: 250 | return cv2.resize(image, size_in_pixel) 251 | elif size_in_scale is not None: 252 | return cv2.resize(image, (0, 0), fx=size_in_scale[0], fy=size_in_scale[1]) 253 | else: 254 | print('size_in_pixel and size_in_scale are both None.') 255 | return image 256 | 257 | @staticmethod 258 | def crop(image, x, y, width, height): 259 | """ 260 | 261 | :param image: 262 | :param x: crop area top-left x coordinate 263 | :param y: crop area top-left y coordinate 264 | :param width: crop area width 265 | :param height: crop area height 266 | :return: 267 | """ 268 | if image.dtype != numpy.uint8: 269 | print('Input image is not uint8!') 270 | return None 271 | 272 | if image.ndim == 3: 273 | return image[y:y + height, x:x + width, :] 274 | else: 275 | return image[y:y + height, x:x + width] 276 | 277 | @staticmethod 278 | def random_crop(image, width, height): 279 | """ 280 | 281 | :param image: 282 | :param width: crop area width 283 | :param height: crop area height 284 | :return: 285 | """ 286 | if image.dtype != numpy.uint8: 287 | print('Input image is not uint8!') 288 | return False, image 289 | 290 | w_interval = image.shape[1] - width 291 | h_interval = image.shape[0] - height 292 | 293 | if image.ndim == 3: 294 | result = numpy.zeros((height, width, 3), dtype=numpy.uint8) 295 | else: 296 | result = numpy.zeros((height, width), dtype=numpy.uint8) 297 | 298 | if w_interval >= 0 and h_interval >= 0: 299 | crop_x, crop_y = random.randint(0, w_interval), random.randint(0, h_interval) 300 | if image.ndim == 3: 301 | result = image[crop_y:crop_y + height, crop_x:crop_x + width, :] 302 | else: 303 | result = image[crop_y:crop_y + height, crop_x:crop_x + width] 304 | elif w_interval < 0 and h_interval >= 0: 305 | put_x = -w_interval // 2  # integer division: slice indices must be ints 306 | crop_y = random.randint(0, h_interval) 307 | if image.ndim == 3: 308 | result[:, put_x:put_x + image.shape[1], :] = image[crop_y:crop_y + height, :, :] 309 | else: 310 | result[:, put_x:put_x + image.shape[1]] = image[crop_y:crop_y + height, :] 311 | elif w_interval >= 0 and h_interval < 0: 312 | crop_x = random.randint(0, w_interval) 313 | put_y = -h_interval // 2  # integer division: slice indices must be ints 314 | if image.ndim == 3: 315 | result[put_y:put_y + image.shape[0], :, :] = image[:, crop_x:crop_x + width, :] 316 | else: 317 | result[put_y:put_y + image.shape[0], :] = image[:, crop_x:crop_x + width] 318 | else: 319 | put_x, put_y = -w_interval // 2, -h_interval // 2  # integer division: slice indices must be ints 320 | if image.ndim == 3: 321 | result[put_y:put_y + image.shape[0], put_x:put_x + image.shape[1], :] = image[:, :, :] 322 | else: 323 | result[put_y:put_y + image.shape[0], put_x:put_x + image.shape[1]] = image[:, :] 324 | 325 | return result 326 | 327 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/inference_speed_eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/inference_speed_eval/__init__.py
-------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/inference_speed_eval/inference_speed_eval_with_mxnet_cudnn.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | import time 5 | import logging 6 | 7 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '1' 8 | logging.getLogger().setLevel(logging.INFO) 9 | 10 | 11 | class InferenceSpeedEval(object): 12 | def __init__(self, symbol_file_path, mxnet_module, input_shape, input_name='data', device_type='gpu', gpu_index=0): 13 | ''' 14 | 15 | :param symbol_file_path: symbol file path 16 | :param mxnet_module: mxnet module 17 | :param input_shape: input shape in tuple--(batch_size, num_channel, height, width) 18 | :param input_name: input name defined in symbol, by default 'data' 19 | :param device_type: device type: 'gpu', 'cpu' 20 | :param gpu_index: gpu index 21 | ''' 22 | self.symbol_file_path = symbol_file_path 23 | self.mxnet_module = mxnet_module 24 | self.input_name = input_name 25 | self.input_shape = input_shape 26 | self.device_type = device_type 27 | if self.device_type == 'cpu': # CAUTION: x86 cpu inference needs MXNet with mkldnn, or inference speed will be very slow 28 | self.context = self.mxnet_module.cpu() 29 | elif self.device_type == 'gpu': 30 | self.context = self.mxnet_module.gpu(gpu_index) 31 | else: 32 | logging.error('Unknown device_type: %s .' % self.device_type) 33 | sys.exit(1) 34 | 35 | # load symbol file 36 | if not os.path.exists(self.symbol_file_path): 37 | logging.error('Symbol file: %s does not exist!' % symbol_file_path) 38 | sys.exit(1) 39 | self.symbol_net = self.mxnet_module.symbol.load(self.symbol_file_path) 40 | 41 | # create module 42 | self.module = self.mxnet_module.module.Module(symbol=self.symbol_net, 43 | data_names=[self.input_name], 44 | label_names=None, 45 | context=self.context) 46 | self.module.bind(data_shapes=[(self.input_name, self.input_shape)], for_training=False, grad_req='write') 47 | 48 | self.module.init_params(initializer=self.mxnet_module.initializer.Xavier(), allow_missing=True) 49 | self.module.init_optimizer(kvstore=None) 50 | 51 | def run_speed_eval(self, warm_run_loops=10, real_run_loops=100): 52 | random_input_data = [self.mxnet_module.random.uniform(-1.0, 1.0, shape=self.input_shape, ctx=self.context)] 53 | temp_batch = self.mxnet_module.io.DataBatch(random_input_data, []) 54 | 55 | # basic info of this eval 56 | logging.info('Test symbol file: %s' % self.symbol_file_path) 57 | logging.info('Test device: %s' % self.device_type) 58 | logging.info('Test input shape: %s' % str(self.input_shape)) 59 | 60 | # warm run 61 | for i in range(warm_run_loops): 62 | self.module.forward(temp_batch) 63 | for output in self.module.get_outputs(): 64 | output.asnumpy() 65 | 66 | logging.info('Start real run loops---------------') 67 | tic = time.time() 68 | # real run 69 | for i in range(real_run_loops): 70 | self.module.forward(temp_batch) 71 | for output in self.module.get_outputs(): 72 | output.asnumpy() 73 | 74 | toc = time.time() 75 | 76 | print('Finish %d loops in %.02f ms.
\n[%.02f ms] for each loop \n[%.02f ms] for each image (namely %.02f FPS)' % 77 | (real_run_loops, 78 | (toc - tic) * 1000, 79 | (toc - tic) * 1000 / real_run_loops, 80 | (toc - tic) * 1000 / real_run_loops / self.input_shape[0], 81 | real_run_loops * self.input_shape[0] / (toc - tic))) 82 | 83 | 84 | if __name__ == '__main__': 85 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') # set MXNet python path if needed 86 | import mxnet 87 | 88 | symbol_file_path = '/home/heyonghao/projects/tocreate_LFFD_ICCV2019_FaceDetector/symbol_farm/symbol_10_560_25L_8scales_s5_v2_deploy.json' 89 | input_shape = (1, 3, 720, 1280) # (1, 3, 240, 320) (1, 3, 480, 640) (1, 3, 720, 1280) (1, 3, 1080, 1920) (1, 3, 2160, 3840) (1, 3, 4320, 7680) 90 | device_type = 'gpu' 91 | gpu_index = 0 92 | 93 | speedEval = InferenceSpeedEval(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type=device_type, gpu_index=gpu_index) 94 | speedEval.run_speed_eval(warm_run_loops=10, real_run_loops=500) 95 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/inference_speed_eval/inference_speed_eval_with_tensorrt_cudnn.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | import time 5 | import logging 6 | import numpy 7 | 8 | import pycuda.driver as cuda 9 | import pycuda.autoinit 10 | import tensorrt as trt 11 | 12 | logging.getLogger().setLevel(logging.INFO) 13 | 14 | 15 | # Simple helper data class that's a little nicer to use than a 2-tuple. 16 | class HostDeviceMem(object): 17 | def __init__(self, host_mem, device_mem): 18 | self.host = host_mem 19 | self.device = device_mem 20 | 21 | def __str__(self): 22 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 23 | 24 | def __repr__(self): 25 | return self.__str__() 26 | 27 | 28 | class InferenceSpeedEval(object): 29 | def __init__(self, symbol_file_path, mxnet_module, input_shape, data_mode='fp32'): 30 | 31 | if not os.path.exists(symbol_file_path): 32 | logging.error('symbol file does not exist!') 33 | sys.exit(1) 34 | 35 | if len(input_shape) != 4: 36 | logging.error('input shape should have 4 elements in the order of NCHW.') 37 | sys.exit(1) 38 | 39 | symbol_net = mxnet_module.symbol.load(symbol_file_path) 40 | # create module 41 | module = mxnet_module.module.Module(symbol=symbol_net, 42 | data_names=['data'], 43 | label_names=None, 44 | context=mxnet_module.cpu()) 45 | module.bind(data_shapes=[('data', input_shape)], for_training=False, grad_req='write') 46 | module.init_params(initializer=mxnet_module.initializer.Xavier(), allow_missing=True) 47 | arg_params, aux_params = module.get_params() 48 | net_params = dict() 49 | net_params.update(arg_params) 50 | net_params.update(aux_params) 51 | self.onnx_temp_file = 'temp.onnx' 52 | logging.info('Convert mxnet symbol to onnx...') 53 | mxnet_module.contrib.onnx.export_model(symbol_net, net_params, [input_shape], numpy.float32, self.onnx_temp_file, verbose=False) 54 | 55 | # build engine 56 | trt_logger = trt.Logger(trt.Logger.WARNING) 57 | builder = trt.Builder(trt_logger) 58 | builder.max_batch_size = input_shape[0] 59 | builder.average_find_iterations = 2 60 | builder.max_workspace_size = 2 << 30 61 | 62 | if data_mode == 'fp32': 63 | pass 64 | elif data_mode == 'fp16': 65 | if not builder.platform_has_fast_fp16: 66 | logging.error('fp16 is not supported by this platform!') 67 | sys.exit(1) 68 | 
builder.fp16_mode = True 69 | elif data_mode == 'int8': 70 | logging.error('Currently, not implemented yet.') 71 | sys.exit(1) 72 | if not builder.platform_has_fast_int8: 73 | logging.error('int8 is not supported by this platform!') 74 | sys.exit(1) 75 | builder.int8_mode = True 76 | else: 77 | logging.error('Unknown data_mode: %s' % data_mode) 78 | logging.error('Available choices: \'fp32\'(default), \'fp16\', \'int8\'') 79 | sys.exit(1) 80 | 81 | network = builder.create_network() 82 | parser = trt.OnnxParser(network, trt_logger) 83 | logging.info('Parsing onnx for trt network...') 84 | with open(self.onnx_temp_file, 'rb') as onnx_fin: 85 | parser.parse(onnx_fin.read()) 86 | 87 | num_parser_errors = parser.num_errors 88 | if num_parser_errors != 0: 89 | logging.error('Errors occur while parsing the onnx file!') 90 | for i in range(num_parser_errors): 91 | logging.error('Error %d: %s' % (i, parser.get_error(i).desc())) 92 | sys.exit(1) 93 | 94 | logging.info('Start to build trt engine...(this step may cost much time)') 95 | time_start = time.time() 96 | self.engine = builder.build_cuda_engine(network) 97 | time_end = time.time() 98 | logging.info('Engine building time: %.02f s' % (time_end - time_start)) 99 | 100 | for binding in self.engine: 101 | if self.engine.binding_is_input(binding): 102 | logging.info('Input name: %s, shape: %s' % (binding, str(self.engine.get_binding_shape(binding)))) 103 | 104 | self.executor = self.engine.create_execution_context() 105 | self.max_batch_size = builder.max_batch_size 106 | 107 | def __del__(self): 108 | if os.path.exists(self.onnx_temp_file): 109 | os.remove(self.onnx_temp_file) 110 | 111 | def run_speed_eval(self, warm_run_loops=10, real_run_loops=100): 112 | 113 | def allocate_buffers(engine): 114 | inputs = [] 115 | outputs = [] 116 | bindings = [] 117 | for binding in engine: 118 | size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size 119 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 120 | # Allocate host and device buffers 121 | host_mem = cuda.pagelocked_empty(size, dtype) 122 | device_mem = cuda.mem_alloc(host_mem.nbytes) 123 | # Append the device buffer to device bindings. 124 | bindings.append(int(device_mem)) 125 | # Append to the appropriate list. 126 | if engine.binding_is_input(binding): 127 | inputs.append(HostDeviceMem(host_mem, device_mem)) 128 | else: 129 | outputs.append(HostDeviceMem(host_mem, device_mem)) 130 | return inputs, outputs, bindings 131 | 132 | inputs, outputs, bindings = allocate_buffers(self.engine) 133 | # warm run 134 | for i in range(warm_run_loops): 135 | [cuda.memcpy_htod(inp.device, inp.host) for inp in inputs] 136 | self.executor.execute(batch_size=self.max_batch_size, bindings=bindings) 137 | [cuda.memcpy_dtoh(out.host, out.device) for out in outputs] 138 | 139 | # real run 140 | logging.info('Start real run loop.') 141 | sum_time_data_copy = 0. 142 | sum_time_inference_only = 0. 143 | for i in range(real_run_loops): 144 | time_start = time.time() 145 | [cuda.memcpy_htod(inp.device, inp.host) for inp in inputs] 146 | sum_time_data_copy += time.time() - time_start 147 | 148 | time_start = time.time() 149 | self.executor.execute(batch_size=self.max_batch_size, bindings=bindings) 150 | sum_time_inference_only += time.time() - time_start 151 | 152 | time_start = time.time() 153 | [cuda.memcpy_dtoh(out.host, out.device) for out in outputs] 154 | sum_time_data_copy += time.time() - time_start 155 | 156 | logging.info('Total time (data transfer & inference) elapsed: %.02f ms. 
[%.02f ms] for each image (%.02f FPS)' 157 | % ((sum_time_data_copy + sum_time_inference_only) * 1000, 158 | (sum_time_data_copy + sum_time_inference_only) * 1000 / real_run_loops / self.max_batch_size, 159 | real_run_loops * self.max_batch_size / (sum_time_data_copy + sum_time_inference_only))) 160 | 161 | 162 | if __name__ == '__main__': 163 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') 164 | import mxnet 165 | 166 | symbol_file_path = '/home/heyonghao/projects/tocreate_LFFD_ICCV2019_FaceDetector/symbol_farm/symbol_10_560_25L_8scales_s5_v2_deploy.json' 167 | input_shape = (1, 3, 720, 1280) # (1, 3, 240, 320) (1, 3, 480, 640) (1, 3, 720, 1280) (1, 3, 1080, 1920) (1, 3, 2160, 3840) (1, 3, 4320, 7680) 168 | 169 | speedEval = InferenceSpeedEval(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, data_mode='fp32') 170 | speedEval.run_speed_eval(warm_run_loops=10, real_run_loops=500) 171 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/logging_GOCD.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import logging 3 | import os 4 | import sys 5 | 6 | ''' 7 | logging module 8 | ''' 9 | 10 | 11 | def init_logging(log_file_path=None, log_file_mode='w', log_overwrite_flag=False, log_level=logging.INFO): 12 | # the basic logger always offers console output 13 | console_handler = logging.StreamHandler() 14 | formatter = logging.Formatter('%(asctime)s[%(levelname)s]: %(message)s') 15 | console_handler.setFormatter(formatter) 16 | 17 | logging.getLogger().setLevel(log_level) 18 | logging.getLogger().addHandler(console_handler) 19 | 20 | if not log_file_path or log_file_path == '': 21 | print('No log file is specified. The log information is only displayed in console.') 22 | return 23 | 24 | # check whether the log file already exists 25 | if not os.path.exists(log_file_path): 26 | location_dir = os.path.dirname(log_file_path) 27 | if not os.path.exists(location_dir): 28 | os.makedirs(location_dir) 29 | 30 | file_handler = logging.FileHandler(filename=log_file_path, mode=log_file_mode) 31 | file_handler.setFormatter(formatter) 32 | logging.getLogger().addHandler(file_handler) 33 | else: 34 | if log_overwrite_flag: 35 | print('The file [%s] already exists. It will be handled according to the arg [log_file_mode] (the default is \'w\').' % log_file_path) 36 | file_handler = logging.FileHandler(filename=log_file_path, mode=log_file_mode) 37 | file_handler.setFormatter(formatter) 38 | logging.getLogger().addHandler(file_handler) 39 | else: 40 | print('The file [%s] already exists.
The [log_overwrite_flag] is False, please change the log file name.' % log_file_path) 41 | sys.exit(0) 42 | 43 | 44 | def temp_test(): 45 | log_file = './test.log' 46 | file_mode = 'w' 47 | init_logging(log_file_path=log_file, log_file_mode=file_mode, log_overwrite_flag=True, log_level=logging.DEBUG) 48 | 49 | 50 | if __name__ == '__main__': 51 | temp_test() 52 | logging.info('test info') 53 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/__init__.py -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/cross_entropy_with_focal_loss_for_one_class_detection.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @date : 19-1-23 3 | # @author : MindBreaker 4 | # @module : 5 | 6 | import mxnet as mx 7 | import numpy as np 8 | import logging 9 | 10 | 11 | class focal_loss_for_twoclass(mx.operator.CustomOp): 12 | ''' 13 | 1, softmax is applied to in_data[0] (the raw pred) inside this operator, so the network output must not be pre-softmaxed 14 | 2, this CE operator is only for the two-class case: index 0 indicates pos (foreground) and index 1 indicates neg (background) 15 | ''' 16 | 17 | def __init__(self, alpha=0.25, gamma=2): 18 | super(focal_loss_for_twoclass, self).__init__() 19 | self.alpha = alpha 20 | self.gamma = gamma 21 | 22 | def forward(self, is_train, req, in_data, out_data, aux): 23 | pred = in_data[0] 24 | label = in_data[1] 25 | pred_softmax = mx.ndarray.softmax(pred, axis=1) 26 | pred_log = mx.ndarray.log(pred_softmax) 27 | cross_entropy = - label * pred_log 28 | 29 | self.assign(out_data[0], req[0], cross_entropy) 30 | 31 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 32 | pred = in_data[0] 33 | label = in_data[1] 34 | mask = in_data[2] 35 | 36 | pred_softmax = mx.ndarray.softmax(pred, axis=1) 37 | 38 | # print('pos mean prob:', mx.ndarray.mean(pred_softmax[:, 0, :, :][label[:, 0, :, :] > 0.5]).asnumpy()) 39 | # print('neg mean prob:', mx.ndarray.mean(pred_softmax[:, 1, :, :][label[:, 1, :, :] > 0.5]).asnumpy()) 40 | 41 | # pos_flag = label[:, 0, :, :] > 0.5 42 | # neg_flag = label[:, 1, :, :] > 0.5 43 | 44 | FL_gradient = -self.gamma * mx.ndarray.power(1 - pred_softmax, self.gamma - 1) * mx.ndarray.log(pred_softmax) * pred_softmax + mx.ndarray.power(1 - pred_softmax, self.gamma) 45 | 46 | FL_gradient[:, 0, :, :] *= self.alpha 47 | FL_gradient[:, 1, :, :] *= 1 - self.alpha 48 | 49 | FL_gradient *= (pred_softmax-label) 50 | 51 | FL_gradient /= mx.ndarray.sum(mask).asnumpy()[0] 52 | # print('mean grad:', mx.ndarray.mean(mx.ndarray.abs(FL_gradient)).asnumpy()) 53 | 54 | self.assign(in_grad[0], req[0], FL_gradient) 55 | 56 | 57 | @mx.operator.register("focal_loss_for_twoclass") 58 | class focal_loss_for_twoclass_Prop(mx.operator.CustomOpProp): 59 | def __init__(self): 60 | super(focal_loss_for_twoclass_Prop, self).__init__(need_top_grad=False) 61 | 62 | def list_arguments(self): 63 | return ['pred', 'label', 'mask'] 64 | 65 | def list_outputs(self): 66 | return ['output'] 67 | 68 | def infer_shape(self, in_shape): 69 | data_shape = in_shape[0] 70 | label_shape = in_shape[0] 71 | mask_shape =
in_shape[0] 72 | output_shape = in_shape[0] 73 | return [data_shape, label_shape, mask_shape], [output_shape], [] 74 | 75 | def create_operator(self, ctx, shapes, dtypes): 76 | return focal_loss_for_twoclass() 77 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/cross_entropy_with_hnm_for_one_class_detection.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | class cross_entropy_with_hnm_for_one_class_detection(mx.operator.CustomOp): 5 | 6 | def __init__(self, hnm_ratio): 7 | super(cross_entropy_with_hnm_for_one_class_detection, self).__init__() 8 | self.hnm_ratio = int(hnm_ratio) 9 | 10 | def forward(self, is_train, req, in_data, out_data, aux): 11 | pred = in_data[0] 12 | label = in_data[1] 13 | pred_softmax = mx.ndarray.softmax(pred, axis=1) 14 | pred_log = mx.ndarray.log(pred_softmax) 15 | cross_entropy = - label * pred_log 16 | 17 | self.assign(out_data[0], req[0], cross_entropy) 18 | 19 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 20 | pred = in_data[0] 21 | label = in_data[1] 22 | mask = in_data[2] 23 | 24 | pred_softmax = mx.ndarray.softmax(pred, axis=1) 25 | CE_gradient = pred_softmax - label # Standard CE gradient 26 | loss_mask = mx.ndarray.ones((CE_gradient.shape[0], 1, CE_gradient.shape[2], CE_gradient.shape[3]), ctx=CE_gradient.context) 27 | 28 | if self.hnm_ratio > 0: 29 | pos_flag = (label[:, 0, :, :] > 0.5) 30 | pos_num = mx.ndarray.sum(pos_flag).asnumpy()[0] # get the number of positive samples 31 | 32 | if pos_num > 0: 33 | neg_flag = (label[:, 1, :, :] > 0.5) 34 | neg_num = mx.ndarray.sum(neg_flag).asnumpy()[0] 35 | neg_num_selected = min(int(self.hnm_ratio * pos_num), int(neg_num)) 36 | neg_prob = pred_softmax[:, 1, :, :] * neg_flag # non-negative value 37 | neg_prob_sort = mx.ndarray.sort(neg_prob.reshape((1, -1)), is_ascend=True) 38 | 39 | prob_threshold = neg_prob_sort[0][neg_num_selected].asnumpy()[0] 40 | neg_grad_flag = (neg_prob <= prob_threshold) 41 | loss_mask = mx.ndarray.logical_or(neg_grad_flag, pos_flag) 42 | else: 43 | neg_choice_ratio = 0.1 44 | neg_num_selected = int(pred_softmax[:, 1, :, :].size * neg_choice_ratio) 45 | neg_prob = pred_softmax[:, 1, :, :] 46 | neg_prob_sort = mx.ndarray.sort(neg_prob.reshape((1, -1)), is_ascend=True) 47 | prob_threshold = neg_prob_sort[0][neg_num_selected].asnumpy()[0] 48 | loss_mask = (neg_prob <= prob_threshold) 49 | 50 | for i in range(CE_gradient.shape[1]): 51 | CE_gradient[:, i, :, :] *= loss_mask * mask[:, i, :, :] 52 | 53 | CE_gradient /= mx.ndarray.sum(loss_mask).asnumpy()[0] 54 | 55 | self.assign(in_grad[0], req[0], CE_gradient) 56 | 57 | 58 | @mx.operator.register("cross_entropy_with_hnm_for_one_class_detection") 59 | class cross_entropy_with_hnm_for_one_class_detection_Prop(mx.operator.CustomOpProp): 60 | def __init__(self, hnm_ratio=5): 61 | super(cross_entropy_with_hnm_for_one_class_detection_Prop, self).__init__(need_top_grad=False) 62 | self.hnm_ratio = hnm_ratio 63 | 64 | def list_arguments(self): 65 | return ['pred', 'label', 'mask'] 66 | 67 | def list_outputs(self): 68 | return ['output'] 69 | 70 | def infer_shape(self, in_shape): 71 | data_shape = in_shape[0] 72 | label_shape = in_shape[0] 73 | mask_shape = in_shape[0] 74 | output_shape = in_shape[0] 75 | return [data_shape, label_shape, mask_shape], [output_shape], [] 76 | 77 | def create_operator(self, ctx, shapes, dtypes): 78 | return
cross_entropy_with_hnm_for_one_class_detection(self.hnm_ratio) 79 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/loss.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class cross_entropy_with_hnm_for_one_class_detection2(nn.Module): 8 | def __init__(self, hnm_ratio, num_output_scales): 9 | super(cross_entropy_with_hnm_for_one_class_detection2, self).__init__() 10 | self.hnm_ratio = int(hnm_ratio) 11 | self.num_output_scales = num_output_scales 12 | 13 | def forward(self, outputs, targets): 14 | loss_branch_list = [] 15 | for i in range(self.num_output_scales): 16 | pred_score = outputs[i * 2] 17 | pred_bbox = outputs[i * 2 + 1] 18 | gt_mask = targets[i * 2].cuda() 19 | gt_label = targets[i * 2 + 1].cuda() 20 | 21 | pred_score_softmax = torch.softmax(pred_score, dim=1) 22 | # loss_mask = torch.ones(pred_score_softmax.shape[0], 23 | # 1, 24 | # pred_score_softmax.shape[2], 25 | # pred_score_softmax.shape[3]) 26 | loss_mask = torch.ones(pred_score_softmax.shape, dtype=torch.bool, device=pred_score_softmax.device)  # boolean mask on the same device; overwritten below when HNM is enabled 27 | 28 | if self.hnm_ratio > 0: 29 | # print('gt_label.shape:', gt_label.shape) 30 | # print('gt_label.size():', gt_label.size()) 31 | pos_flag = (gt_label[:, 0, :, :] > 0.5) 32 | pos_num = torch.sum(pos_flag) # get num. of positive examples 33 | 34 | if pos_num > 0: 35 | neg_flag = (gt_label[:, 1, :, :] > 0.5) 36 | neg_num = torch.sum(neg_flag) 37 | neg_num_selected = min(int(self.hnm_ratio * pos_num), int(neg_num)) 38 | # non-negative value 39 | neg_prob = torch.where(neg_flag, pred_score_softmax[:, 1, :, :], \ 40 | torch.zeros_like(pred_score_softmax[:, 1, :, :])) 41 | neg_prob_sort, _ = torch.sort(neg_prob.reshape(1, -1), descending=False) 42 | 43 | prob_threshold = neg_prob_sort[0][neg_num_selected-1] 44 | neg_grad_flag = (neg_prob <= prob_threshold) 45 | loss_mask = torch.cat([pos_flag.unsqueeze(1), neg_grad_flag.unsqueeze(1)], dim=1) 46 | else: 47 | neg_choice_ratio = 0.1 48 | neg_num_selected = int(pred_score_softmax[:, 1, :, :].numel() * neg_choice_ratio) 49 | neg_prob = pred_score_softmax[:, 1, :, :] 50 | neg_prob_sort, _ = torch.sort(neg_prob.reshape(1, -1), descending=False) 51 | prob_threshold = neg_prob_sort[0][neg_num_selected-1] 52 | neg_grad_flag = (neg_prob <= prob_threshold) 53 | loss_mask = torch.cat([pos_flag.unsqueeze(1), neg_grad_flag.unsqueeze(1)], dim=1) 54 | 55 | # cross entropy with mask 56 | pred_score_softmax_masked = pred_score_softmax[loss_mask] 57 | pred_score_log = torch.log(pred_score_softmax_masked) 58 | score_cross_entropy = -gt_label[:, :2, :, :][loss_mask] * pred_score_log 59 | loss_score = torch.sum(score_cross_entropy) / score_cross_entropy.numel() 60 | 61 | mask_bbox = gt_mask[:, 2:6, :, :] 62 | if torch.sum(mask_bbox) == 0: 63 | loss_bbox = torch.zeros_like(loss_score) 64 | else: 65 | predict_bbox = pred_bbox * mask_bbox 66 | label_bbox = gt_label[:, 2:6, :, :] * mask_bbox 67 | loss_bbox = F.mse_loss(predict_bbox, label_bbox, reduction='mean') 68 | # loss_bbox = F.smooth_l1_loss(predict_bbox, label_bbox, reduction='mean') 69 | # loss_bbox = torch.nn.MSELoss(predict_bbox, label_bbox, size_average=True, reduce=True) 70 | # loss_bbox = torch.nn.SmoothL1Loss(predict_bbox, label_bbox, size_average=True, reduce=True) 71 | 72 | loss_branch = loss_score + loss_bbox 73 | loss_branch_list.append(loss_branch) 74 | return loss_branch_list 75 | 76 | 77 |
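# Note on hard negative mining (HNM) in the class below: with hnm_ratio = 5 and,
# say, 8 positive pixels in a batch, at most 5 * 8 = 40 negatives are kept for the
# classification loss -- those with the smallest predicted background probability,
# i.e. the negatives the model is currently most wrong about. All remaining
# negatives are masked out via loss_mask.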
class cross_entropy_with_hnm_for_one_class_detection(nn.Module): 78 | def __init__(self, hnm_ratio, num_output_scales): 79 | super(cross_entropy_with_hnm_for_one_class_detection, self).__init__() 80 | self.hnm_ratio = int(hnm_ratio) 81 | self.num_output_scales = num_output_scales 82 | 83 | def forward(self, outputs, targets): 84 | loss_cls = 0 85 | loss_reg = 0 86 | loss_branch = [] 87 | for i in range(self.num_output_scales): 88 | pred_score = outputs[i * 2] 89 | pred_bbox = outputs[i * 2 + 1] 90 | gt_mask = targets[i * 2].cuda() 91 | gt_label = targets[i * 2 + 1].cuda() 92 | 93 | pred_score_softmax = torch.softmax(pred_score, dim=1) 94 | # loss_mask = torch.ones(pred_score_softmax.shape[0], 95 | # 1, 96 | # pred_score_softmax.shape[2], 97 | # pred_score_softmax.shape[3]) 98 | loss_mask = torch.ones(pred_score_softmax.shape, dtype=torch.bool, device=pred_score_softmax.device)  # boolean mask on the same device; overwritten below when HNM is enabled 99 | 100 | if self.hnm_ratio > 0: 101 | # print('gt_label.shape:', gt_label.shape) 102 | # print('gt_label.size():', gt_label.size()) 103 | pos_flag = (gt_label[:, 0, :, :] > 0.5) 104 | pos_num = torch.sum(pos_flag) # get num. of positive examples 105 | 106 | if pos_num > 0: 107 | neg_flag = (gt_label[:, 1, :, :] > 0.5) 108 | neg_num = torch.sum(neg_flag) 109 | neg_num_selected = min(int(self.hnm_ratio * pos_num), int(neg_num)) 110 | # non-negative value 111 | neg_prob = torch.where(neg_flag, pred_score_softmax[:, 1, :, :], \ 112 | torch.zeros_like(pred_score_softmax[:, 1, :, :])) 113 | neg_prob_sort, _ = torch.sort(neg_prob.reshape(1, -1), descending=False) 114 | 115 | prob_threshold = neg_prob_sort[0][neg_num_selected-1] 116 | neg_grad_flag = (neg_prob <= prob_threshold) 117 | loss_mask = torch.cat([pos_flag.unsqueeze(1), neg_grad_flag.unsqueeze(1)], dim=1) 118 | else: 119 | neg_choice_ratio = 0.1 120 | neg_num_selected = int(pred_score_softmax[:, 1, :, :].numel() * neg_choice_ratio) 121 | neg_prob = pred_score_softmax[:, 1, :, :] 122 | neg_prob_sort, _ = torch.sort(neg_prob.reshape(1, -1), descending=False) 123 | prob_threshold = neg_prob_sort[0][neg_num_selected-1] 124 | neg_grad_flag = (neg_prob <= prob_threshold) 125 | loss_mask = torch.cat([pos_flag.unsqueeze(1), neg_grad_flag.unsqueeze(1)], dim=1) 126 | 127 | # cross entropy with mask 128 | pred_score_softmax_masked = pred_score_softmax[loss_mask] 129 | pred_score_log = torch.log(pred_score_softmax_masked) 130 | score_cross_entropy = -gt_label[:, :2, :, :][loss_mask] * pred_score_log 131 | loss_score = torch.sum(score_cross_entropy) / score_cross_entropy.numel() 132 | 133 | mask_bbox = gt_mask[:, 2:6, :, :] 134 | if torch.sum(mask_bbox) == 0: 135 | loss_bbox = torch.zeros_like(loss_score) 136 | else: 137 | predict_bbox = pred_bbox * mask_bbox 138 | label_bbox = gt_label[:, 2:6, :, :] * mask_bbox 139 | loss_bbox = F.mse_loss(predict_bbox, label_bbox, reduction='sum') / torch.sum(mask_bbox) 140 | # loss_bbox = F.smooth_l1_loss(predict_bbox, label_bbox, reduction='sum') / torch.sum(mask_bbox) 141 | # loss_bbox = torch.nn.MSELoss(predict_bbox, label_bbox, size_average=False, reduce=True) 142 | # loss_bbox = torch.nn.SmoothL1Loss(predict_bbox, label_bbox, size_average=False, reduce=True) 143 | 144 | loss_cls += loss_score 145 | loss_reg += loss_bbox 146 | loss_branch.append(loss_score) 147 | loss_branch.append(loss_bbox) 148 | loss = loss_cls + loss_reg 149 | return loss, loss_branch -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/mean_squared_error_with_hnm_for_one_class_detection.py:
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | squared error with hard negative mining 4 | ''' 5 | import mxnet as mx 6 | 7 | 8 | class mean_squared_error_with_hnm_for_one_class_detection(mx.operator.CustomOp): 9 | def __init__(self, hnm_ratio): 10 | super(mean_squared_error_with_hnm_for_one_class_detection, self).__init__() 11 | self.hnm_ratio = int(hnm_ratio) 12 | 13 | def forward(self, is_train, req, in_data, out_data, aux): 14 | pred = in_data[0] 15 | self.assign(out_data[0], req[0], pred) 16 | 17 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 18 | pred = in_data[0] 19 | label = in_data[1] 20 | loss = pred - label # Standard gradient in MXNET for Regression loss. 21 | if self.hnm_ratio != 0: 22 | pos_flag = (label > 0) 23 | pos_num = mx.ndarray.sum(pos_flag).asnumpy()[0] # get the number of positive samples 24 | if pos_num > 0: 25 | neg_flag = (label < 0.0001) 26 | neg_num = mx.ndarray.sum(neg_flag).asnumpy()[0] 27 | neg_num_selected = min(int(self.hnm_ratio * pos_num), int(neg_num)) 28 | neg_loss = mx.ndarray.abs(loss * neg_flag) # non-negative value 29 | neg_loss_tem = mx.ndarray.sort(neg_loss.reshape((1, -1)), is_ascend=False) 30 | 31 | top_loss_min = neg_loss_tem[0][neg_num_selected].asnumpy()[0] 32 | neg_loss_flag = (neg_loss >= top_loss_min) 33 | loss_mask = mx.ndarray.logical_or(neg_loss_flag, pos_flag) 34 | else: 35 | neg_choice_ratio = 0.1 36 | neg_num_selected = int(loss.size * neg_choice_ratio) 37 | loss_abs = mx.ndarray.abs(loss) 38 | neg_loss_tem = mx.ndarray.sort(loss_abs.reshape((1, -1)), is_ascend=False) 39 | top_loss_min = neg_loss_tem[0][neg_num_selected].asnumpy()[0] 40 | # logging.info('top_loss_min:%0.4f', top_loss_min) 41 | loss_mask = (loss_abs >= top_loss_min) 42 | 43 | # logging.info('remained_num:%d', mx.ndarray.sum(mask).asnumpy()[0]) 44 | 45 | loss *= loss_mask 46 | loss /= loss[0].size 47 | self.assign(in_grad[0], req[0], loss) 48 | 49 | 50 | @mx.operator.register("mean_squared_error_with_hnm_for_one_class_detection") 51 | class mean_squared_error_with_hnm_for_one_class_detection_Prop(mx.operator.CustomOpProp): 52 | def __init__(self, hnm_ratio=10): 53 | super(mean_squared_error_with_hnm_for_one_class_detection_Prop, self).__init__(need_top_grad=False) 54 | self.hnm_ratio = hnm_ratio 55 | 56 | def list_arguments(self): 57 | return ['pred', 'label', 'mask'] 58 | 59 | def list_outputs(self): 60 | return ['output'] 61 | 62 | def infer_shape(self, in_shape): 63 | data_shape = in_shape[0] 64 | label_shape = in_shape[0] 65 | mask_shape = in_shape[0] 66 | output_shape = in_shape[0] 67 | return [data_shape, label_shape, mask_shape], [output_shape], [] 68 | 69 | def create_operator(self, ctx, shapes, dtypes): 70 | return mean_squared_error_with_hnm_for_one_class_detection(self.hnm_ratio) 71 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/loss_layer_farm/mean_squared_error_with_ohem_for_one_class_detection.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | squared error with online hard example mining 4 | ''' 5 | import mxnet as mx 6 | 7 | 8 | class mean_squared_error_with_ohem_for_one_class_detection(mx.operator.CustomOp): 9 | def __init__(self, ohem_ratio): 10 | super(mean_squared_error_with_ohem_for_one_class_detection, self).__init__() 11 | self.ohem_ratio = ohem_ratio 12 | 13 | def forward(self, is_train, req, in_data, out_data, aux): 14
| pred = in_data[0] 15 | self.assign(out_data[0], req[0], pred) 16 | 17 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 18 | pred = out_data[0] 19 | label = in_data[1] 20 | loss = pred - label 21 | 22 | # perform OHEM: keep only the ohem_ratio fraction of samples with the largest absolute error 23 | num_select = int(label.size * self.ohem_ratio) 24 | loss_abs = mx.nd.abs(loss) 25 | loss_sort = mx.nd.sort(loss_abs.reshape((1, -1)), is_ascend=False) 26 | min_threshold = loss_sort[0][num_select].asnumpy()[0] 27 | select_flag = loss_abs >= min_threshold 28 | loss *= select_flag # zero out the gradients of the easy examples 29 | loss /= num_select 30 | 31 | self.assign(in_grad[0], req[0], loss) 32 | 33 | 34 | @mx.operator.register("mean_squared_error_with_ohem_for_one_class_detection") 35 | class mean_squared_error_with_ohem_for_one_class_detection_Prop(mx.operator.CustomOpProp): 36 | def __init__(self, ohem_ratio=0.25): 37 | super(mean_squared_error_with_ohem_for_one_class_detection_Prop, self).__init__(need_top_grad=False) 38 | self.ohem_ratio = ohem_ratio 39 | 40 | def list_arguments(self): 41 | return ['pred', 'label'] 42 | 43 | def list_outputs(self): 44 | return ['output'] 45 | 46 | def infer_shape(self, in_shape): 47 | pred_shape = in_shape[0] 48 | label_shape = in_shape[0] 49 | output_shape = in_shape[0] 50 | return [pred_shape, label_shape], [output_shape], [] 51 | 52 | def create_operator(self, ctx, shapes, dtypes): 53 | return mean_squared_error_with_ohem_for_one_class_detection(self.ohem_ratio) 54 | -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/solver_GOCD.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import logging 4 | import time 5 | 6 | 7 | class Solver(object): 8 | def __init__(self, 9 | task_name, 10 | torch_module, 11 | trainset_dataiter, 12 | net, 13 | net_initializer, 14 | optimizer, 15 | lr_scheduler, 16 | gpu_id_list, 17 | num_train_loops, 18 | loss_criterion, 19 | train_metric, 20 | display_interval=10, 21 | val_evaluation_interval=100, 22 | valset_dataiter=None, 23 | val_metric=None, 24 | num_val_loops=0, 25 | pretrained_model_param_path=None, 26 | save_prefix=None, 27 | start_index=0, 28 | model_save_interval=None, 29 | train_metric_update_frequency=1): 30 | self.task_name = task_name 31 | self.torch_module = torch_module 32 | self.trainset_dataiter = trainset_dataiter 33 | self.valset_dataiter = valset_dataiter 34 | self.net = net 35 | self.net_initializer = net_initializer 36 | self.gpu_id_list = gpu_id_list 37 | self.optimizer = optimizer 38 | self.lr_scheduler = lr_scheduler 39 | self.num_train_loops = num_train_loops 40 | self.num_val_loops = num_val_loops 41 | self.loss_criterion = loss_criterion 42 | self.train_metric = train_metric 43 | self.val_metric = val_metric 44 | self.display_interval = display_interval 45 | self.val_evaluation_interval = val_evaluation_interval 46 | self.save_prefix = save_prefix 47 | self.start_index = start_index 48 | self.pretrained_model_param_path = pretrained_model_param_path 49 | self.model_save_interval = model_save_interval 50 | 51 | self.train_metric_update_frequency = \ 52 | train_metric_update_frequency if train_metric_update_frequency <= \ 53 | display_interval else display_interval 54 | 55 | def fit(self): 56 | logging.info('Start training on gpu %s.-----------', str(self.gpu_id_list)) 57 | sum_time = 0 58 | for i in range(self.start_index + 1, self.num_train_loops + 1): 59 | start = time.time() 60 | batch = self.trainset_dataiter.next() 61 | images = 
batch.data[0].cuda() 62 | targets = batch.label 63 | 64 | images = (images - 127.5) / 127.5 65 | 66 | outputs = self.net(images) 67 | 68 | loss, loss_branch = self.loss_criterion(outputs, targets) 69 | 70 | # update parameters------------------------------------------------ 71 | self.optimizer.zero_grad() 72 | loss.backward() 73 | self.optimizer.step() 74 | self.lr_scheduler.step() 75 | 76 | # TODO: the train_metric update still needs debugging 77 | # display training process---------------------------------------- 78 | if i % self.train_metric_update_frequency == 0: 79 | self.train_metric.update(loss_branch) 80 | 81 | sum_time += (time.time() - start) 82 | 83 | if i % self.display_interval == 0: 84 | names, values = self.train_metric.get() 85 | 86 | logging.info('Iter[%d] -- Time elapsed: %.1f s. Speed: %.1f images/s.', 87 | i, sum_time, self.display_interval * \ 88 | self.trainset_dataiter.get_batch_size() / sum_time) 89 | for name, value in zip(names, values): 90 | logging.info('%s: --> %.4f', name, value) 91 | logging.info('total loss = %.4f', loss.item() * 10000) 92 | 93 | self.train_metric.reset() 94 | sum_time = 0 95 | 96 | # evaluate the validation set 97 | if i % self.val_evaluation_interval == 0 and self.num_val_loops: 98 | with self.torch_module.no_grad(): # use the injected torch module; torch itself is never imported here 99 | logging.info('Start validating---------------------------') 100 | for val_loop in range(self.num_val_loops): 101 | val_batch = self.valset_dataiter.next() 102 | val_images = val_batch[0].cuda() 103 | val_targets = val_batch[1:].cuda() 104 | 105 | val_outputs = self.net(val_images) 106 | 107 | self.val_metric.update(val_outputs, val_targets) 108 | 109 | names, values = self.val_metric.get() 110 | logging.info('Iter[%d] validation metric -------------', i) 111 | for name, value in zip(names, values): 112 | logging.info('%s: --> %.4f', name, value) 113 | logging.info('End validating ----------------------------') 114 | self.val_metric.reset() 115 | 116 | # save model----------------------------------------------------- 117 | if self.model_save_interval and i % self.model_save_interval == 0: 118 | self.torch_module.save(self.net.state_dict(), 119 | self.save_prefix + '/' + self.task_name + \ 120 | '_{}.pth'.format(self.lr_scheduler._step_count)) -------------------------------------------------------------------------------- /ChasingTrainFramework_GeneralOneClassDetection/train_GOCD.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | 5 | def start_train(param_dict, 6 | task_name, 7 | torch_module, 8 | gpu_id_list, 9 | train_dataiter, 10 | train_metric, 11 | train_metric_update_frequency, 12 | num_train_loops, 13 | val_dataiter, 14 | val_metric, 15 | num_val_loops, 16 | validation_interval, 17 | optimizer, 18 | lr_scheduler, 19 | net, 20 | net_initializer, 21 | loss_criterion, 22 | pretrained_model_param_path, 23 | display_interval, 24 | save_prefix, 25 | model_save_interval, 26 | start_index 27 | ): 28 | 29 | logging.info('PyTorch Version: %s', str(torch_module.__version__)) 30 | logging.info('Training settings:-----------------------------------------------------------------') 31 | for param_name, param_value in param_dict.items(): 32 | logging.info(param_name + ':' + str(param_value)) 33 | logging.info('-----------------------------------------------------------------------------------') 34 | 35 | # init Solver module------------------------------------------------------------------------------------- 36 | from .solver_GOCD import Solver 37 | 38 | solver = Solver( 39 | task_name=task_name, 40 | 
torch_module=torch_module, 41 | trainset_dataiter=train_dataiter, 42 | net=net, 43 | net_initializer=net_initializer, 44 | optimizer=optimizer, 45 | lr_scheduler=lr_scheduler, 46 | gpu_id_list=gpu_id_list, 47 | num_train_loops=num_train_loops, 48 | loss_criterion=loss_criterion, 49 | train_metric=train_metric, 50 | display_interval=display_interval, 51 | val_evaluation_interval=validation_interval, 52 | valset_dataiter=val_dataiter, 53 | val_metric=val_metric, 54 | num_val_loops=num_val_loops, 55 | pretrained_model_param_path=pretrained_model_param_path, 56 | save_prefix=save_prefix, 57 | start_index=start_index, 58 | model_save_interval=model_save_interval, 59 | train_metric_update_frequency=train_metric_update_frequency) 60 | solver.fit() 61 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 becauseofAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A Light and Fast Face Detector for Edge Devices 2 | **This repo is updated frequently; keeping up with the latest code is highly recommended.** 3 | 4 | ## Recent Update 5 | * `2019.10.14` The official PyTorch version of LFFD is online for the first time. For now, the repo is only a preview version. The face detection code for the v2 version is released nightly. 6 | * `2019.10.16` Now the face detection code for the v2 version can be trained normally. The code of the other tasks will be updated soon. 7 | 8 | ## Introduction 9 | This repo is the official PyTorch source code of the paper "[LFFD: A Light and Fast Face Detector for Edge Devices](https://arxiv.org/abs/1904.10633)". Our paper presents a light and fast face detector (**LFFD**) for edge devices. 10 | LFFD carefully balances accuracy and latency, resulting in a small model size and fast inference speed while achieving excellent accuracy. 11 | **Understanding the essence of the receptive field makes detection networks interpretable.** 12 | 13 | In practice, we have deployed it on cloud and edge devices (like the NVIDIA Jetson series and ARM-based embedded systems). The comprehensive performance 14 | of LFFD is robust enough to support our applications. 
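To make the receptive-field (RF) intuition concrete, the following minimal sketch shows how LFFD maps one output-map cell back to a box in the original image, mirroring the decoding logic in `face_detection/accuracy_evaluation/predict.py`. The RF size, stride and center-start values below are illustrative placeholders, not the actual configuration values (those live in `config_farm`):

```
# Toy RF-based bbox decoding, following the formulas in predict.py.
# All numeric values are placeholders; real ones come from the configuration files.
receptive_field = 80   # RF size of one detection branch (placeholder)
stride = 8             # RF stride of that branch (placeholder)
center_start = 7       # image coordinate of the first cell's RF center (placeholder)

def decode_bbox(col, row, reg):
    """Map cell (col, row) of an output map plus its 4 regression values to a box."""
    # each output cell corresponds to one RF center in the input image
    center_x = center_start + stride * col
    center_y = center_start + stride * row
    # the regression outputs are normalized by half of the RF size (predict.py's `constant`)
    half_rf = receptive_field / 2.0
    x1 = center_x - reg[0] * half_rf
    y1 = center_y - reg[1] * half_rf
    x2 = center_x - reg[2] * half_rf
    y2 = center_y - reg[3] * half_rf
    return x1, y1, x2, y2

print(decode_bbox(10, 5, [0.2, 0.3, -0.2, -0.3]))  # -> (79.0, 35.0, 95.0, 59.0)
```

Each output branch repeats this decoding with its own RF size, which is how a single forward pass covers the whole scale range.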
15 | 16 | In fact, our method is **_a general detection framework that is applicable to one-class detection_**, such as face detection, pedestrian detection, 17 | head detection, vehicle detection and so on. In general, an object class whose average ratio of the longer side to the shorter side is 18 | less than 5 is a good fit for our framework. 19 | 20 | Several practical advantages: 21 | 1. large scale coverage, and easy to extend to larger scales by adding more layers without much additional latency. 22 | 2. detect small objects (as small as 10 pixels) in images with extremely large resolution (8K or even larger) in only one inference. 23 | 3. a simple backbone with very common operators makes it easy to deploy anywhere. 24 | 25 | ## Accuracy and Latency 26 | on the way 27 | 28 | ## Getting Started 29 | We re-implement the proposed method using PyTorch. The MXNet version is [here](https://github.com/YonghaoHe/A-Light-and-Fast-Face-Detector-for-Edge-Devices). 30 | 31 | #### Prerequisites (global) 32 | * Python>=3.5 33 | * numpy>=1.16 (lower versions should work as well, but are not tested) 34 | * PyTorch>=1.0.0 ([install guide](https://pytorch.org/get-started/locally/)) 35 | * cv2=3.x (pip3 install opencv-python==3.4.5.20, other versions should work as well, but are not tested) 36 | 37 | > Tips: 38 | * use PyTorch with cuDNN. 39 | * build numpy from source with OpenBLAS. This will improve the training efficiency. 40 | * make sure cv2 links to libjpeg-turbo, not libjpeg. This will improve the JPEG decoding efficiency. 41 | 42 | #### Sub-directory description 43 | * [face_detection](face_detection) contains the code of training, evaluation and inference for LFFD, 44 | the main content of this repo. The trained models of different versions are provided for off-the-shelf deployment. 45 | * [head_detection](head_detection) contains the trained models for head detection. The models are obtained by the 46 | proposed general one class detection framework. 47 | * [pedestrian_detection](pedestrian_detection) contains the trained models for pedestrian detection. The models are obtained by the 48 | proposed general one class detection framework. 49 | * [vehicle_detection](vehicle_detection) contains the trained models for vehicle detection. The models are obtained by the 50 | proposed general one class detection framework. 51 | * [ChasingTrainFramework_GeneralOneClassDetection](ChasingTrainFramework_GeneralOneClassDetection) is a simple 52 | wrapper based on the MXNet Module API for general one class detection. 53 | 54 | #### Installation 55 | 1. Download the repo: 56 | ``` 57 | git clone https://github.com/becauseofAI/lffd-pytorch.git 58 | ``` 59 | 2. Refer to the corresponding sub-project for detailed usage. For now, only the v2 version of [face_detection](face_detection) can be trained; a minimal inference sketch follows. 
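The sketch below is distilled from `face_detection/demo/demo.py` and is meant to be run from inside `face_detection/demo`, as `demo.py` itself is. It assumes the v2 symbol/params files referenced there have been trained or downloaded first; the image path is a placeholder:

```
import sys
sys.path.append('..')  # make config_farm and accuracy_evaluation importable, as demo.py does
import cv2
import mxnet as mx
from config_farm import configuration_10_320_20L_5scales_v2 as cfg
from accuracy_evaluation import predict

predictor = predict.Predict(
    mxnet=mx,
    symbol_file_path='../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json',
    model_file_path='../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params',
    ctx=mx.cpu(),
    receptive_field_list=cfg.param_receptive_field_list,
    receptive_field_stride=cfg.param_receptive_field_stride,
    bbox_small_list=cfg.param_bbox_small_list,
    bbox_large_list=cfg.param_bbox_large_list,
    receptive_field_center_start=cfg.param_receptive_field_center_start,
    num_output_scales=cfg.param_num_output_scales)

im = cv2.imread('test.jpg', cv2.IMREAD_COLOR)  # placeholder image path
# predict() returns (bboxes, inference time in ms) when NMS_flag is True (the default)
bboxes, infer_time = predictor.predict(im, resize_scale=1, score_threshold=0.6,
                                       top_k=10000, NMS_threshold=0.4)
for x1, y1, x2, y2, score in bboxes:
    cv2.rectangle(im, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
cv2.imwrite('test_result.jpg', im)
```

Note that the evaluation/demo utilities shown here still load MXNet models; the PyTorch training pipeline is the part being ported in this repo.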
60 | 61 | ## Citation 62 | If you benefit from our work in your research and products, please kindly cite our paper 63 | ``` 64 | @inproceedings{LFFD, 65 | title={LFFD: A Light and Fast Face Detector for Edge Devices}, 66 | author={He, Yonghao and Xu, Dezhong and Wu, Lifang and Jian, Meng and Xiang, Shiming and Pan, Chunhong}, 67 | booktitle={arXiv:1904.10633}, 68 | year={2019} 69 | } 70 | ``` 71 | 72 | ## To Do List 73 | - [ ] face detection 74 | - [ ] pedestrian detection 75 | - [ ] head detection 76 | - [ ] vehicle detection 77 | - [ ] license plate detection 78 | - [ ] [reconstruction version](https://github.com/becauseofAI/refinanet) 79 | 80 | ## Contact 81 | becauseofAI[1], Yonghao He[2] 82 | 83 | [1]E-mails: helloai777@gmail.com 84 | [2]E-mails: yonghao.he@ia.ac.cn / yonghao.he@aliyun.com 85 | 86 | **If you are interested in this work, any innovative contributions are welcome!!!** 87 | 88 | **Internship is open at NLPR, CASIA all the time. Send me your resumes!** 89 | -------------------------------------------------------------------------------- /face_detection/README.md: -------------------------------------------------------------------------------- 1 | ## Face Detection 2 | This subdir includes the face detection related code. Some descriptions have 3 | already been presented in the repo README.md. 4 | 5 | ### Recent Update 6 | * `2019.10.14` The model v2 can be tried for training nightly. 7 | * `2019.10.16` **The model v2 can be trained normally.** 8 | 9 | ### Brief Introduction to Model Version 10 | * v1 - refer to the paper for details 11 | * v2 - the detection scale is 10-320 (vs 10-560 in v1), the number of layers is 20, 12 | and the backbone is modified for faster inference. Refer to `./net_farm/naivenet_structures.xlsx` for details. 13 | 14 | ### Accuracy 15 | on the way 16 | 17 | ### Inference Latency 18 | on the way 19 | 20 | ### User Instructions 21 | > **Now only for training v2 nightly.** 22 | 23 | First, we introduce the functionality of each subdirectory. 24 | * [net_farm](net_farm). This folder contains net definitions for all model versions. 25 | * [metric_farm](metric_farm). This folder contains the metrics for training monitoring. 26 | * [data_provider_farm](data_provider_farm). This folder contains the code of raw data processing/formatting/packing&unpacking. 27 | * [data_iterator_farm](data_iterator_farm). This folder contains the code of multi-threaded data prefetching. 28 | **This is the most important part, since it describes the essence of LFFD!!!** 29 | * [config_farm](config_farm). This folder contains the configurations of all model versions. The training is started by running the corresponding config python script. 30 | 31 | Second, we present a common procedure for running the code for training (taking v2 as an example). 32 | 33 | 1. prepare the net model `net_farm/naivenet.py` 34 | 2. prepare the training data by using the code in `data_provider_farm`. We provide packed 35 | training data of the WIDERFACE trainset. Please download it from **Data Download**. 36 | 3. adjust the code around line 241 in `data_iterator_farm/multithread_dataiter_for_cross_entropy_v2`. 37 | 4. set the variables in the configuration py script in `config_farm`. 38 | 5. run `python configuration_10_320_20L_5scales_v2.py` in the `config_farm` directory. 39 | 40 | ### Data Download 41 | We have packed the training data of the WIDERFACE train set. In the data, faces smaller than 8 pixels are ignored, and some pure negative 42 | images cropped from the training images are also added. 
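If you prefer to pack your own training data instead of downloading ours, the flow below mirrors `write_file()`/`read_file()` in `data_provider_farm/pickle_provider.py`. The list file and pickle file names are placeholders; each list line follows the format documented in `text_list_adapter.py` (`[image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],...`):

```
import sys
sys.path.append('..')  # repo root, for ChasingTrainFramework_GeneralOneClassDetection
# run from inside face_detection so that data_provider_farm is importable as a package
from data_provider_farm.text_list_adapter import TextListAdapter
from data_provider_farm.pickle_provider import PickleProvider

list_file = './data_provider_farm/data_folder/my_data_list.txt'  # placeholder
pickle_file = './data_provider_farm/data_folder/my_data.pkl'     # placeholder

# pack: iterate the list file and store JPEG-compressed samples in one pickle
adapter = TextListAdapter(list_file)
packer = PickleProvider(pickle_file, encode_quality=90, data_adapter=adapter)
packer.write()

# read back: a PickleProvider built without an adapter loads the pickle for the data iterators
provider = PickleProvider(pickle_file)
print('positives: %d, negatives: %d' % (len(provider.positive_index), len(provider.negative_index)))
im, flag, bboxes = provider.read_by_index(provider.positive_index[0])
```

If you just want the ready-made WIDERFACE package, use the download links below.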
We provide three ways to download the packed data: 43 | * [Baidu Yunpan](https://pan.baidu.com/s/1a8Wk4GNkfPYbKAFSrZzFIQ) (pwd:e7bv) 44 | * [MS OneDrive](https://1drv.ms/u/s!Av9h0YMgxdaSgwiP4nKDasu4m73J?e=v5UfWQ) 45 | * [Google Drive](https://drive.google.com/open?id=1O3nJ6mQKD_sdFpfXmYoK7xnTUg3To7kO) 46 | 47 | After you download the data, you can put it anywhere. Remember to set `param_trainset_pickle_file_path` variable in the configuration file. (we 48 | usually put the data into the folder: `./data_provider_farm/data_folder/`) 49 | -------------------------------------------------------------------------------- /face_detection/accuracy_evaluation/evaluation_on_fddb.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import cv2 5 | import math 6 | import sys 7 | sys.path.append('..') 8 | # change the config as your need 9 | from config_farm import configuration_10_320_20L_5scales_v2 as cfg 10 | import mxnet 11 | from predict import Predict 12 | 13 | # set the proper symbol file and model file 14 | symbol_file_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json' 15 | model_file_path = '../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params' 16 | my_predictor = Predict(mxnet=mxnet, 17 | symbol_file_path=symbol_file_path, 18 | model_file_path=model_file_path, 19 | ctx=mxnet.gpu(0), 20 | receptive_field_list=cfg.param_receptive_field_list, 21 | receptive_field_stride=cfg.param_receptive_field_stride, 22 | bbox_small_list=cfg.param_bbox_small_list, 23 | bbox_large_list=cfg.param_bbox_large_list, 24 | receptive_field_center_start=cfg.param_receptive_field_center_start, 25 | num_output_scales=cfg.param_num_output_scales) 26 | 27 | 28 | # set fddb root, the path should look like XXXX/originalPics 29 | fddb_image_root = 'XXXX/originalPics' 30 | # set the list file path, the path should look like XXXX/FDDB-folds/annotatedList.txt 31 | image_list_file = 'XXXX/FDDB-folds/annotatedList.txt' 32 | result_file_name = './fddb_' + os.path.basename(model_file_path).split('.')[0] + '_result.txt' 33 | fin = open(image_list_file, 'r') 34 | fout = open(result_file_name, 'w') 35 | resize_scale = 1.0 36 | score_threshold = 0.11 37 | NMS_threshold = 0.4 38 | counter = 0 39 | for line in fin: 40 | line = line.strip('\n') 41 | 42 | im = cv2.imread(os.path.join(fddb_image_root, line + '.jpg'), cv2.IMREAD_COLOR) 43 | 44 | bboxes = my_predictor.predict(im, resize_scale=resize_scale, score_threshold=score_threshold, top_k=10000, NMS_threshold=NMS_threshold) 45 | 46 | # for bbox in bboxes: 47 | # cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 1) 48 | # cv2.imshow('im', im) 49 | # cv2.waitKey() 50 | 51 | fout.write(line + '\n') 52 | fout.write(str(len(bboxes)) + '\n') 53 | for bbox in bboxes: 54 | fout.write('%d %d %d %d %.03f' % ( 55 | math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1]), 56 | bbox[4] if bbox[4] <= 1 else 1) + '\n') 57 | counter += 1 58 | print('[%d] %s is processed.' 
% (counter, line)) 59 | fin.close() 60 | fout.close() 61 | 62 | -------------------------------------------------------------------------------- /face_detection/accuracy_evaluation/evaluation_on_widerface.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | import cv2 5 | import math 6 | sys.path.append('..') 7 | # change the config as your need 8 | from config_farm import configuration_10_320_20L_5scales_v2 as cfg 9 | import mxnet 10 | from predict import Predict 11 | 12 | # set the proper symbol file and model file 13 | symbol_file_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json' 14 | model_file_path = '../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params' 15 | my_predictor = Predict(mxnet=mxnet, 16 | symbol_file_path=symbol_file_path, 17 | model_file_path=model_file_path, 18 | ctx=mxnet.gpu(0), 19 | receptive_field_list=cfg.param_receptive_field_list, 20 | receptive_field_stride=cfg.param_receptive_field_stride, 21 | bbox_small_list=cfg.param_bbox_small_list, 22 | bbox_large_list=cfg.param_bbox_large_list, 23 | receptive_field_center_start=cfg.param_receptive_field_center_start, 24 | num_output_scales=cfg.param_num_output_scales) 25 | 26 | # set the val root, the path should look like XXXX/WIDER_val/images 27 | val_image_root = 'XXXX/WIDER_val/images' 28 | val_result_txt_save_root = './widerface_val_' + os.path.basename(model_file_path).split('.')[0] + '_result_txt/' 29 | if not os.path.exists(val_result_txt_save_root): 30 | os.makedirs(val_result_txt_save_root) 31 | 32 | resize_scale = 1 33 | score_threshold = 0.11 34 | NMS_threshold = 0.4 35 | counter = 0 36 | for parent, dir_names, file_names in os.walk(val_image_root): 37 | for file_name in file_names: 38 | if not file_name.lower().endswith('jpg'): 39 | continue 40 | 41 | im = cv2.imread(os.path.join(parent, file_name), cv2.IMREAD_COLOR) 42 | 43 | bboxes = my_predictor.predict(im, resize_scale=resize_scale, score_threshold=score_threshold, top_k=10000, NMS_threshold=NMS_threshold) 44 | 45 | # for bbox in bboxes: 46 | # cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 1) 47 | # cv2.imshow('im',im) 48 | # cv2.waitKey() 49 | 50 | event_name = parent.split('/')[-1] 51 | if not os.path.exists(os.path.join(val_result_txt_save_root, event_name)): 52 | os.makedirs(os.path.join(val_result_txt_save_root, event_name)) 53 | fout = open(os.path.join(val_result_txt_save_root, event_name, file_name.split('.')[0] + '.txt'), 'w') 54 | fout.write(file_name.split('.')[0] + '\n') 55 | fout.write(str(len(bboxes)) + '\n') 56 | for bbox in bboxes: 57 | fout.write('%d %d %d %d %.03f' % (math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1]), bbox[4] if bbox[4] <= 1 else 1) + '\n') 58 | fout.close() 59 | counter += 1 60 | print('[%d] %s is processed.' 
% (counter, file_name)) 61 | 62 | 63 | -------------------------------------------------------------------------------- /face_detection/accuracy_evaluation/predict.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | import numpy 5 | import cv2 6 | import time 7 | 8 | 9 | # empty data batch class for dynamical properties 10 | class DataBatch: 11 | pass 12 | 13 | 14 | def NMS(boxes, overlap_threshold): 15 | ''' 16 | 17 | :param boxes: numpy nx5, n is the number of boxes, 0:4->x1, y1, x2, y2, 4->score 18 | :param overlap_threshold: 19 | :return: 20 | ''' 21 | if boxes.shape[0] == 0: 22 | return boxes 23 | 24 | # if the bounding boxes are integers, convert them to floats -- 25 | # this is important since we'll be doing a bunch of divisions 26 | if boxes.dtype != numpy.float32: 27 | boxes = boxes.astype(numpy.float32) 28 | 29 | # initialize the list of picked indexes 30 | pick = [] 31 | # grab the coordinates of the bounding boxes 32 | x1 = boxes[:, 0] 33 | y1 = boxes[:, 1] 34 | x2 = boxes[:, 2] 35 | y2 = boxes[:, 3] 36 | sc = boxes[:, 4] 37 | widths = x2 - x1 38 | heights = y2 - y1 39 | 40 | # compute the area of the bounding boxes and sort the bounding 41 | # boxes by the bottom-right y-coordinate of the bounding box 42 | area = heights * widths 43 | idxs = numpy.argsort(sc) # sort the scores in ascending order 44 | 45 | # keep looping while some indexes still remain in the indexes list 46 | while len(idxs) > 0: 47 | # grab the last index in the indexes list and add the 48 | # index value to the list of picked indexes 49 | last = len(idxs) - 1 50 | i = idxs[last] 51 | pick.append(i) 52 | 53 | # compare the remaining boxes with the current highest-score box 54 | xx1 = numpy.maximum(x1[i], x1[idxs[:last]]) 55 | yy1 = numpy.maximum(y1[i], y1[idxs[:last]]) 56 | xx2 = numpy.minimum(x2[i], x2[idxs[:last]]) 57 | yy2 = numpy.minimum(y2[i], y2[idxs[:last]]) 58 | 59 | # compute the width and height of the intersection box 60 | w = numpy.maximum(0, xx2 - xx1 + 1) 61 | h = numpy.maximum(0, yy2 - yy1 + 1) 62 | 63 | # compute the ratio of overlap (intersection over the area of each remaining box) 64 | overlap = (w * h) / area[idxs[:last]] 65 | 66 | # delete all indexes from the index list that have overlap greater than the threshold 67 | idxs = numpy.delete(idxs, numpy.concatenate(([last], numpy.where(overlap > overlap_threshold)[0]))) 68 | 69 | # return only the bounding boxes that were picked using the 70 | # integer data type 71 | return boxes[pick] 72 | 73 | 74 | class Predict(object): 75 | 76 | def __init__(self, 77 | mxnet, 78 | symbol_file_path, 79 | model_file_path, 80 | ctx, 81 | receptive_field_list, 82 | receptive_field_stride, 83 | bbox_small_list, 84 | bbox_large_list, 85 | receptive_field_center_start, 86 | num_output_scales 87 | ): 88 | self.mxnet = mxnet 89 | self.symbol_file_path = symbol_file_path 90 | self.model_file_path = model_file_path 91 | self.ctx = ctx 92 | 93 | self.receptive_field_list = receptive_field_list 94 | self.receptive_field_stride = receptive_field_stride 95 | self.bbox_small_list = bbox_small_list 96 | self.bbox_large_list = bbox_large_list 97 | self.receptive_field_center_start = receptive_field_center_start 98 | self.num_output_scales = num_output_scales 99 | self.constant = [i / 2.0 for i in self.receptive_field_list] # half of each RF size; used to de-normalize the bbox regression 100 | self.input_height = 480 101 | self.input_width = 640 102 | self.__load_model() 103 | 104 | def __load_model(self): 105 | # load symbol and parameters 106 | print('----> load symbol file: %s\n----> load model file: %s' % (self.symbol_file_path, self.model_file_path)) 107 | if not 
os.path.exists(self.symbol_file_path): 108 | print('The symbol file does not exist!!!!') 109 | sys.exit(1) 110 | if not os.path.exists(self.model_file_path): 111 | print('The model file does not exist!!!!') 112 | sys.exit(1) 113 | self.symbol_net = self.mxnet.symbol.load(self.symbol_file_path) 114 | data_name = 'data' 115 | data_name_shape = (data_name, (1, 3, self.input_height, self.input_width)) 116 | self.module = self.mxnet.module.Module(symbol=self.symbol_net, 117 | data_names=[data_name], 118 | label_names=None, 119 | context=self.ctx, 120 | work_load_list=None) 121 | self.module.bind(data_shapes=[data_name_shape], 122 | for_training=False) 123 | 124 | save_dict = self.mxnet.nd.load(self.model_file_path) 125 | self.arg_name_arrays = dict() 126 | self.arg_name_arrays['data'] = self.mxnet.nd.zeros((1, 3, self.input_height, self.input_width), self.ctx) 127 | self.aux_name_arrays = {} 128 | for k, v in save_dict.items(): 129 | tp, name = k.split(':', 1) 130 | if tp == 'arg': 131 | self.arg_name_arrays.update({name: v.as_in_context(self.ctx)}) 132 | if tp == 'aux': 133 | self.aux_name_arrays.update({name: v.as_in_context(self.ctx)}) 134 | self.module.init_params(arg_params=self.arg_name_arrays, 135 | aux_params=self.aux_name_arrays, 136 | allow_missing=True) 137 | print('----> Model is loaded successfully.') 138 | 139 | def predict(self, image, resize_scale=1, score_threshold=0.8, top_k=100, NMS_threshold=0.3, NMS_flag=True, skip_scale_branch_list=[]): 140 | 141 | if image.ndim != 3 or image.shape[2] != 3: 142 | print('Only RGB images are supported.') 143 | return None 144 | 145 | bbox_collection = [] 146 | 147 | shorter_side = min(image.shape[:2]) 148 | if shorter_side * resize_scale < 128: 149 | resize_scale = float(128) / shorter_side 150 | 151 | input_image = cv2.resize(image, (0, 0), fx=resize_scale, fy=resize_scale) 152 | 153 | input_image = input_image.astype(dtype=numpy.float32) 154 | input_image = input_image[:, :, :, numpy.newaxis] 155 | input_image = input_image.transpose([3, 2, 0, 1]) 156 | 157 | data_batch = DataBatch() 158 | data_batch.data = [self.mxnet.ndarray.array(input_image, self.ctx)] 159 | 160 | tic = time.time() 161 | self.module.forward(data_batch=data_batch, is_train=False) 162 | results = self.module.get_outputs() 163 | outputs = [] 164 | for output in results: 165 | outputs.append(output.asnumpy()) 166 | toc = time.time() 167 | infer_time = (toc - tic) * 1000 168 | 169 | for i in range(self.num_output_scales): 170 | if i in skip_scale_branch_list: 171 | continue 172 | 173 | score_map = numpy.squeeze(outputs[i * 2], (0, 1)) 174 | 175 | # score_map_show = score_map * 255 176 | # score_map_show[score_map_show < 0] = 0 177 | # score_map_show[score_map_show > 255] = 255 178 | # cv2.imshow('score_map' + str(i), cv2.resize(score_map_show.astype(dtype=numpy.uint8), (0, 0), fx=2, fy=2)) 179 | # cv2.waitKey() 180 | 181 | bbox_map = numpy.squeeze(outputs[i * 2 + 1], 0) 182 | 183 | RF_center_Xs = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * x for x in range(score_map.shape[1])]) 184 | RF_center_Xs_mat = numpy.tile(RF_center_Xs, [score_map.shape[0], 1]) 185 | RF_center_Ys = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * y for y in range(score_map.shape[0])]) 186 | RF_center_Ys_mat = numpy.tile(RF_center_Ys, [score_map.shape[1], 1]).T 187 | 188 | x_lt_mat = RF_center_Xs_mat - bbox_map[0, :, :] * self.constant[i] 189 | y_lt_mat = RF_center_Ys_mat - bbox_map[1, :, :] * self.constant[i] 190 | x_rb_mat 
= RF_center_Xs_mat - bbox_map[2, :, :] * self.constant[i] 191 | y_rb_mat = RF_center_Ys_mat - bbox_map[3, :, :] * self.constant[i] 192 | 193 | x_lt_mat = x_lt_mat / resize_scale 194 | x_lt_mat[x_lt_mat < 0] = 0 195 | y_lt_mat = y_lt_mat / resize_scale 196 | y_lt_mat[y_lt_mat < 0] = 0 197 | x_rb_mat = x_rb_mat / resize_scale 198 | x_rb_mat[x_rb_mat > image.shape[1]] = image.shape[1] 199 | y_rb_mat = y_rb_mat / resize_scale 200 | y_rb_mat[y_rb_mat > image.shape[0]] = image.shape[0] 201 | 202 | select_index = numpy.where(score_map > score_threshold) 203 | for idx in range(select_index[0].size): 204 | bbox_collection.append((x_lt_mat[select_index[0][idx], select_index[1][idx]], 205 | y_lt_mat[select_index[0][idx], select_index[1][idx]], 206 | x_rb_mat[select_index[0][idx], select_index[1][idx]], 207 | y_rb_mat[select_index[0][idx], select_index[1][idx]], 208 | score_map[select_index[0][idx], select_index[1][idx]])) 209 | 210 | # NMS 211 | bbox_collection = sorted(bbox_collection, key=lambda item: item[-1], reverse=True) 212 | if len(bbox_collection) > top_k: 213 | bbox_collection = bbox_collection[0:top_k] 214 | bbox_collection_numpy = numpy.array(bbox_collection, dtype=numpy.float32) 215 | 216 | if NMS_flag: 217 | final_bboxes = NMS(bbox_collection_numpy, NMS_threshold) 218 | final_bboxes_ = [] 219 | for i in range(final_bboxes.shape[0]): 220 | final_bboxes_.append((final_bboxes[i, 0], final_bboxes[i, 1], final_bboxes[i, 2], final_bboxes[i, 3], final_bboxes[i, 4])) 221 | 222 | return final_bboxes_, infer_time 223 | else: 224 | return bbox_collection_numpy, infer_time 225 | 226 | 227 | def run_prediction_folder(): 228 | sys.path.append('..') 229 | from config_farm import configuration_10_560_25L_8scales_v1 as cfg 230 | import mxnet 231 | 232 | debug_folder = '' # fill the folder that contains images 233 | file_name_list = [file_name for file_name in os.listdir(debug_folder) if file_name.lower().endswith('jpg')] 234 | 235 | symbol_file_path = '../symbol_farm/symbol_10_560_25L_8scales_v1_deploy.json' 236 | model_file_path = '../saved_model/configuration_10_560_25L_8scales_v1/train_10_560_25L_8scales_v1_iter_1400000.params' 237 | my_predictor = Predict(mxnet=mxnet, 238 | symbol_file_path=symbol_file_path, 239 | model_file_path=model_file_path, 240 | ctx=mxnet.gpu(0), 241 | receptive_field_list=cfg.param_receptive_field_list, 242 | receptive_field_stride=cfg.param_receptive_field_stride, 243 | bbox_small_list=cfg.param_bbox_small_list, 244 | bbox_large_list=cfg.param_bbox_large_list, 245 | receptive_field_center_start=cfg.param_receptive_field_center_start, 246 | num_output_scales=cfg.param_num_output_scales) 247 | 248 | for file_name in file_name_list: 249 | im = cv2.imread(os.path.join(debug_folder, file_name)) 250 | 251 | bboxes, infer_time = my_predictor.predict(im, resize_scale=1, score_threshold=0.3, top_k=10000, NMS_threshold=0.3, NMS_flag=True, skip_scale_branch_list=[]) # predict() returns (bboxes, inference time in ms) 252 | for bbox in bboxes: 253 | cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2) 254 | 255 | if max(im.shape[:2]) > 1600: 256 | scale = 1600/max(im.shape[:2]) 257 | im = cv2.resize(im, (0, 0), fx=scale, fy=scale) 258 | cv2.imshow('im', im) 259 | cv2.waitKey() 260 | # cv2.imwrite(os.path.join(debug_folder, file_name.replace('.jpg','_result.jpg')), im) 261 | 262 | 263 | if __name__ == '__main__': 264 | run_prediction_folder() 265 | -------------------------------------------------------------------------------- /face_detection/config_farm/__init__.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @date : 18-11-28 3 | # @author : MindBreaker 4 | # @module : -------------------------------------------------------------------------------- /face_detection/data_iterator_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/data_iterator_farm/__init__.py -------------------------------------------------------------------------------- /face_detection/data_provider_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/data_provider_farm/__init__.py -------------------------------------------------------------------------------- /face_detection/data_provider_farm/data_folder/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/data_provider_farm/data_folder/.gitkeep -------------------------------------------------------------------------------- /face_detection/data_provider_farm/pickle_provider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This provider accepts an adapter, saves the dataset in a pickle file, and loads the whole dataset into memory for the data iterators 3 | ''' 4 | 5 | import cv2 6 | import numpy 7 | import pickle 8 | import sys 9 | 10 | sys.path.append('../..') 11 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_provider import ProviderBaseclass 12 | from data_provider_farm.text_list_adapter import TextListAdapter 13 | 14 | 15 | class PickleProvider(ProviderBaseclass): 16 | """ 17 | This class provides methods to save and read data. 18 | By default, images are compressed using JPG format. 
19 | If data_adapter is not None, the provider saves data; otherwise it reads data. 20 | """ 21 | 22 | def __init__(self, 23 | pickle_file_path, 24 | encode_quality=90, 25 | data_adapter=None): 26 | ProviderBaseclass.__init__(self) 27 | 28 | if data_adapter: # write data 29 | 30 | self.data_adapter = data_adapter 31 | self.data = {} 32 | self.counter = 0 33 | self.pickle_file_path = pickle_file_path 34 | 35 | else: # read data 36 | 37 | self.data = pickle.load(open(pickle_file_path, 'rb')) 38 | # get positive and negative indices 39 | self._positive_index = [] 40 | self._negative_index = [] 41 | for k, v in self.data.items(): 42 | if v[1] == 0: # negative 43 | self._negative_index.append(k) 44 | else: # positive 45 | self._positive_index.append(k) 46 | 47 | self.compression_mode = '.jpg' 48 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality] 49 | 50 | @property 51 | def positive_index(self): 52 | return self._positive_index 53 | 54 | @property 55 | def negative_index(self): 56 | return self._negative_index 57 | 58 | def write(self): 59 | 60 | for data_item in self.data_adapter.get_one(): 61 | 62 | temp_sample = [] 63 | im, bboxes = data_item 64 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params) 65 | if buf is None or buf.size == 0: 66 | print('The encoded buffer is empty.') 67 | continue 68 | if not ret: 69 | print('An error occurred during compression.') 70 | continue 71 | temp_sample.append(buf) 72 | 73 | if isinstance(bboxes, str): # negative sample 74 | temp_sample.append(0) 75 | temp_sample.append(int(bboxes)) 76 | else: 77 | temp_sample.append(1) 78 | temp_sample.append(bboxes) 79 | 80 | self.data[self.counter] = temp_sample 81 | print('Successfully save the %d-th data item.' % self.counter) 82 | self.counter += 1 83 | 84 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) 85 | 86 | def read_by_index(self, index): 87 | im_buf, flag, bboxes = self.data[index] 88 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR) 89 | return im, flag, bboxes 90 | 91 | 92 | def write_file(): 93 | data_list_file_path = './data_folder/data_list_2019-05-07-14-47-19.txt' 94 | adapter = TextListAdapter(data_list_file_path) 95 | 96 | pickle_file_path = './data_folder/data_2019-05-07-14-47-19.pkl' 97 | encode_quality = 90 98 | packer = PickleProvider(pickle_file_path, encode_quality, adapter) 99 | packer.write() 100 | 101 | 102 | def read_file(): 103 | pickle_file_path = './data_folder/data_2019-05-07-14-47-19.pkl' 104 | 105 | provider = PickleProvider(pickle_file_path) 106 | positive_index = provider.positive_index 107 | negative_index = provider.negative_index 108 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index))) 109 | # all_index = positive_index+negative_index 110 | import random 111 | random.shuffle(positive_index) 112 | 113 | for i, index in enumerate(positive_index): 114 | im, flag, bboxes_numpy = provider.read_by_index(index) 115 | if isinstance(bboxes_numpy, numpy.ndarray): 116 | for n in range(bboxes_numpy.shape[0]): 117 | cv2.rectangle(im, (bboxes_numpy[n, 0], bboxes_numpy[n, 1]), 118 | (bboxes_numpy[n, 0] + bboxes_numpy[n, 2], bboxes_numpy[n, 1] + bboxes_numpy[n, 3]), (0, 255, 0), 1) 119 | cv2.imshow('im', im) 120 | cv2.waitKey() 121 | 122 | 123 | if __name__ == '__main__': 124 | write_file() 125 | # read_file() 126 | -------------------------------------------------------------------------------- /face_detection/data_provider_farm/text_list_adapter.py: 
-------------------------------------------------------------------------------- 1 | ''' 2 | This adapter accepts a text file as input that describes the annotated data. 3 | Each line in the text is formatted as: 4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]...... 5 | ''' 6 | 7 | import cv2 8 | import numpy 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_data_adapter import DataAdapterBaseclass 10 | 11 | 12 | class TextListAdapter(DataAdapterBaseclass): 13 | 14 | def __init__(self, data_list_file_path): 15 | 16 | DataAdapterBaseclass.__init__(self) 17 | fin = open(data_list_file_path, 'r') 18 | self.lines = fin.readlines() 19 | fin.close() 20 | self.line_counter = 0 21 | 22 | def __del__(self): 23 | pass 24 | 25 | def get_one(self): 26 | """ 27 | This function uses 'yield' to return samples 28 | """ 29 | while self.line_counter < len(self.lines): 30 | 31 | line = self.lines[self.line_counter].strip('\n').split(',') 32 | if line[1] == '1': # pos sample 33 | assert len(line[3:]) == 4 * int(line[2]) 34 | 35 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED) 36 | 37 | if line[1] == '0': 38 | yield im, '0' 39 | self.line_counter += 1 40 | continue 41 | 42 | num_bboxes = int(line[2]) 43 | bboxes = [] 44 | for i in range(num_bboxes): 45 | x = float(line[3 + i * 4]) 46 | y = float(line[3 + i * 4 + 1]) 47 | width = float(line[3 + i * 4 + 2]) 48 | height = float(line[3 + i * 4 + 3]) 49 | 50 | bboxes.append([x, y, width, height]) 51 | 52 | bboxes = numpy.array(bboxes, dtype=numpy.float32) 53 | yield im, bboxes 54 | 55 | self.line_counter += 1 56 | 57 | 58 | if __name__ == '__main__': 59 | pass 60 | -------------------------------------------------------------------------------- /face_detection/demo/demo.py: -------------------------------------------------------------------------------- 1 | """LFFD Demo.""" 2 | import os, sys 3 | import argparse 4 | import cv2 5 | import time 6 | import mxnet as mx 7 | import numpy as np 8 | 9 | sys.path.append("..") 10 | from accuracy_evaluation import predict 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='LFFD Demo.') 15 | parser.add_argument('--version', type=str, default='v2', 16 | help='The version of pretrained model, now support "v1" and "v2".') 17 | parser.add_argument('--mode', type=str, default='image', 18 | help='The format of input data, now support "image" of jpg and "video" of mp4.') 19 | parser.add_argument('--use-gpu', action='store_true', # a boolean flag; type=bool would turn any non-empty string into True 20 | help='Use GPU for inference. Default is cpu.') 21 | parser.add_argument('--data', type=str, default='./data', 22 | help='The path of input and output file.') 23 | args = parser.parse_args() 24 | return args 25 | 26 | 27 | def main(): 28 | args = parse_args() 29 | # context list 30 | if args.use_gpu: 31 | ctx = mx.gpu(0) 32 | else: 33 | ctx = mx.cpu() 34 | 35 | if args.version == 'v1': 36 | from config_farm import configuration_10_560_25L_8scales_v1 as cfg # matches the v1 symbol/params files below 37 | 38 | symbol_file_path = '../symbol_farm/symbol_10_560_25L_8scales_v1_deploy.json' 39 | model_file_path = '../saved_model/configuration_10_560_25L_8scales_v1/train_10_560_25L_8scales_v1_iter_1400000.params' 40 | elif args.version == 'v2': 41 | from config_farm import configuration_10_320_20L_5scales_v2 as cfg 42 | 43 | symbol_file_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json' 44 | model_file_path = '../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params' 45 | else: 46 | raise
TypeError('Unsupported LFFD Version.') 47 | 48 | face_predictor = predict.Predict(mxnet=mx, 49 | symbol_file_path=symbol_file_path, 50 | model_file_path=model_file_path, 51 | ctx=ctx, 52 | receptive_field_list=cfg.param_receptive_field_list, 53 | receptive_field_stride=cfg.param_receptive_field_stride, 54 | bbox_small_list=cfg.param_bbox_small_list, 55 | bbox_large_list=cfg.param_bbox_large_list, 56 | receptive_field_center_start=cfg.param_receptive_field_center_start, 57 | num_output_scales=cfg.param_num_output_scales) 58 | 59 | if args.mode == 'image': 60 | data_folder = args.data 61 | file_name_list = [file_name for file_name in os.listdir(data_folder) \ 62 | if file_name.lower().endswith('jpg')] 63 | 64 | for file_name in file_name_list: 65 | im = cv2.imread(os.path.join(data_folder, file_name)) 66 | 67 | bboxes, infer_time = face_predictor.predict(im, resize_scale=1, score_threshold=0.6, top_k=10000, \ 68 | NMS_threshold=0.4, NMS_flag=True, skip_scale_branch_list=[]) 69 | 70 | for bbox in bboxes: 71 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2) 72 | 73 | # if max(im.shape[:2]) > 1600: 74 | # scale = 1600/max(im.shape[:2]) 75 | # im = cv2.resize(im, (0, 0), fx=scale, fy=scale) 76 | cv2.imshow('im', im) 77 | cv2.waitKey(5000) 78 | cv2.imwrite(os.path.join(data_folder, file_name.replace('.jpg', '_result.png')), im) 79 | elif args.mode == 'video': 80 | # win_name = 'LFFD DEMO' 81 | # cv2.namedWindow(win_name, cv2.WINDOW_NORMAL) 82 | data_folder = args.data 83 | file_name_list = [file_name for file_name in os.listdir(data_folder) \ 84 | if file_name.lower().endswith('mp4')] 85 | for file_name in file_name_list: 86 | out_file = os.path.join(data_folder, file_name.replace('.mp4', '_v2_gpu_result.avi')) 87 | cap = cv2.VideoCapture(os.path.join(data_folder, file_name)) 88 | vid_writer = cv2.VideoWriter(out_file, \ 89 | cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 60, \ 90 | (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), \ 91 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))) 92 | while cv2.waitKey(1) < 0: 93 | ret, frame = cap.read() 94 | if ret: 95 | h, w, c = frame.shape 96 | 97 | if not ret: 98 | print("Done processing of %s" % file_name) 99 | print("Output file is stored as %s" % out_file) 100 | cv2.waitKey(3000) 101 | break 102 | 103 | tic = time.time() 104 | bboxes, infer_time = face_predictor.predict(frame, resize_scale=1, score_threshold=0.6, top_k=10000, \ 105 | NMS_threshold=0.4, NMS_flag=True, skip_scale_branch_list=[]) 106 | toc = time.time() 107 | detect_time = (toc - tic) * 1000 108 | 109 | face_num = 0 110 | for bbox in bboxes: 111 | face_num += 1 112 | cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2) 113 | 114 | computing_platform = 'Computing platform: NVIDIA GPU FP32' 115 | cv2.putText(frame, computing_platform, (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3) 116 | input_resolution = 'Network input resolution: %sx%s' % (w, h) 117 | cv2.putText(frame, input_resolution, (5, 65), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3) 118 | infer_time_info = 'Inference time: %.2f ms' % (infer_time) 119 | cv2.putText(frame, infer_time_info, (5, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3) 120 | infer_speed = 'Inference speed: %.2f FPS' % (1000 / infer_time) 121 | cv2.putText(frame, infer_speed, (5, 135), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3) 122 | face_num_info = 'Face num: %d' % (face_num) 123 | cv2.putText(frame, face_num_info, (5, 170), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3) 124 | 125 | 
vid_writer.write(frame.astype(np.uint8)) 126 | # cv2.imshow(win_name, frame) 127 | 128 | if cv2.waitKey(1) & 0xFF == ord('q'): 129 | break 130 | 131 | cap.release() 132 | cv2.destroyAllWindows() 133 | else: 134 | raise TypeError('Unsupported File Format.') 135 | 136 | 137 | if __name__ == '__main__': 138 | main() 139 | -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/README.md: -------------------------------------------------------------------------------- 1 | ## Deployment with TensorRT 2 | We provide code for deployment with the [TensorRT python API](https://developer.nvidia.com/tensorrt). 3 | In general, once you use an NVIDIA GPU in your applications, 4 | TensorRT is the best choice for deployment, rather than training frameworks like TensorFlow, PyTorch, MXNet, Caffe... 5 | 6 | ### Prerequisites 7 | Refer to [inference_speed_evaluation](../inference_speed_evaluation) for details. 8 | 9 | ### Getting Started 10 | 1. use `to_onnx.py` to generate the onnx model file 11 | 2. run `predict_tensorrt.py` to do inference based on the generated model file 12 | 3. after you fully understand the code, you may refactor and merge it into your own project. 13 | 14 | > In most practical cases, C++ is the primary choice for efficient running. 15 | So you can rewrite the code according to the python code structure. 16 | In the future, we will provide a C++ version. 17 | 18 | ### NVIDIA Jetson NANO&TX2 Deployment Instructions 19 | TBD -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/debug_image/test1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test1.jpg -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/debug_image/test2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test2.jpg -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/debug_image/test3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test3.jpg -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/debug_image/test5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test5.jpg -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/debug_image/test6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/deploy_tensorrt/debug_image/test6.jpg -------------------------------------------------------------------------------- /face_detection/deploy_tensorrt/to_onnx.py: -------------------------------------------------------------------------------- 1 | 
import logging 2 | import numpy 3 | import sys 4 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') # add mxnet python path if needed 5 | import mxnet 6 | from mxnet.contrib import onnx as onnx_mxnet 7 | from onnx import checker 8 | import onnx 9 | 10 | 11 | def generate_onnx_file(): 12 | logging.basicConfig(level=logging.INFO) 13 | 14 | # set the proper symbol path, param path and onnx path 15 | symbol_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json' 16 | param_path = '../saved_model/configuration_10_320_20L_5scales_v2/train_10_320_20L_5scales_v2_iter_1800000.params' 17 | onnx_path = './onnx_files/v2.onnx' 18 | 19 | net_symbol = mxnet.symbol.load(symbol_path) 20 | net_params_raw = mxnet.nd.load(param_path) 21 | net_params = dict() 22 | for k, v in net_params_raw.items(): 23 | tp, name = k.split(':', 1) 24 | net_params.update({name: v}) 25 | 26 | input_shape = (1, 3, 480, 640) # CAUTION: in TensorRT, the input size cannot be changed dynamically, so you must set it here. 27 | 28 | onnx_mxnet.export_model(net_symbol, net_params, [input_shape], numpy.float32, onnx_path, verbose=True) 29 | 30 | # Load onnx model 31 | model_proto = onnx.load_model(onnx_path) 32 | 33 | # Check if converted ONNX protobuf is valid 34 | checker.check_graph(model_proto.graph) 35 | 36 | 37 | if __name__ == '__main__': 38 | generate_onnx_file() 39 | -------------------------------------------------------------------------------- /face_detection/inference_speed_evaluation/README.md: -------------------------------------------------------------------------------- 1 | ## Inference Speed Evaluation 2 | 3 | ### Update History 4 | * `2019.8.1` inference python code for MXNet-cudnn and TensorRT-cudnn is online. 5 | 6 | ### Additional Prerequisites 7 | * [onnx](https://onnx.ai/) (pip3 install onnx==1.3.0) 8 | * [pycuda](https://developer.nvidia.com/pycuda) (pip3 install pycuda==2019.1.1 or [install guide](https://pypi.org/project/pycuda/)) 9 | * [tensorrt](https://developer.nvidia.com/tensorrt) =5.x (use pip3 to install the corresponding .whl file in the python folder) 10 | 11 | > CAUTION: 12 | > 13 | > Carefully check the version compatibility between CUDA, cuDNN, pycuda, TensorRT and onnx. 14 | 15 | 16 | ### Getting Started 17 | 1. (optional) temporarily add the mxnet python path to the env if mxnet is not globally installed 18 | 2. set `eval_with_mxnet_flag` to True to evaluate with mxnet + cudnn, or to False to evaluate with tensorrt + cudnn (the two cannot run at the same time due to some conflicts) 19 | 3. set `symbol_file_path`, `input_shape` and `real_run_loops` 20 | 4. 
run the script -------------------------------------------------------------------------------- /face_detection/inference_speed_evaluation/inference_speed_eval.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') # append mxnet python path if need 4 | sys.path.append('../..') 5 | import mxnet 6 | 7 | eval_with_mxnet_flag = False 8 | symbol_file_path = '../symbol_farm/symbol_10_320_20L_5scales_v2_deploy.json' 9 | input_shape = (1, 3, 480, 640) # (1,3,240,320) (1,3,480,640) (1,3,720,1280) (1,3,1080,1920) (1,3,2160,3840) 10 | 11 | if eval_with_mxnet_flag: 12 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_mxnet_cudnn import InferenceSpeedEval as InferenceSpeedEvalMXNet 13 | 14 | inferenceSpeedEvalMXNet = InferenceSpeedEvalMXNet(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type='gpu', gpu_index=0) 15 | inferenceSpeedEvalMXNet.run_speed_eval(warm_run_loops=10, real_run_loops=200) 16 | 17 | else: 18 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_tensorrt_cudnn import InferenceSpeedEval as InferenceSpeedEvalTRT 19 | 20 | inferenceSpeedEvalTRT = InferenceSpeedEvalTRT(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape) 21 | inferenceSpeedEvalTRT.run_speed_eval(warm_run_loops=10, real_run_loops=200) 22 | -------------------------------------------------------------------------------- /face_detection/metric_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/metric_farm/__init__.py -------------------------------------------------------------------------------- /face_detection/metric_farm/metric_default.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy 4 | import torch 5 | 6 | 7 | class Metric: 8 | def __init__(self, num_scales): 9 | self.num_scales = num_scales 10 | self.sum_metric = [0.0 for i in range(num_scales * 2)] 11 | self.num_update = 0 12 | self.multiply_factor = 10000 13 | 14 | def update(self, loss_branch): 15 | for i in range(self.num_scales): 16 | loss_score = loss_branch[i * 2] 17 | loss_bbox = loss_branch[i * 2 + 1] 18 | 19 | self.sum_metric[i * 2] += loss_score 20 | self.sum_metric[i * 2 + 1] += loss_bbox 21 | 22 | self.num_update += 1 23 | 24 | def get(self): 25 | return_string_list = [] 26 | for i in range(self.num_scales): 27 | return_string_list.append('cls_loss_score_' + str(i)) 28 | return_string_list.append('reg_loss_bbox_' + str(i)) 29 | 30 | return return_string_list, [m / self.num_update * self.multiply_factor for i, m in enumerate(self.sum_metric)] 31 | 32 | def reset(self): 33 | self.sum_metric = [0.0 for i in range(self.num_scales * 2)] 34 | self.num_update = 0 35 | -------------------------------------------------------------------------------- /face_detection/net_farm/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @date : 18-8-19 3 | # @author : MindBreaker 4 | # @module : -------------------------------------------------------------------------------- /face_detection/net_farm/naivenet_structures.xlsx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/net_farm/naivenet_structures.xlsx -------------------------------------------------------------------------------- /face_detection/qualitative_results/v1_qualitative_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_1.jpg -------------------------------------------------------------------------------- /face_detection/qualitative_results/v1_qualitative_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_2.jpg -------------------------------------------------------------------------------- /face_detection/qualitative_results/v1_qualitative_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_3.jpg -------------------------------------------------------------------------------- /face_detection/qualitative_results/v1_qualitative_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_4.jpg -------------------------------------------------------------------------------- /face_detection/qualitative_results/v1_qualitative_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/qualitative_results/v1_qualitative_5.jpg -------------------------------------------------------------------------------- /face_detection/saved_model/configuration_10_320_20L_5scales_v2/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/saved_model/configuration_10_320_20L_5scales_v2/.gitkeep -------------------------------------------------------------------------------- /face_detection/saved_model/configuration_10_560_25L_8scales_v1/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/face_detection/saved_model/configuration_10_560_25L_8scales_v1/.gitkeep -------------------------------------------------------------------------------- /head_detection/README.md: -------------------------------------------------------------------------------- 1 | ## Head Detection 2 | We use the brainwash dataset introduced by paper [End-to-end people detection in crowded scenes](https://arxiv.org/abs/1506.04878). 3 | 4 | ### Recent Update 5 | * `2019.09.23` model v1 for brainwash dataset is released. 6 | * `2019.09.26` brainwash dataset (and packed pkl) is uploaded for downloading. 
7 | 8 | ### Brief Introduction to Model Version 9 | * v1 - designed for the brainwash dataset, covering the head scale range [10, 160]. It has 4 branches. Please check 10 | `./symbol_farm/symbol_structures.xlsx` for details. 11 | 12 | ### Inference Latency 13 | 14 | * Platform info: NVIDIA RTX 2080TI, CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0 15 | 16 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160|7680×4320 17 | -------------|-------|-------|--------|---------|---------|--------- 18 | v1|0.83ms(1198.38FPS)|1.91ms(524.14FPS)|4.83ms(206.92FPS)|10.62ms(94.19FPS)|42.28ms(23.65FPS)|166.81ms(5.99FPS) 19 | 20 | * Platform info: NVIDIA GTX 1060(laptop), CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0 21 | 22 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160 23 | -------------|-------|-------|--------|---------|--------- 24 | v1|1.62ms(618.53FPS)|4.83ms(207.06FPS)|13.67ms(73.18FPS)|30.01ms(33.32FPS)|121.15ms(8.25FPS) 25 | 26 | > CAUTION: The latency may vary even in the same setting. 27 | 28 | ### Accuracy on Brainwash Dataset 29 | We train v1 on the training set (10769 images with 81975 annotated heads) and evaluate on the test set (500 images with 5007 30 | annotated heads). This dataset is relatively simple due to its monotonous scenarios. 31 | 32 | #### Quantitative Results on Test Set 33 | Average Precision (AP) is used to measure the accuracy. Specifically, we use the code from [Object-Detection-Metrics](https://github.com/rafaelpadilla/Object-Detection-Metrics) 34 | to calculate the AP metric. The following table presents the results: 35 | 36 | Method|AP 37 | --------|------ 38 | ReInspect, Lhungarian [1]|0.78 39 | FCHD [2]|0.70 40 | v1 (ours)|0.91 41 | 42 | >[1] [End-to-end people detection in crowded scenes](https://arxiv.org/abs/1506.04878) 43 | > 44 | >[2] [FCHD: Fast and accurate head detection in crowded scenes](https://arxiv.org/abs/1809.08766) 45 | 46 | v1 significantly outperforms the existing methods. 47 | 48 | #### Some Qualitative Results on Test Set 49 | ![image](./accuracy_evaluation/test_images/2.jpg) 50 | ![image](./accuracy_evaluation/test_images/72.jpg) 51 | ![image](./accuracy_evaluation/test_images/322.jpg) 52 | ![image](./accuracy_evaluation/test_images/411.jpg) 53 | 54 | ### User Instructions 55 | Please refer to [README in face_detection](../face_detection/README.md) for details. 56 | 57 | ### Data Download 58 | We provide both the original and the packed data of the brainwash dataset.
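The packed `.pkl` file is produced by `data_provider_farm/pickle_provider.py`: it stores a dict that maps a sample index to `[encoded JPG buffer, pos/neg flag, bboxes]`, where `bboxes` is a numpy array of `[x, y, width, height]` rows for positive samples and an integer placeholder for negative samples. A minimal reading sketch (the file path is an example):

```python
import pickle
import cv2

# load the packed dataset: {index: [encoded JPG buffer, pos/neg flag, bboxes]}
data = pickle.load(open('data_list_brainwash_test.pkl', 'rb'))
im_buf, flag, bboxes = data[0]
# decode the JPG buffer back into a BGR image
im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR)
print(im.shape, flag)
```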
We provide three ways to download the packed data: 59 | * [Baidu Yunpan](https://pan.baidu.com/s/1VdiXHhtw9aNaU1E9PhVwtQ) (pwd:zvma) 60 | * [MS OneDrive] 61 | * [Google Drive] -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/evaluation_on_brainwash.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | import cv2 5 | import math 6 | import re 7 | 8 | sys.path.append('..') 9 | # change the config as you need 10 | from config_farm import configuration_10_160_17L_4scales_v1 as cfg 11 | import mxnet 12 | from predict import Predict 13 | 14 | 15 | def generate_gt_files(): 16 | txt_file_path = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash/brainwash_test.idl' 17 | gt_file_root = './brainwash_testset_gt_files_for_evaluation' 18 | 19 | if not os.path.exists(gt_file_root): 20 | os.makedirs(gt_file_root) 21 | 22 | fin = open(txt_file_path, 'r') 23 | 24 | counter = 0 25 | for line in fin: 26 | line = line.strip(';\n') 27 | im_path = re.findall('["](.*?)["]', line)[0] 28 | 29 | bbox_str_list = re.findall('[(](.*?)[)]', line) 30 | bbox_list = [] 31 | for bbox_str in bbox_str_list: 32 | bbox_str = bbox_str.split(', ') 33 | xmin = int(float(bbox_str[0])) 34 | ymin = int(float(bbox_str[1])) 35 | xmax = int(float(bbox_str[2])) 36 | ymax = int(float(bbox_str[3])) 37 | bbox_list.append((xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)) 38 | 39 | if len(bbox_list) != 0: 40 | gt_file_name = im_path.replace('/', '_') 41 | gt_file_name = gt_file_name.replace('png', 'txt') 42 | fout = open(os.path.join(gt_file_root, gt_file_name), 'w') 43 | for bbox in bbox_list: 44 | line_str = 'head ' + str(bbox[0]) + ' ' + str(bbox[1]) + ' ' + str(bbox[2]) + ' ' + str(bbox[3]) 45 | fout.write(line_str + '\n') 46 | fout.close() 47 | counter += 1 48 | print(counter) 49 | fin.close() 50 | 51 | 52 | def generate_predicted_files(): 53 | # set the proper symbol file and model file 54 | symbol_file_path = '../symbol_farm/symbol_10_160_17L_4scales_v1_deploy.json' 55 | model_file_path = '../saved_model/configuration_10_160_17L_4scales_v1_2019-09-20-13-08-26/train_10_160_17L_4scales_v1_iter_800000.params' 56 | my_predictor = Predict(mxnet=mxnet, 57 | symbol_file_path=symbol_file_path, 58 | model_file_path=model_file_path, 59 | ctx=mxnet.gpu(0), 60 | receptive_field_list=cfg.param_receptive_field_list, 61 | receptive_field_stride=cfg.param_receptive_field_stride, 62 | bbox_small_list=cfg.param_bbox_small_list, 63 | bbox_large_list=cfg.param_bbox_large_list, 64 | receptive_field_center_start=cfg.param_receptive_field_center_start, 65 | num_output_scales=cfg.param_num_output_scales) 66 | 67 | # set the brainwash test annotation file (.idl) and the image root 68 | txt_file_path = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash/brainwash_test.idl' 69 | image_root = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash' 70 | predicted_file_root = './brainwash_testset_predicted_files_for_evaluation_' + os.path.basename(model_file_path).split('.')[0] 71 | 72 | if not os.path.exists(predicted_file_root): 73 | os.makedirs(predicted_file_root) 74 | 75 | fin = open(txt_file_path, 'r') 76 | 77 | resize_scale = 1 78 | score_threshold = 0.05 79 | NMS_threshold = 0.6 80 | counter = 0 81 | 82 | for line in fin: 83 | line = line.strip(';\n') 84 | im_path = re.findall('["](.*?)["]', line)[0] 85 | 86 | im = 
cv2.imread(os.path.join(image_root, im_path), cv2.IMREAD_COLOR) 87 | 88 | bboxes = my_predictor.predict(im, resize_scale=resize_scale, score_threshold=score_threshold, top_k=10000, NMS_threshold=NMS_threshold) 89 | 90 | # for bbox in bboxes: 91 | # cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 1) 92 | # cv2.imshow('im',im) 93 | # cv2.waitKey() 94 | predicted_file_name = im_path.replace('/', '_') 95 | predicted_file_name = predicted_file_name.replace('png', 'txt') 96 | fout = open(os.path.join(predicted_file_root, predicted_file_name), 'w') 97 | for bbox in bboxes: 98 | fout.write('head %.03f %d %d %d %d' % (bbox[4] if bbox[4] <= 1 else 1, math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1])) + '\n') 99 | fout.close() 100 | counter += 1 101 | print('[%d] is processed.' % counter) 102 | 103 | 104 | if __name__ == '__main__': 105 | # generate_gt_files() 106 | generate_predicted_files() 107 | -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/predict.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | import os 4 | import numpy 5 | import cv2 6 | 7 | 8 | # empty data batch class for dynamic properties 9 | class DataBatch: 10 | pass 11 | 12 | 13 | def NMS(boxes, overlap_threshold): 14 | ''' 15 | 16 | :param boxes: numpy nx5, n is the number of boxes, 0:4->x1, y1, x2, y2, 4->score 17 | :param overlap_threshold: 18 | :return: 19 | ''' 20 | if boxes.shape[0] == 0: 21 | return boxes 22 | 23 | # if the bounding boxes are integers, convert them to floats -- 24 | # this is important since we'll be doing a bunch of divisions 25 | if boxes.dtype != numpy.float32: 26 | boxes = boxes.astype(numpy.float32) 27 | 28 | # initialize the list of picked indexes 29 | pick = [] 30 | # grab the coordinates of the bounding boxes 31 | x1 = boxes[:, 0] 32 | y1 = boxes[:, 1] 33 | x2 = boxes[:, 2] 34 | y2 = boxes[:, 3] 35 | sc = boxes[:, 4] 36 | widths = x2 - x1 37 | heights = y2 - y1 38 | 39 | # compute the area of the bounding boxes and sort the bounding 40 | # boxes by their scores 41 | area = heights * widths 42 | idxs = numpy.argsort(sc) # sort scores in ascending order 43 | 44 | # keep looping while some indexes still remain in the indexes list 45 | while len(idxs) > 0: 46 | # grab the last index in the indexes list and add the 47 | # index value to the list of picked indexes 48 | last = len(idxs) - 1 49 | i = idxs[last] 50 | pick.append(i) 51 | 52 | # compute the intersection between the current highest-score box and the remaining boxes 53 | xx1 = numpy.maximum(x1[i], x1[idxs[:last]]) 54 | yy1 = numpy.maximum(y1[i], y1[idxs[:last]]) 55 | xx2 = numpy.minimum(x2[i], x2[idxs[:last]]) 56 | yy2 = numpy.minimum(y2[i], y2[idxs[:last]]) 57 | 58 | # compute the width and height of the intersection box 59 | w = numpy.maximum(0, xx2 - xx1 + 1) 60 | h = numpy.maximum(0, yy2 - yy1 + 1) 61 | 62 | # compute the ratio of overlap 63 | overlap = (w * h) / area[idxs[:last]] 64 | 65 | # delete all indexes from the index list that have overlap above the threshold 66 | idxs = numpy.delete(idxs, numpy.concatenate(([last], numpy.where(overlap > overlap_threshold)[0]))) 67 | 68 | # return only the bounding boxes that were picked 69 | # (scores are kept in the last column) 70 | return boxes[pick] 71 | 72 | 73 | class Predict(object): 74 | 75 | def __init__(self, 76 | mxnet, 77 | symbol_file_path, 78 | model_file_path, 79 | ctx, 80 | receptive_field_list, 81 | receptive_field_stride, 82 | bbox_small_list, 83 | 
bbox_large_list, 84 | receptive_field_center_start, 85 | num_output_scales 86 | ): 87 | self.mxnet = mxnet 88 | self.symbol_file_path = symbol_file_path 89 | self.model_file_path = model_file_path 90 | self.ctx = ctx 91 | 92 | self.receptive_field_list = receptive_field_list 93 | self.receptive_field_stride = receptive_field_stride 94 | self.bbox_small_list = bbox_small_list 95 | self.bbox_large_list = bbox_large_list 96 | self.receptive_field_center_start = receptive_field_center_start 97 | self.num_output_scales = num_output_scales 98 | self.constant = [i / 2.0 for i in self.receptive_field_list] 99 | self.input_height = 480 100 | self.input_width = 640 101 | self.__load_model() 102 | 103 | def __load_model(self): 104 | # load symbol and parameters 105 | print('----> load symbol file: %s\n----> load model file: %s' % (self.symbol_file_path, self.model_file_path)) 106 | if not os.path.exists(self.symbol_file_path): 107 | print('The symbol file does not exist!!!!') 108 | sys.exit(1) 109 | if not os.path.exists(self.model_file_path): 110 | print('The model file does not exist!!!!') 111 | sys.exit(1) 112 | self.symbol_net = self.mxnet.symbol.load(self.symbol_file_path) 113 | data_name = 'data' 114 | data_name_shape = (data_name, (1, 3, self.input_height, self.input_width)) 115 | self.module = self.mxnet.module.Module(symbol=self.symbol_net, 116 | data_names=[data_name], 117 | label_names=None, 118 | context=self.ctx, 119 | work_load_list=None) 120 | self.module.bind(data_shapes=[data_name_shape], 121 | for_training=False) 122 | 123 | save_dict = self.mxnet.nd.load(self.model_file_path) 124 | self.arg_name_arrays = dict() 125 | self.arg_name_arrays['data'] = self.mxnet.nd.zeros((1, 3, self.input_height, self.input_width), self.ctx) 126 | self.aux_name_arrays = {} 127 | for k, v in save_dict.items(): 128 | tp, name = k.split(':', 1) 129 | if tp == 'arg': 130 | self.arg_name_arrays.update({name: v.as_in_context(self.ctx)}) 131 | if tp == 'aux': 132 | self.aux_name_arrays.update({name: v.as_in_context(self.ctx)}) 133 | self.module.init_params(arg_params=self.arg_name_arrays, 134 | aux_params=self.aux_name_arrays, 135 | allow_missing=True) 136 | print('----> Model is loaded successfully.') 137 | 138 | def predict(self, image, resize_scale=1, score_threshold=0.8, top_k=100, NMS_threshold=0.3, NMS_flag=True, skip_scale_branch_list=[]): 139 | 140 | if image.ndim != 3 or image.shape[2] != 3: 141 | print('Only RGB images are supported.') 142 | return None 143 | 144 | bbox_collection = [] 145 | 146 | shorter_side = min(image.shape[:2]) 147 | if shorter_side * resize_scale < 128: 148 | resize_scale = float(128) / shorter_side 149 | 150 | input_image = cv2.resize(image, (0, 0), fx=resize_scale, fy=resize_scale) 151 | 152 | input_image = input_image.astype(dtype=numpy.float32) 153 | input_image = input_image[:, :, :, numpy.newaxis] 154 | input_image = input_image.transpose([3, 2, 0, 1]) 155 | 156 | data_batch = DataBatch() 157 | data_batch.data = [self.mxnet.ndarray.array(input_image, self.ctx)] 158 | 159 | self.module.forward(data_batch=data_batch, is_train=False) 160 | results = self.module.get_outputs() 161 | outputs = [] 162 | for output in results: 163 | outputs.append(output.asnumpy()) 164 | 165 | for i in range(self.num_output_scales): 166 | if i in skip_scale_branch_list: 167 | continue 168 | 169 | score_map = numpy.squeeze(outputs[i * 2], (0, 1)) 170 | 171 | # score_map_show = score_map * 255 172 | # score_map_show[score_map_show < 0] = 0 173 | # score_map_show[score_map_show > 255] = 255 174 | # 
cv2.imshow('score_map' + str(i), cv2.resize(score_map_show.astype(dtype=numpy.uint8), (0, 0), fx=2, fy=2)) 175 | # cv2.waitKey() 176 | 177 | bbox_map = numpy.squeeze(outputs[i * 2 + 1], 0) 178 | 179 | RF_center_Xs = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * x for x in range(score_map.shape[1])]) 180 | RF_center_Xs_mat = numpy.tile(RF_center_Xs, [score_map.shape[0], 1]) 181 | RF_center_Ys = numpy.array([self.receptive_field_center_start[i] + self.receptive_field_stride[i] * y for y in range(score_map.shape[0])]) 182 | RF_center_Ys_mat = numpy.tile(RF_center_Ys, [score_map.shape[1], 1]).T 183 | 184 | x_lt_mat = RF_center_Xs_mat - bbox_map[0, :, :] * self.constant[i] 185 | y_lt_mat = RF_center_Ys_mat - bbox_map[1, :, :] * self.constant[i] 186 | x_rb_mat = RF_center_Xs_mat - bbox_map[2, :, :] * self.constant[i] 187 | y_rb_mat = RF_center_Ys_mat - bbox_map[3, :, :] * self.constant[i] 188 | 189 | x_lt_mat = x_lt_mat / resize_scale 190 | x_lt_mat[x_lt_mat < 0] = 0 191 | y_lt_mat = y_lt_mat / resize_scale 192 | y_lt_mat[y_lt_mat < 0] = 0 193 | x_rb_mat = x_rb_mat / resize_scale 194 | x_rb_mat[x_rb_mat > image.shape[1]] = image.shape[1] 195 | y_rb_mat = y_rb_mat / resize_scale 196 | y_rb_mat[y_rb_mat > image.shape[0]] = image.shape[0] 197 | 198 | select_index = numpy.where(score_map > score_threshold) 199 | for idx in range(select_index[0].size): 200 | bbox_collection.append((x_lt_mat[select_index[0][idx], select_index[1][idx]], 201 | y_lt_mat[select_index[0][idx], select_index[1][idx]], 202 | x_rb_mat[select_index[0][idx], select_index[1][idx]], 203 | y_rb_mat[select_index[0][idx], select_index[1][idx]], 204 | score_map[select_index[0][idx], select_index[1][idx]])) 205 | 206 | # NMS 207 | bbox_collection = sorted(bbox_collection, key=lambda item: item[-1], reverse=True) 208 | if len(bbox_collection) > top_k: 209 | bbox_collection = bbox_collection[0:top_k] 210 | bbox_collection_numpy = numpy.array(bbox_collection, dtype=numpy.float32) 211 | 212 | if NMS_flag: 213 | final_bboxes = NMS(bbox_collection_numpy, NMS_threshold) 214 | final_bboxes_ = [] 215 | for i in range(final_bboxes.shape[0]): 216 | final_bboxes_.append((final_bboxes[i, 0], final_bboxes[i, 1], final_bboxes[i, 2], final_bboxes[i, 3], final_bboxes[i, 4])) 217 | 218 | return final_bboxes_ 219 | else: 220 | return bbox_collection_numpy 221 | 222 | 223 | def run_prediction_pickle(): 224 | from config_farm import configuration_10_160_17L_4scales_v1 as cfg 225 | import mxnet 226 | 227 | data_pickle_file_path = '../data_provider_farm/data_folder/data_list_brainwash_test.pkl' 228 | from data_provider_farm.pickle_provider import PickleProvider 229 | pickle_provider = PickleProvider(data_pickle_file_path) 230 | positive_index = pickle_provider.positive_index 231 | negative_index = pickle_provider.negative_index 232 | all_index = positive_index 233 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index))) 234 | # import random 235 | # random.shuffle(all_index) 236 | 237 | symbol_file_path = '../symbol_farm/symbol_10_160_17L_4scales_v1_deploy.json' 238 | model_file_path = '../saved_model/configuration_10_160_17L_4scales_v1_2019-09-20-13-08-26/train_10_160_17L_4scales_v1_iter_800000.params' 239 | my_predictor = Predict(mxnet=mxnet, 240 | symbol_file_path=symbol_file_path, 241 | model_file_path=model_file_path, 242 | ctx=mxnet.gpu(0), 243 | receptive_field_list=cfg.param_receptive_field_list, 244 | receptive_field_stride=cfg.param_receptive_field_stride, 245 | 
bbox_small_list=cfg.param_bbox_small_list, 246 | bbox_large_list=cfg.param_bbox_large_list, 247 | receptive_field_center_start=cfg.param_receptive_field_center_start, 248 | num_output_scales=cfg.param_num_output_scales) 249 | 250 | for idx in all_index: 251 | im, _, bboxes_gt = pickle_provider.read_by_index(idx) 252 | 253 | bboxes = my_predictor.predict(im, resize_scale=1, score_threshold=0.5, top_k=10000, NMS_threshold=0.6) 254 | for bbox in bboxes: 255 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2) 256 | 257 | cv2.imshow('im', im) 258 | key = cv2.waitKey() 259 | # if key & 0xFF == ord('s'): 260 | # cv2.imwrite('./test_images/' + str(idx) + '.jpg', im) 261 | 262 | 263 | if __name__ == '__main__': 264 | run_prediction_pickle() 265 | 266 | -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/2.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/247.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/247.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/322.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/322.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/342.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/342.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/377.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/377.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/411.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/411.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/5.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/7.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/7.jpg -------------------------------------------------------------------------------- /head_detection/accuracy_evaluation/test_images/72.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/accuracy_evaluation/test_images/72.jpg -------------------------------------------------------------------------------- /head_detection/config_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/config_farm/__init__.py -------------------------------------------------------------------------------- /head_detection/data_iterator_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/data_iterator_farm/__init__.py -------------------------------------------------------------------------------- /head_detection/data_provider_farm/pickle_provider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This provider accepts an adapter, saves the dataset into a pickle file and loads the whole dataset into memory for data iterators 3 | ''' 4 | 5 | import cv2 6 | import numpy 7 | import pickle 8 | 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_provider import ProviderBaseclass 10 | from .text_list_adapter import TextListAdapter 11 | 12 | 13 | class PickleProvider(ProviderBaseclass): 14 | """ 15 | This class provides methods to save and read data. 16 | By default, images are compressed using JPG format.
17 | If data_adapter is not None, the provider runs in writing mode; otherwise it runs in reading mode 18 | """ 19 | 20 | def __init__(self, 21 | pickle_file_path, 22 | encode_quality=90, 23 | data_adapter=None): 24 | ProviderBaseclass.__init__(self) 25 | 26 | if data_adapter: # write data 27 | 28 | self.data_adapter = data_adapter 29 | self.data = {} 30 | self.counter = 0 31 | self.pickle_file_path = pickle_file_path 32 | 33 | else: # read data 34 | 35 | self.data = pickle.load(open(pickle_file_path, 'rb')) 36 | # get positive and negative indices 37 | self._positive_index = [] 38 | self._negative_index = [] 39 | for k, v in self.data.items(): 40 | if v[1] == 0: # negative 41 | self._negative_index.append(k) 42 | else: # positive 43 | self._positive_index.append(k) 44 | 45 | self.compression_mode = '.jpg' 46 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality] 47 | 48 | @property 49 | def positive_index(self): 50 | return self._positive_index 51 | 52 | @property 53 | def negative_index(self): 54 | return self._negative_index 55 | 56 | def write(self): 57 | 58 | for data_item in self.data_adapter.get_one(): 59 | 60 | temp_sample = [] 61 | im, bboxes = data_item 62 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params) 63 | if buf is None or buf.size == 0: 64 | print('The encoded buffer is invalid.') 65 | continue 66 | if not ret: 67 | print('An error occurred.') 68 | continue 69 | temp_sample.append(buf) 70 | 71 | if isinstance(bboxes, str): # negative sample 72 | temp_sample.append(0) 73 | temp_sample.append(int(bboxes)) 74 | else: 75 | temp_sample.append(1) 76 | temp_sample.append(bboxes) 77 | 78 | self.data[self.counter] = temp_sample 79 | print('Successfully saved the %d-th data item.' % self.counter) 80 | self.counter += 1 81 | 82 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) 83 | 84 | def read_by_index(self, index): 85 | im_buf, flag, bboxes = self.data[index] 86 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR) 87 | return im, flag, bboxes 88 | 89 | 90 | def write_file(): 91 | data_list_file_path = './data_folder/data_list_brainwash_test.txt' 92 | adapter = TextListAdapter(data_list_file_path) 93 | 94 | pickle_file_path = './data_folder/data_list_brainwash_test.pkl' 95 | encode_quality = 90 96 | packer = PickleProvider(pickle_file_path, encode_quality, adapter) 97 | packer.write() 98 | 99 | 100 | def read_file(): 101 | pickle_file_path = './data_folder/data_list_brainwash_test.pkl' 102 | 103 | provider = PickleProvider(pickle_file_path) 104 | positive_index = provider.positive_index 105 | negative_index = provider.negative_index 106 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index))) 107 | # all_index = positive_index+negative_index 108 | import random 109 | random.shuffle(positive_index) 110 | 111 | for i, index in enumerate(positive_index): 112 | im, flag, bboxes_numpy = provider.read_by_index(index) 113 | if isinstance(bboxes_numpy, numpy.ndarray): 114 | for n in range(bboxes_numpy.shape[0]): 115 | cv2.rectangle(im, (bboxes_numpy[n, 0], bboxes_numpy[n, 1]), 116 | (bboxes_numpy[n, 0] + bboxes_numpy[n, 2], bboxes_numpy[n, 1] + bboxes_numpy[n, 3]), (0, 255, 0), 1) 117 | cv2.imshow('im', im) 118 | cv2.waitKey() 119 | 120 | 121 | if __name__ == '__main__': 122 | # write_file() 123 | read_file() 124 | -------------------------------------------------------------------------------- /head_detection/data_provider_farm/reformat_brainwash.py: --------------------------------------------------------------------------------
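The brainwash `.idl` annotation format parsed below (and in `accuracy_evaluation/evaluation_on_brainwash.py`) is inferred from the regex-based parsing: each line holds a quoted image path, zero or more `(x1, y1, x2, y2)` boxes and a trailing `;`. A hypothetical line and the data-list line derived from it by `generate_data_list()` (images without boxes yield a `,0,0` negative line):

```
"brainwash_11_13_2014_images/00000500_640x480.png": (92.0, 21.5, 121.0, 53.5);
/path/to/brainwash/brainwash_11_13_2014_images/00000500_640x480.png,1,1,92,21,30,33
```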
1 | import numpy 2 | import os 3 | import cv2 4 | import json 5 | import math 6 | import re 7 | 8 | ''' 9 | ''' 10 | 11 | 12 | def generate_data_list(): 13 | txt_file_path = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash/brainwash_test.idl' 14 | image_root = '/media/heyonghao/HYH-4T-WD/public_dataset/head_detection/brainwash/brainwash' 15 | 16 | list_file_path = './data_folder/data_list_brainwash_test.txt' 17 | if not os.path.exists(os.path.dirname(list_file_path)): 18 | os.makedirs(os.path.dirname(list_file_path)) 19 | fin = open(txt_file_path, 'r') 20 | fout = open(list_file_path, 'w') 21 | 22 | counter = 0 23 | for line in fin: 24 | line = line.strip(';\n') 25 | im_path = re.findall('["](.*?)["]', line)[0] 26 | im_path = os.path.join(image_root, im_path) 27 | if not os.path.exists(im_path): 28 | print('image file does not exist: %s' % im_path) 29 | continue 30 | bbox_str_list = re.findall('[(](.*?)[)]', line) 31 | bbox_list = [] 32 | for bbox_str in bbox_str_list: 33 | bbox_str = bbox_str.split(', ') 34 | xmin = int(float(bbox_str[0])) 35 | ymin = int(float(bbox_str[1])) 36 | xmax = int(float(bbox_str[2])) 37 | ymax = int(float(bbox_str[3])) 38 | bbox_list.append((xmin, ymin, xmax-xmin+1, ymax-ymin+1)) 39 | 40 | if len(bbox_list) == 0: 41 | line_str = im_path+',0,0' 42 | fout.write(line_str+'\n') 43 | else: 44 | line_str = im_path+',1,'+str(len(bbox_list)) 45 | for bbox in bbox_list: 46 | line_str += ','+str(bbox[0])+','+str(bbox[1])+','+str(bbox[2])+','+str(bbox[3]) 47 | fout.write(line_str + '\n') 48 | counter += 1 49 | print(counter) 50 | 51 | fout.close() 52 | fin.close() 53 | 54 | 55 | def show_image(): 56 | list_file_path = './data_folder/data_list_brainwash_test.txt' 57 | 58 | fin = open(list_file_path, 'r') 59 | lines = fin.readlines() 60 | fin.close() 61 | 62 | import random 63 | random.shuffle(lines) 64 | for line in lines: 65 | line = line.strip('\n').split(',') 66 | 67 | im = cv2.imread(line[0]) 68 | 69 | bboxes = [] 70 | num_bboxes = int(line[2]) 71 | for i in range(num_bboxes): 72 | xmin = int(line[3 + i * 4]) 73 | ymin = int(line[4 + i * 4]) 74 | width = int(line[5 + i * 4]) 75 | height = int(line[6 + i * 4]) 76 | bboxes.append((xmin, ymin, xmin + width - 1, ymin + height - 1)) 77 | 78 | for bbox in bboxes: 79 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 2) 80 | 81 | cv2.imshow('im', im) 82 | cv2.waitKey() 83 | 84 | 85 | def dataset_statistics(): 86 | list_file_path = './data_folder/data_list_brainwash_test.txt' 87 | 88 | fin = open(list_file_path, 'r') 89 | lines = fin.readlines() 90 | fin.close() 91 | 92 | bin_size = 5 93 | longer_bin_dict = {} 94 | shorter_bin_dict = {} 95 | for line in lines: 96 | line = line.strip('\n').split(',') 97 | num_bboxes = int(line[2]) 98 | for i in range(num_bboxes): 99 | width = int(line[5 + i * 4]) 100 | height = int(line[6 + i * 4]) 101 | 102 | longer_side = max(width, height) 103 | shorter_side = min(width, height) 104 | 105 | key = int(longer_side / bin_size) 106 | if key in longer_bin_dict: 107 | longer_bin_dict[key] += 1 108 | else: 109 | longer_bin_dict[key] = 1 110 | 111 | key = int(shorter_side / bin_size) 112 | if key in shorter_bin_dict: 113 | shorter_bin_dict[key] += 1 114 | else: 115 | shorter_bin_dict[key] = 1 116 | 117 | print('shorter side based statistics:') 118 | shorter_bin_dict_key_list = sorted(shorter_bin_dict) 119 | for k in shorter_bin_dict_key_list: 120 | v = shorter_bin_dict[k] 121 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v)) 122 | 123
| print('longer side based statistics:') 124 | longer_bin_dict_key_list = sorted(longer_bin_dict) 125 | for k in longer_bin_dict_key_list: 126 | v = longer_bin_dict[k] 127 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v)) 128 | 129 | 130 | if __name__ == '__main__': 131 | # generate_data_list() 132 | # show_image() 133 | dataset_statistics() 134 | 135 | -------------------------------------------------------------------------------- /head_detection/data_provider_farm/text_list_adapter.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This adapter accepts a text file as input which describes the annotated data. 3 | Each line in the text is formatted as: 4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]...... 5 | ''' 6 | 7 | import cv2 8 | import numpy 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_data_adapter import DataAdapterBaseclass 10 | 11 | 12 | class TextListAdapter(DataAdapterBaseclass): 13 | 14 | def __init__(self, data_list_file_path): 15 | 16 | DataAdapterBaseclass.__init__(self) 17 | fin = open(data_list_file_path, 'r') 18 | self.lines = fin.readlines() 19 | fin.close() 20 | self.line_counter = 0 21 | 22 | def __del__(self): 23 | pass 24 | 25 | def get_one(self): 26 | """ 27 | This function uses 'yield' to return samples 28 | """ 29 | while self.line_counter < len(self.lines): 30 | 31 | line = self.lines[self.line_counter].strip('\n').split(',') 32 | if line[1] == '1': # for a positive sample, verify that the number of bboxes matches the declared count 33 | assert len(line[3:]) == 4 * int(line[2]) 34 | 35 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED) 36 | 37 | if line[1] == '0': 38 | yield im, '0' 39 | self.line_counter += 1 40 | continue 41 | 42 | num_bboxes = int(line[2]) 43 | bboxes = [] 44 | for i in range(num_bboxes): 45 | x = float(line[3 + i * 4]) 46 | y = float(line[3 + i * 4 + 1]) 47 | width = float(line[3 + i * 4 + 2]) 48 | height = float(line[3 + i * 4 + 3]) 49 | 50 | bboxes.append([x, y, width, height]) 51 | 52 | bboxes = numpy.array(bboxes, dtype=numpy.float32) 53 | yield im, bboxes 54 | 55 | self.line_counter += 1 56 | 57 | 58 | if __name__ == '__main__': 59 | pass 60 | -------------------------------------------------------------------------------- /head_detection/inference_speed_evaluation/inference_speed_eval.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | 4 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') 5 | import mxnet 6 | 7 | eval_with_mxnet_flag = False 8 | symbol_file_path = '/home/heyonghao/projects/ChasingHeadDetection/symbol_farm/symbol_10_160_17L_4scales_v1_deploy.json' 9 | input_shape = (1,3,2160,3840) # (1,3,240,320) (1,3,480,640) (1,3,720,1280) (1,3,1080,1920) (1,3,2160,3840) 10 | real_run_loops = 200 11 | 12 | if eval_with_mxnet_flag: 13 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_mxnet_cudnn import InferenceSpeedEval as InferenceSpeedEvalMXNet 14 | 15 | inferenceSpeedEvalMXNet = InferenceSpeedEvalMXNet(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type='gpu', gpu_index=0) 16 | inferenceSpeedEvalMXNet.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops) 17 | 18 | else: 19 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_tensorrt_cudnn import InferenceSpeedEval as InferenceSpeedEvalTRT 20 | 21 | 
inferenceSpeedEvalTRT = InferenceSpeedEvalTRT(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape) 22 | inferenceSpeedEvalTRT.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops) 23 | -------------------------------------------------------------------------------- /head_detection/metric_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/metric_farm/__init__.py -------------------------------------------------------------------------------- /head_detection/metric_farm/metric_default.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy 4 | import mxnet 5 | 6 | 7 | class Metric: 8 | # the number of losses to be fed in, i.e., the number of scales 9 | def __init__(self, num_scales): 10 | self.sum_metric = [0.0 for i in range(num_scales * 2)] 11 | self.num_update = 0 12 | self.num_scales = num_scales 13 | self.num_nonzero = [1.0 for i in range(num_scales * 2)] 14 | self.scale_factor = 10000 15 | 16 | # it is expected that the shape is num*c*h*w 17 | def update(self, labels, preds): # note the order of items in labels; refer to prefetching_dataiter 18 | 19 | for i in range(self.num_scales): 20 | mask = labels[i * 2] # mask comes first 21 | label = labels[i * 2 + 1] # then the label 22 | 23 | score_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=0, end=2).asnumpy() 24 | bbox_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=2, end=6).asnumpy() 25 | 26 | label_bbox = mxnet.ndarray.slice_axis(label, axis=1, begin=2, end=6).asnumpy() 27 | 28 | pred_score = preds[i * 2].asnumpy() 29 | pred_bbox = preds[i * 2 + 1].asnumpy() 30 | 31 | loss_score = numpy.sum(pred_score * score_mask) 32 | loss_bbox = numpy.sum((label_bbox - pred_bbox) ** 2.0) 33 | 34 | self.num_nonzero[i * 2] += numpy.sum(score_mask[:, 0, :, :] > 0.5) 35 | self.num_nonzero[i * 2 + 1] += numpy.sum(bbox_mask > 0.5) 36 | self.sum_metric[i * 2] += loss_score 37 | self.sum_metric[i * 2 + 1] += loss_bbox 38 | 39 | self.num_update += 1 40 | 41 | def get(self): 42 | return_string_list = [] 43 | for i in range(self.num_scales): 44 | return_string_list.append('CE_loss_score_' + str(i)) 45 | return_string_list.append('SE_loss_bbox_' + str(i)) 46 | 47 | return return_string_list, [m / self.num_nonzero[i] * self.scale_factor for i, m in enumerate(self.sum_metric)] 48 | 49 | def reset(self): 50 | self.sum_metric = [0.0 for i in range(self.num_scales * 2)] 51 | self.num_update = 0 52 | self.num_nonzero = [1.0 for i in range(self.num_scales * 2)] 53 | -------------------------------------------------------------------------------- /head_detection/symbol_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/symbol_farm/__init__.py -------------------------------------------------------------------------------- /head_detection/symbol_farm/symbol_structures.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/head_detection/symbol_farm/symbol_structures.xlsx -------------------------------------------------------------------------------- /license_plate_detection/README.md: -------------------------------------------------------------------------------- 1 | ## 
License Plate (LP) Detection 2 | We use the CCPD dataset introduced by the paper [Towards End-to-End License Plate Detection and Recognition: A Large Dataset and Baseline](https://github.com/detectRecog/CCPD). 3 | 4 | ### Recent Update 5 | * `2019.10.02` Model v1 for the CCPD dataset is released. 6 | 7 | ### Brief Introduction to Model Version 8 | * v1 - designed for the CCPD dataset, covering the LP scale range [64, 512]. It has 3 branches. Please check 9 | `./symbol_farm/symbol_structures.xlsx` for details. 10 | 11 | ### Inference Latency 12 | 13 | * Platform info: NVIDIA RTX 2080TI, CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0 14 | 15 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160|7680×4320 16 | -------------|-------|-------|--------|---------|---------|--------- 17 | v1|0.62ms(1613.18FPS)|1.02ms(978.64FPS)|2.10ms(476.80FPS)|4.21ms(237.32FPS)|15.68ms(63.78FPS)|62.82ms(15.92FPS) 18 | 19 | * Platform info: NVIDIA GTX 1060(laptop), CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0 20 | 21 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160 22 | -------------|-------|-------|--------|---------|--------- 23 | v1|0.86ms(1167.71FPS)|1.83ms(546.00FPS)|4.45ms(224.63FPS)|9.68ms(103.27FPS)|37.59ms(26.60FPS) 24 | 25 | > CAUTION: The latency may vary even in the same setting. 26 | 27 | ### Accuracy on CCPD Dataset 28 | We use the latest CCPD dataset, containing 351,974 images (it is larger than the version described in the paper). 29 | **Since the train/test split is not provided by the paper, we randomly select 3/5 of the data for training and use the rest for testing.** 30 | We train v1 on the training set (211,180 images) and evaluate on the test set (140,794 images). 31 | 32 | #### Quantitative Results on Test Set 33 | Average Precision (AP) is used to measure the accuracy. Specifically, we use the code from [Object-Detection-Metrics](https://github.com/rafaelpadilla/Object-Detection-Metrics) 34 | to calculate the AP metric. The following table presents the results: 35 | 36 | > `The comparison is not fair due to different training/test splits. This is for reference only!` 37 | > 38 | > `We run inference only once per image at test time, so some extremely large plates fail to be detected.` 39 | 40 | Method|AP 41 | --------|------ 42 | RPnet [1]|0.945 43 | v1 (ours)|0.989 44 | 45 | >[1] [Towards End-to-End License Plate Detection and Recognition: A Large Dataset and Baseline](http://openaccess.thecvf.com/content_ECCV_2018/papers/Zhenbo_Xu_Towards_End-to-End_License_ECCV_2018_paper.pdf) 46 | 47 | #### Some Qualitative Results on Test Set 48 | > Some challenging cases are presented. 49 | 50 | ![image](./accuracy_evaluation/test_images/test1.jpg_result.jpg) 51 | ![image](./accuracy_evaluation/test_images/test2.jpg_result.jpg) 52 | ![image](./accuracy_evaluation/test_images/test3.jpg_result.jpg) 53 | ![image](./accuracy_evaluation/test_images/test4.jpg_result.jpg) 54 | ![image](./accuracy_evaluation/test_images/test5.jpg_result.jpg) 55 | ![image](./accuracy_evaluation/test_images/test6.jpg_result.jpg) 56 | ![image](./accuracy_evaluation/test_images/test7.jpg_result.jpg) 57 | 58 | ### User Instructions 59 | Please refer to [README in face_detection](../face_detection/README.md) for details. 60 | 61 | ### Data Download 62 | Please visit [CCPD](https://github.com/detectRecog/CCPD) to access the data. 
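CCPD embeds each plate's annotation directly in the image file name; the sketch below mirrors `annotation_from_name()` in `data_provider_farm/reformat_CCPD.py`, which reads only the bounding-box field (the example file name is hypothetical):

```python
def bbox_from_ccpd_name(file_name):
    # the third '-'-separated field encodes the plate corners as 'x1&y1_x2&y2'
    location = file_name[:-4].split('-')[2]
    left_top, right_bottom = location.split('_')
    x1, y1 = (int(v) for v in left_top.split('&'))
    x2, y2 = (int(v) for v in right_bottom.split('&'))
    return x1, y1, x2 - x1 + 1, y2 - y1 + 1  # x, y, width, height

print(bbox_from_ccpd_name('025-95_113-154&383_386&473-386&473_177&454_154&383_363&402-0_0_22_27_27_33_16-37-15.jpg'))
# -> (154, 383, 233, 91)
```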
-------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/evaluation_on_CCPD.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | import cv2 5 | import math 6 | import re 7 | 8 | sys.path.append('..') 9 | # change the config as you need 10 | from config_farm import configuration_64_512_16L_3scales_v1 as cfg 11 | import mxnet 12 | from predict import Predict 13 | 14 | 15 | def generate_gt_files(): 16 | txt_file_path = '../data_provider_farm/data_folder/data_list_CCPD_test.txt' 17 | gt_file_root = './CCPD_testset_gt_files_for_evaluation' 18 | 19 | if not os.path.exists(gt_file_root): 20 | os.makedirs(gt_file_root) 21 | 22 | fin = open(txt_file_path, 'r') 23 | 24 | counter = 0 25 | for line in fin: 26 | line = line.strip('\n').split(',') 27 | im_path = os.path.basename(line[0]) 28 | num_bboxes = int(line[2]) 29 | if num_bboxes == 0: 30 | continue 31 | bbox_list = [] 32 | for i in range(num_bboxes): 33 | xmin = int(float(line[3+i*4])) 34 | ymin = int(float(line[4+i*4])) 35 | width = int(float(line[5+i*4])) 36 | height = int(float(line[6+i*4])) 37 | bbox_list.append((xmin, ymin, width, height)) 38 | 39 | gt_file_name = im_path.replace('jpg', 'txt') 40 | 41 | fout = open(os.path.join(gt_file_root, gt_file_name), 'w') 42 | for bbox in bbox_list: 43 | line_str = 'LP ' + str(bbox[0]) + ' ' + str(bbox[1]) + ' ' + str(bbox[2]) + ' ' + str(bbox[3]) 44 | fout.write(line_str + '\n') 45 | fout.close() 46 | counter += 1 47 | print(counter) 48 | fin.close() 49 | 50 | 51 | def generate_predicted_files(): 52 | # set the proper symbol file and model file 53 | symbol_file_path = '../symbol_farm/symbol_64_512_16L_3scales_v1_deploy.json' 54 | model_file_path = '../saved_model/configuration_64_512_16L_3scales_v1_2019-09-29-13-41-44/train_64_512_16L_3scales_v1_iter_600000.params' 55 | my_predictor = Predict(mxnet=mxnet, 56 | symbol_file_path=symbol_file_path, 57 | model_file_path=model_file_path, 58 | ctx=mxnet.gpu(0), 59 | receptive_field_list=cfg.param_receptive_field_list, 60 | receptive_field_stride=cfg.param_receptive_field_stride, 61 | bbox_small_list=cfg.param_bbox_small_list, 62 | bbox_large_list=cfg.param_bbox_large_list, 63 | receptive_field_center_start=cfg.param_receptive_field_center_start, 64 | num_output_scales=cfg.param_num_output_scales) 65 | 66 | # set the test list file generated by data_provider_farm/reformat_CCPD.py 67 | txt_file_path = '../data_provider_farm/data_folder/data_list_CCPD_test.txt' 68 | predicted_file_root = './CCPD_testset_predicted_files_for_evaluation_' + os.path.basename(model_file_path).split('.')[0] 69 | 70 | if not os.path.exists(predicted_file_root): 71 | os.makedirs(predicted_file_root) 72 | 73 | fin = open(txt_file_path, 'r') 74 | 75 | resize_scale = 1 76 | score_threshold = 0.2 77 | NMS_threshold = 0.6 78 | counter = 0 79 | 80 | for line in fin: 81 | line = line.strip('\n').split(',') 82 | 83 | im = cv2.imread(line[0], cv2.IMREAD_COLOR) 84 | 85 | bboxes = my_predictor.predict(im, resize_scale=resize_scale, score_threshold=score_threshold, top_k=10000, NMS_threshold=NMS_threshold) 86 | 87 | # for bbox in bboxes: 88 | # cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 1) 89 | # cv2.imshow('im',im) 90 | # cv2.waitKey() 91 | predicted_file_name = os.path.basename(line[0]).replace('jpg', 'txt') 92 | fout = open(os.path.join(predicted_file_root, predicted_file_name), 'w') 93 | for bbox in bboxes:
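# each detection is written in the layout consumed by the Object-Detection-Metrics
# scripts: 'LP <confidence> <x> <y> <width> <height>'; the score is clipped to 1,
# the top-left corner is floored, and the width/height are rounded up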
94 | fout.write('LP %.03f %d %d %d %d' % (bbox[4] if bbox[4] <= 1 else 1, math.floor(bbox[0]), math.floor(bbox[1]), math.ceil(bbox[2] - bbox[0]), math.ceil(bbox[3] - bbox[1])) + '\n') 95 | fout.close() 96 | counter += 1 97 | print('[%d] is processed.' % counter) 98 | 99 | 100 | if __name__ == '__main__': 101 | # generate_gt_files() 102 | generate_predicted_files() 103 | -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test1.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test1.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test2.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test2.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test3.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test3.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test4.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test4.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test5.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test5.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test6.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test6.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/accuracy_evaluation/test_images/test7.jpg_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/accuracy_evaluation/test_images/test7.jpg_result.jpg -------------------------------------------------------------------------------- /license_plate_detection/config_farm/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/config_farm/__init__.py -------------------------------------------------------------------------------- /license_plate_detection/data_iterator_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/data_iterator_farm/__init__.py -------------------------------------------------------------------------------- /license_plate_detection/data_provider_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/data_provider_farm/__init__.py -------------------------------------------------------------------------------- /license_plate_detection/data_provider_farm/pickle_provider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This provider accepts an adapter, saves the dataset into a pickle file and loads the whole dataset into memory for data iterators 3 | ''' 4 | 5 | import cv2 6 | import numpy 7 | import pickle 8 | 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_provider import ProviderBaseclass 10 | from .text_list_adapter import TextListAdapter 11 | 12 | 13 | class PickleProvider(ProviderBaseclass): 14 | """ 15 | This class provides methods to save and read data. 16 | By default, images are compressed using JPG format. 17 | If data_adapter is not None, the provider runs in writing mode; otherwise it runs in reading mode 18 | """ 19 | 20 | def __init__(self, 21 | pickle_file_path, 22 | encode_quality=90, 23 | data_adapter=None): 24 | ProviderBaseclass.__init__(self) 25 | 26 | if data_adapter: # write data 27 | 28 | self.data_adapter = data_adapter 29 | self.data = {} 30 | self.counter = 0 31 | self.pickle_file_path = pickle_file_path 32 | 33 | else: # read data 34 | 35 | self.data = pickle.load(open(pickle_file_path, 'rb')) 36 | # get positive and negative indices 37 | self._positive_index = [] 38 | self._negative_index = [] 39 | for k, v in self.data.items(): 40 | if v[1] == 0: # negative 41 | self._negative_index.append(k) 42 | else: # positive 43 | self._positive_index.append(k) 44 | 45 | self.compression_mode = '.jpg' 46 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality] 47 | 48 | @property 49 | def positive_index(self): 50 | return self._positive_index 51 | 52 | @property 53 | def negative_index(self): 54 | return self._negative_index 55 | 56 | def write(self): 57 | 58 | for data_item in self.data_adapter.get_one(): 59 | 60 | temp_sample = [] 61 | im, bboxes = data_item 62 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params) 63 | if buf is None or buf.size == 0: 64 | print('The encoded buffer is invalid.') 65 | continue 66 | if not ret: 67 | print('An error occurred.') 68 | continue 69 | temp_sample.append(buf) 70 | 71 | if isinstance(bboxes, str): # negative sample 72 | temp_sample.append(0) 73 | temp_sample.append(int(bboxes)) 74 | else: 75 | temp_sample.append(1) 76 | temp_sample.append(bboxes) 77 | 78 | self.data[self.counter] = temp_sample 79 | print('Successfully saved the %d-th data item.'
% self.counter) 80 | self.counter += 1 81 | 82 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) 83 | 84 | def read_by_index(self, index): 85 | im_buf, flag, bboxes = self.data[index] 86 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR) 87 | return im, flag, bboxes 88 | 89 | 90 | def write_file(): 91 | data_list_file_path = './data_folder/data_list_CCPD_train_debug.txt' 92 | adapter = TextListAdapter(data_list_file_path) 93 | 94 | pickle_file_path = './data_folder/data_list_CCPD_train_debug.pkl' 95 | encode_quality = 90 96 | packer = PickleProvider(pickle_file_path, encode_quality, adapter) 97 | packer.write() 98 | 99 | 100 | def read_file(): 101 | pickle_file_path = './data_folder/data_list_CCPD_train_debug.pkl' 102 | 103 | provider = PickleProvider(pickle_file_path) 104 | positive_index = provider.positive_index 105 | negative_index = provider.negative_index 106 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index))) 107 | all_index = positive_index+negative_index 108 | import random 109 | random.shuffle(all_index) 110 | 111 | for i, index in enumerate(all_index): 112 | im, flag, bboxes_numpy = provider.read_by_index(index) 113 | if isinstance(bboxes_numpy, numpy.ndarray): 114 | for n in range(bboxes_numpy.shape[0]): 115 | cv2.rectangle(im, (bboxes_numpy[n, 0], bboxes_numpy[n, 1]), 116 | (bboxes_numpy[n, 0] + bboxes_numpy[n, 2], bboxes_numpy[n, 1] + bboxes_numpy[n, 3]), (0, 255, 0), 2) 117 | cv2.imshow('im', im) 118 | cv2.waitKey() 119 | 120 | 121 | if __name__ == '__main__': 122 | # write_file() 123 | read_file() 124 | -------------------------------------------------------------------------------- /license_plate_detection/data_provider_farm/reformat_CCPD.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import os 3 | import cv2 4 | import json 5 | import math 6 | import random 7 | 8 | 9 | def annotation_from_name(file_name): 10 | file_name = file_name[:-4] 11 | name_split = file_name.split('-') 12 | location = name_split[2] 13 | location = location.split('_') 14 | left_top = location[0].split('&') 15 | right_bottom = location[1].split('&') 16 | x1 = int(left_top[0]) 17 | y1 = int(left_top[1]) 18 | x2 = int(right_bottom[0]) 19 | y2 = int(right_bottom[1]) 20 | 21 | return (x1, y1, x2-x1+1, y2-y1+1) 22 | 23 | 24 | def generate_data_list(): 25 | image_roots = ['/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_base', 26 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_blur', 27 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_challenge', 28 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_db', 29 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_fn', 30 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_rotate', 31 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_tilt', 32 | '/media/heyonghao/HYH-4T-WD/public_dataset/license_plate/CCPD_2019/CCPD2019/ccpd_weather'] 33 | 34 | train_list_file_path = './data_folder/data_list_CCPD_train.txt' 35 | test_list_file_path = './data_folder/data_list_CCPD_test.txt' 36 | if not os.path.exists(os.path.dirname(train_list_file_path)): 37 | os.makedirs(os.path.dirname(train_list_file_path)) 38 | fout_train = open(train_list_file_path, 'w') 39 | fout_test = open(test_list_file_path, 'w') 40 | 41 | 
train_proportion = 0.6 42 | train_counter = 0 43 | test_counter = 0 44 | for root in image_roots: 45 | file_name_list = [name for name in os.listdir(root) if name.endswith('.jpg')] 46 | random.shuffle(file_name_list) 47 | 48 | file_name_list_train = file_name_list[:int(len(file_name_list)*train_proportion)] 49 | file_name_list_test = file_name_list[int(len(file_name_list)*train_proportion):] 50 | 51 | for file_name in file_name_list_train: 52 | location_annotation = annotation_from_name(file_name) 53 | line = os.path.join(root, file_name)+',1,1,'+str(location_annotation[0])+','+str(location_annotation[1])+','+str(location_annotation[2])+','+str(location_annotation[3]) 54 | fout_train.write(line+'\n') 55 | train_counter += 1 56 | print(train_counter) 57 | 58 | for file_name in file_name_list_test: 59 | location_annotation = annotation_from_name(file_name) 60 | line = os.path.join(root, file_name)+',1,1,'+str(location_annotation[0])+','+str(location_annotation[1])+','+str(location_annotation[2])+','+str(location_annotation[3]) 61 | fout_test.write(line+'\n') 62 | test_counter += 1 63 | print(test_counter) 64 | 65 | fout_train.close() 66 | fout_test.close() 67 | 68 | 69 | def show_image(): 70 | list_file_path = './data_folder/data_list_CCPD_train.txt' 71 | 72 | fin = open(list_file_path, 'r') 73 | lines = fin.readlines() 74 | fin.close() 75 | 76 | import random 77 | random.shuffle(lines) 78 | for line in lines: 79 | line = line.strip('\n').split(',') 80 | 81 | im = cv2.imread(line[0]) 82 | 83 | bboxes = [] 84 | num_bboxes = int(line[2]) 85 | for i in range(num_bboxes): 86 | xmin = int(line[3 + i * 4]) 87 | ymin = int(line[4 + i * 4]) 88 | width = int(line[5 + i * 4]) 89 | height = int(line[6 + i * 4]) 90 | bboxes.append((xmin, ymin, xmin + width - 1, ymin + height - 1)) 91 | 92 | for bbox in bboxes: 93 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 2) 94 | 95 | cv2.imshow('im', im) 96 | cv2.waitKey() 97 | 98 | 99 | def dataset_statistics(): 100 | list_file_path = './data_folder/data_list_CCPD_train.txt' 101 | 102 | fin = open(list_file_path, 'r') 103 | lines = fin.readlines() 104 | fin.close() 105 | 106 | bin_size = 8 107 | longer_bin_dict = {} 108 | shorter_bin_dict = {} 109 | counter_pos = 0 110 | counter_neg = 0 111 | for line in lines: 112 | line = line.strip('\n').split(',') 113 | if line[1] == '0': 114 | counter_neg += 1 115 | continue 116 | else: 117 | counter_pos += 1 118 | num_bboxes = int(line[2]) 119 | for i in range(num_bboxes): 120 | width = int(line[5 + i * 4]) 121 | height = int(line[6 + i * 4]) 122 | 123 | longer_side = max(width, height) 124 | shorter_side = min(width, height) 125 | 126 | key = int(longer_side / bin_size) 127 | if key in longer_bin_dict: 128 | longer_bin_dict[key] += 1 129 | else: 130 | longer_bin_dict[key] = 1 131 | 132 | key = int(shorter_side / bin_size) 133 | if key in shorter_bin_dict: 134 | shorter_bin_dict[key] += 1 135 | else: 136 | shorter_bin_dict[key] = 1 137 | 138 | total_pedestrian = 0 139 | print('shorter side based statistics:') 140 | shorter_bin_dict_key_list = sorted(shorter_bin_dict) 141 | for k in shorter_bin_dict_key_list: 142 | v = shorter_bin_dict[k] 143 | total_pedestrian += v 144 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v)) 145 | 146 | print('longer side based statistics:') 147 | longer_bin_dict_key_list = sorted(longer_bin_dict) 148 | for k in longer_bin_dict_key_list: 149 | v = longer_bin_dict[k] 150 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v)) 151 | 152 | 
print('num pos: %d, num neg: %d' % (counter_pos, counter_neg)) 153 | print('total LP: %d' % total_pedestrian) 154 | 155 | 156 | if __name__ == '__main__': 157 | # test_name2anno() 158 | # generate_data_list() 159 | # show_image() 160 | dataset_statistics() 161 | -------------------------------------------------------------------------------- /license_plate_detection/data_provider_farm/text_list_adapter.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This adapter accepts a text file as input which describes the annotated data. 3 | Each line in the text is formatted as: 4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]...... 5 | ''' 6 | 7 | import cv2 8 | import numpy 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_data_adapter import DataAdapterBaseclass 10 | import random 11 | 12 | 13 | class TextListAdapter(DataAdapterBaseclass): 14 | 15 | def __init__(self, data_list_file_path): 16 | 17 | DataAdapterBaseclass.__init__(self) 18 | fin = open(data_list_file_path, 'r') 19 | self.lines = fin.readlines() 20 | fin.close() 21 | self.line_counter = 0 22 | 23 | def __del__(self): 24 | pass 25 | 26 | def get_one(self): 27 | """ 28 | This function uses 'yield' to return samples 29 | """ 30 | while self.line_counter < len(self.lines): 31 | 32 | line = self.lines[self.line_counter].strip('\n').split(',') 33 | if line[1] == '1': # for positive samples, verify that the number of bboxes matches the declared count 34 | assert len(line[3:]) == 4 * int(line[2]) 35 | 36 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED) 37 | 38 | if line[1] == '0': 39 | yield im, '0' 40 | self.line_counter += 1 41 | continue 42 | 43 | num_bboxes = int(line[2]) 44 | bboxes = [] 45 | for i in range(num_bboxes): 46 | x = float(line[3 + i * 4]) 47 | y = float(line[3 + i * 4 + 1]) 48 | width = float(line[3 + i * 4 + 2]) 49 | height = float(line[3 + i * 4 + 3]) 50 | 51 | bboxes.append([x, y, width, height]) 52 | 53 | bboxes = numpy.array(bboxes, dtype=numpy.float32) 54 | yield im, bboxes 55 | 56 | # generate negative samples by cropping regions outside the bbox extents 57 | left = numpy.min(bboxes[:, 0]) 58 | top = numpy.min(bboxes[:, 1]) 59 | right = numpy.max(bboxes[:, 0] + bboxes[:, 2]) 60 | bottom = numpy.max(bboxes[:, 1] + bboxes[:, 3]) 61 | if random.random() < 0.25: 62 | im_crop = im[:, :int(left), :].copy() 63 | if im_crop.shape[1] > 100: 64 | yield im_crop, '0' 65 | if random.random() < 0.25: 66 | im_crop = im[:, int(right):, :].copy() 67 | if im_crop.shape[1] > 100: 68 | yield im_crop, '0' 69 | if random.random() < 0.25: 70 | im_crop = im[:int(top), :, :].copy() 71 | if im_crop.shape[0] > 100: 72 | yield im_crop, '0' 73 | if random.random() < 0.25: 74 | im_crop = im[int(bottom):, :, :].copy() 75 | if im_crop.shape[0] > 100: 76 | yield im_crop, '0' 77 | 78 | self.line_counter += 1 79 | 80 | 81 | if __name__ == '__main__': 82 | pass 83 | -------------------------------------------------------------------------------- /license_plate_detection/inference_speed_evaluation/inference_speed_eval.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | 4 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') 5 | import mxnet 6 | 7 | eval_with_mxnet_flag = False 8 | symbol_file_path = '/home/heyonghao/projects/ChasingLicensePlateDetection/symbol_farm/symbol_64_512_16L_3scales_v1_deploy.json' 9 | input_shape = (1,3,2160,3840) # (1,3,240,320) (1,3,480,640) (1,3,720,1280) (1,3,1080,1920) (1,3,2160,3840) 10 | real_run_loops = 200 
11 | 12 | if eval_with_mxnet_flag: 13 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_mxnet_cudnn import InferenceSpeedEval as InferenceSpeedEvalMXNet 14 | 15 | inferenceSpeedEvalMXNet = InferenceSpeedEvalMXNet(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type='gpu', gpu_index=0) 16 | inferenceSpeedEvalMXNet.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops) 17 | 18 | else: 19 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_tensorrt_cudnn import InferenceSpeedEval as InferenceSpeedEvalTRT 20 | 21 | inferenceSpeedEvalTRT = InferenceSpeedEvalTRT(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape) 22 | inferenceSpeedEvalTRT.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops) 23 | -------------------------------------------------------------------------------- /license_plate_detection/metric_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/metric_farm/__init__.py -------------------------------------------------------------------------------- /license_plate_detection/metric_farm/metric_default.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy 4 | import mxnet 5 | 6 | 7 | class Metric: 8 | # num_scales is the number of losses to track, i.e., the number of scales 9 | def __init__(self, num_scales): 10 | self.sum_metric = [0.0 for i in range(num_scales * 2)] 11 | self.num_update = 0 12 | self.num_scales = num_scales 13 | self.num_nonzero = [1.0 for i in range(num_scales * 2)] 14 | self.scale_factor = 10000 15 | 16 | # it is expected that the shape is num*c*h*w 17 | def update(self, labels, preds): # note the order of the items in labels; refer to the prefetching dataiter 18 | 19 | for i in range(self.num_scales): 20 | mask = labels[i * 2] # the mask comes first 21 | label = labels[i * 2 + 1] # then the label 22 | 23 | score_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=0, end=2).asnumpy() 24 | bbox_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=2, end=6).asnumpy() 25 | 26 | label_bbox = mxnet.ndarray.slice_axis(label, axis=1, begin=2, end=6).asnumpy() 27 | 28 | pred_score = preds[i * 2].asnumpy() 29 | pred_bbox = preds[i * 2 + 1].asnumpy() 30 | 31 | loss_score = numpy.sum(pred_score * score_mask) 32 | loss_bbox = numpy.sum((label_bbox - pred_bbox) ** 2.0) 33 | 34 | self.num_nonzero[i * 2] += numpy.sum(score_mask[:, 0, :, :] > 0.5) 35 | self.num_nonzero[i * 2 + 1] += numpy.sum(bbox_mask > 0.5) 36 | self.sum_metric[i * 2] += loss_score 37 | self.sum_metric[i * 2 + 1] += loss_bbox 38 | 39 | self.num_update += 1 40 | 41 | def get(self): 42 | return_string_list = [] 43 | for i in range(self.num_scales): 44 | return_string_list.append('CE_loss_score_' + str(i)) 45 | return_string_list.append('SE_loss_bbox_' + str(i)) 46 | 47 | return return_string_list, [m / self.num_nonzero[i] * self.scale_factor for i, m in enumerate(self.sum_metric)] 48 | 49 | def reset(self): 50 | self.sum_metric = [0.0 for i in range(self.num_scales * 2)] 51 | self.num_update = 0 52 | self.num_nonzero = [1.0 for i in range(self.num_scales * 2)] 53 | -------------------------------------------------------------------------------- /license_plate_detection/symbol_farm/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/symbol_farm/__init__.py -------------------------------------------------------------------------------- /license_plate_detection/symbol_farm/symbol_structures.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/license_plate_detection/symbol_farm/symbol_structures.xlsx -------------------------------------------------------------------------------- /pedestrian_detection/README.md: -------------------------------------------------------------------------------- 1 | ## Pedestrian Detection 2 | We plan to use [Caltech Pedestrian Dataset](http://www.vision.caltech.edu/Image_Datasets/CaltechPedestrians/index.html) 3 | with [new annotations](http://www.vision.caltech.edu/Image_Datasets/CaltechPedestrians/index.html), 4 | [CityPersons](https://bitbucket.org/shanshanzhang/citypersons) (a part of [CityScapes](https://www.cityscapes-dataset.com/)) and 5 | [KITTI](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=2d) for benchmarking. 6 | 7 | ### Recent Update 8 | * `2019.09.18` preview version of model v1 for the Caltech Pedestrian Dataset is released. 9 | 10 | ### Brief Introduction to Model Version 11 | * v1 - is designed for the Caltech Pedestrian Dataset, covering the pedestrian scale range [30, 320]. It has 4 branches. Please check 12 | `./symbol_farm/symbol_structures.xlsx` for details. 13 | 14 | ### Inference Latency 15 | * Platform info: NVIDIA Jetson NANO, CUDA 10.0, CUDNN 7.5.0, TensorRT 5.1.6 16 | 17 | Model Version|160×140|320×240|640×480|1280×720 18 | -------------|-------|-------|-------|-------- 19 | v1|6.90ms(144.83FPS)|11.87ms(84.24FPS)|36.95ms(27.06FPS)|106.23ms(9.41FPS) 20 | v2|-|-|-|- 21 | 22 | * Platform info: NVIDIA Jetson TX2, CUDA 10.0, CUDNN 7.5.0, TensorRT 5.1.6 (power mode: MAXN) 23 | 24 | Model Version|160×140|320×240|640×480|1280×720|1920×1080 25 | -------------|-------|-------|-------|--------|--------- 26 | v1|3.63ms(275.43FPS)|6.80ms(147.36FPS)|15.87ms(63.01FPS)|43.33ms(23.08FPS)|93.93ms(10.65FPS) 27 | v2|-|-|-|-|- 28 | 29 | 30 | * Platform info: NVIDIA RTX 2080TI, CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0 31 | 32 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160|7680×4320 33 | -------------|-------|-------|--------|---------|---------|--------- 34 | v1|1.01ms(985.71FPS)|1.55ms(644.93FPS)|3.26ms(306.77FPS)|6.50ms(153.76FPS)|24.58ms(40.68FPS)|99.71ms(10.03FPS) 35 | v2|-|-|-|-|-|- 36 | 37 | * Platform info: NVIDIA GTX 1060(laptop), CUDA 10.0, CUDNN 7.4.2, TensorRT 5.1.5.0 38 | 39 | Model Version|320×240|640×480|1280×720|1920×1080|3840×2160 40 | -------------|-------|-------|--------|---------|--------- 41 | v1|1.25ms(800.00FPS)|2.93ms(341.80FPS)|7.46ms(134.08FPS)|16.03ms(62.39FPS)|62.80ms(15.92FPS) 42 | v2|-|-|-|-|- 43 | 44 | > CAUTION: The latency may vary even in the same setting. 45 | 46 | ### Accuracy on Caltech Pedestrian Dataset 47 | After investigating the data, we found that the Caltech Pedestrian Dataset is not well annotated, even with the 48 | new annotations (some pedestrians are not annotated, some bboxes are not aligned well, and highly occluded instances are annotated). The final data used for training: 49 | 1559 pos images (at least one pedestrian inside), 2691 neg images; 4786 pedestrians in total; the longer side of bboxes 50 | varies from 10 pixels to 500 pixels. 
51 | 52 | Download links for packed training and test sets: 53 | * [Baidu Yunpan](https://pan.baidu.com/s/1SvoSeg5thFHDDwZc9gh09A) (pwd:8omv) 54 | * [MS OneDrive](https://1drv.ms/u/s!Av9h0YMgxdaSinO2G1DT-yPWkKc6?e=elsea6) 55 | * [Google Drive](https://drive.google.com/open?id=1ICNAEfLa2YHJvxE6_YZYAA8Cyl1N1kAD) 56 | 57 | #### Quantitative Results on Test Set 58 | Currently, the quantitative results are not ready. We will release them later. 59 | 60 | #### Some Qualitative Results on Test Set 61 | **(we found that false positives often appear at small scales, probably due to noisy training instances. At large scales, v1 performs well.)** 62 | 63 | ![image](./accuracy_evaluation/test_images/1064.jpg) 64 | ![image](./accuracy_evaluation/test_images/1199.jpg) 65 | ![image](./accuracy_evaluation/test_images/1212.jpg) 66 | ![image](./accuracy_evaluation/test_images/3981.jpg) 67 | 68 | > To play with the trained v1 model, please check `./accuracy_evaluation/predict.py`. 69 | ### User Instructions 70 | Please refer to [README in face_detection](../face_detection/README.md) for details. A sketch of the annotation list format shared by all tasks is shown after the test images below. -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/1064.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1064.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/1081.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1081.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/1104.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1104.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/1199.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1199.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/1212.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1212.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/1461.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/1461.jpg -------------------------------------------------------------------------------- 
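Note: every detection task in this repo (face, head, license plate, pedestrian) shares the same comma-separated annotation list format, documented in the text_list_adapter files: `[image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],...`. Below is a minimal, self-contained sketch of parsing one such line; `parse_annotation_line` is a hypothetical helper for illustration and is not part of the repo.

```python
# Hypothetical helper illustrating the annotation list format used by the
# text_list_adapter files: path, pos/neg flag, bbox count, then x,y,w,h per bbox.
def parse_annotation_line(line):
    fields = line.strip('\n').split(',')
    image_path = fields[0]
    is_positive = fields[1] == '1'
    bboxes = []
    if is_positive:
        num_bboxes = int(fields[2])
        # same sanity check the adapters perform for positive samples
        assert len(fields[3:]) == 4 * num_bboxes, 'bbox count mismatch'
        for i in range(num_bboxes):
            x, y, w, h = (float(v) for v in fields[3 + i * 4: 7 + i * 4])
            bboxes.append((x, y, w, h))
    return image_path, is_positive, bboxes


# example: one positive sample with a single 40x80 box at (100, 50)
print(parse_annotation_line('/data/img_0001.jpg,1,1,100,50,40,80'))
```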
/pedestrian_detection/accuracy_evaluation/test_images/2210.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2210.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/2221.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2221.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/2396.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2396.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/2407.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2407.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/2756.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/2756.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/3043.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3043.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/326.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/326.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/3368.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3368.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/3812.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3812.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/3914.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3914.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/3981.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3981.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/3988.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/3988.jpg -------------------------------------------------------------------------------- /pedestrian_detection/accuracy_evaluation/test_images/877.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/accuracy_evaluation/test_images/877.jpg -------------------------------------------------------------------------------- /pedestrian_detection/config_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/config_farm/__init__.py -------------------------------------------------------------------------------- /pedestrian_detection/data_iterator_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/data_iterator_farm/__init__.py -------------------------------------------------------------------------------- /pedestrian_detection/data_provider_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/data_provider_farm/__init__.py -------------------------------------------------------------------------------- /pedestrian_detection/data_provider_farm/pickle_provider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This provider accepts an adapter, saves the dataset in a pickle file and loads the whole dataset into memory for data iterators 3 | ''' 4 | 5 | import cv2 6 | import numpy 7 | import pickle 8 | 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_provider import ProviderBaseclass 10 | from .text_list_adapter import TextListAdapter 11 | 12 | 13 | class PickleProvider(ProviderBaseclass): 14 | """ 15 | This class provides methods to save and read data. 16 | By default, images are compressed using JPG format. 
17 | If data_adapter is not None, the provider saves data; otherwise, it reads data 18 | """ 19 | 20 | def __init__(self, 21 | pickle_file_path, 22 | encode_quality=90, 23 | data_adapter=None): 24 | ProviderBaseclass.__init__(self) 25 | 26 | if data_adapter: # write data 27 | 28 | self.data_adapter = data_adapter 29 | self.data = {} 30 | self.counter = 0 31 | self.pickle_file_path = pickle_file_path 32 | 33 | else: # read data 34 | 35 | self.data = pickle.load(open(pickle_file_path, 'rb')) 36 | # get positive and negative indices 37 | self._positive_index = [] 38 | self._negative_index = [] 39 | for k, v in self.data.items(): 40 | if v[1] == 0: # negative 41 | self._negative_index.append(k) 42 | else: # positive 43 | self._positive_index.append(k) 44 | 45 | self.compression_mode = '.jpg' 46 | self.encode_params = [cv2.IMWRITE_JPEG_QUALITY, encode_quality] 47 | 48 | @property 49 | def positive_index(self): 50 | return self._positive_index 51 | 52 | @property 53 | def negative_index(self): 54 | return self._negative_index 55 | 56 | def write(self): 57 | 58 | for data_item in self.data_adapter.get_one(): 59 | 60 | temp_sample = [] 61 | im, bboxes = data_item 62 | ret, buf = cv2.imencode(self.compression_mode, im, self.encode_params) 63 | if buf is None or buf.size == 0: 64 | print('buf is wrong.') 65 | continue 66 | if not ret: 67 | print('An error occurred.') 68 | continue 69 | temp_sample.append(buf) 70 | 71 | if isinstance(bboxes, str): # negative sample 72 | temp_sample.append(0) 73 | temp_sample.append(int(bboxes)) 74 | else: 75 | temp_sample.append(1) 76 | temp_sample.append(bboxes) 77 | 78 | self.data[self.counter] = temp_sample 79 | print('Successfully save the %d-th data item.' % self.counter) 80 | self.counter += 1 81 | 82 | pickle.dump(self.data, open(self.pickle_file_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) 83 | 84 | def read_by_index(self, index): 85 | im_buf, flag, bboxes = self.data[index] 86 | im = cv2.imdecode(im_buf, cv2.IMREAD_COLOR) 87 | return im, flag, bboxes 88 | 89 | 90 | def write_file(): 91 | data_list_file_path = './data_folder/data_list_caltech_test.txt' 92 | adapter = TextListAdapter(data_list_file_path) 93 | 94 | pickle_file_path = './data_folder/data_list_caltech_test.pkl' 95 | encode_quality = 90 96 | packer = PickleProvider(pickle_file_path, encode_quality, adapter) 97 | packer.write() 98 | 99 | 100 | def read_file(): 101 | pickle_file_path = './data_folder/data_list_caltech_test.pkl' 102 | 103 | provider = PickleProvider(pickle_file_path) 104 | positive_index = provider.positive_index 105 | negative_index = provider.negative_index 106 | print("num of positive: %d\nnum of negative: %d" % (len(positive_index), len(negative_index))) 107 | # all_index = positive_index+negative_index 108 | import random 109 | random.shuffle(positive_index) 110 | 111 | for i, index in enumerate(positive_index): 112 | im, flag, bboxes_numpy = provider.read_by_index(index) 113 | if isinstance(bboxes_numpy, numpy.ndarray): 114 | for n in range(bboxes_numpy.shape[0]): 115 | cv2.rectangle(im, (int(bboxes_numpy[n, 0]), int(bboxes_numpy[n, 1])),  # bboxes are stored as float32, so cast to int for cv2.rectangle 116 | (int(bboxes_numpy[n, 0] + bboxes_numpy[n, 2]), int(bboxes_numpy[n, 1] + bboxes_numpy[n, 3])), (0, 255, 0), 2) 117 | cv2.imshow('im', im) 118 | cv2.waitKey() 119 | 120 | 121 | if __name__ == '__main__': 122 | # write_file() 123 | read_file() 124 | -------------------------------------------------------------------------------- /pedestrian_detection/data_provider_farm/reformat_caltech.py: -------------------------------------------------------------------------------- 1 
| import numpy 2 | import os 3 | import cv2 4 | import json 5 | import math 6 | 7 | 8 | def generate_data_list(): 9 | annotation_root = '/media/heyonghao/HYH-4T-WD/public_dataset/Caltech/Caltech_new_annotations/anno_test_1xnew' 10 | image_root = '/media/heyonghao/HYH-4T-WD/public_dataset/Caltech/Caltech_data/extracted_data' 11 | 12 | list_file_path = './data_folder/data_list_caltech_test.txt' 13 | if not os.path.exists(os.path.dirname(list_file_path)): 14 | os.makedirs(os.path.dirname(list_file_path)) 15 | fout = open(list_file_path, 'w') 16 | 17 | counter = 0 18 | for parent, dirnames, filenames in os.walk(annotation_root): 19 | for filename in filenames: 20 | if not filename.endswith('.txt'): 21 | continue 22 | 23 | filename_splits = filename[:-4].split('_') 24 | set_name = filename_splits[0] 25 | seq_name = filename_splits[1] 26 | img_name = filename_splits[2] 27 | 28 | img_path = os.path.join(image_root, set_name, seq_name, 'images', img_name) 29 | if not os.path.exists(img_path): 30 | print('The corresponding image does not exist! [%s]' % img_path) 31 | continue 32 | 33 | line = img_path 34 | 35 | fin_anno = open(os.path.join(parent, filename), 'r') 36 | 37 | bbox_list = [] 38 | for i, anno in enumerate(fin_anno): 39 | if i == 0: 40 | continue 41 | anno = anno.strip('\n').split(' ') 42 | if anno[0] != 'person': 43 | continue 44 | x = math.floor(float(anno[1])) 45 | y = math.floor(float(anno[2])) 46 | width = math.ceil(float(anno[3])) 47 | height = math.ceil(float(anno[4])) 48 | 49 | width_vis = math.ceil(float(anno[8])) 50 | height_vis = math.ceil(float(anno[9])) 51 | 52 | if (width_vis*height_vis)/(width*height) < 0.2: 53 | continue 54 | 55 | bbox_list.append((x, y, width, height)) 56 | if len(bbox_list) == 0: 57 | line += ',0,0' 58 | fout.write(line + '\n') 59 | else: 60 | bbox_line = '' 61 | for bbox in bbox_list: 62 | bbox_line += ',' + str(bbox[0]) + ',' + str(bbox[1]) + ',' + str(bbox[2]) + ',' + str(bbox[3]) 63 | line += ',1,' + str(len(bbox_list)) + bbox_line 64 | fout.write(line + '\n') 65 | counter += 1 66 | print(counter) 67 | 68 | fout.close() 69 | 70 | 71 | def show_image(): 72 | list_file_path = './data_folder/data_list_caltech_test.txt' 73 | 74 | fin = open(list_file_path, 'r') 75 | lines = fin.readlines() 76 | fin.close() 77 | 78 | import random 79 | random.shuffle(lines) 80 | for line in lines: 81 | line = line.strip('\n').split(',') 82 | 83 | im = cv2.imread(line[0]) 84 | 85 | bboxes = [] 86 | num_bboxes = int(line[2]) 87 | for i in range(num_bboxes): 88 | xmin = int(line[3 + i * 4]) 89 | ymin = int(line[4 + i * 4]) 90 | width = int(line[5 + i * 4]) 91 | height = int(line[6 + i * 4]) 92 | bboxes.append((xmin, ymin, xmin + width - 1, ymin + height - 1)) 93 | 94 | for bbox in bboxes: 95 | cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 0), 2) 96 | 97 | cv2.imshow('im', im) 98 | cv2.waitKey() 99 | 100 | 101 | def dataset_statistics(): 102 | list_file_path = './data_folder/data_list_caltech_test.txt' 103 | 104 | fin = open(list_file_path, 'r') 105 | lines = fin.readlines() 106 | fin.close() 107 | 108 | bin_size = 10 109 | longer_bin_dict = {} 110 | shorter_bin_dict = {} 111 | counter_pos = 0 112 | counter_neg = 0 113 | for line in lines: 114 | line = line.strip('\n').split(',') 115 | if line[1] == '0': 116 | counter_neg += 1 117 | continue 118 | else: 119 | counter_pos += 1 120 | num_bboxes = int(line[2]) 121 | for i in range(num_bboxes): 122 | width = int(line[5 + i * 4]) 123 | height = int(line[6 + i * 4]) 124 | 125 | longer_side = max(width, 
height) 126 | shorter_side = min(width, height) 127 | 128 | key = int(longer_side / bin_size) 129 | if key in longer_bin_dict: 130 | longer_bin_dict[key] += 1 131 | else: 132 | longer_bin_dict[key] = 1 133 | 134 | key = int(shorter_side / bin_size) 135 | if key in shorter_bin_dict: 136 | shorter_bin_dict[key] += 1 137 | else: 138 | shorter_bin_dict[key] = 1 139 | 140 | total_pedestrian = 0 141 | print('shorter side based statistics:') 142 | shorter_bin_dict_key_list = sorted(shorter_bin_dict) 143 | for k in shorter_bin_dict_key_list: 144 | v = shorter_bin_dict[k] 145 | total_pedestrian += v 146 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v)) 147 | 148 | print('longer side based statistics:') 149 | longer_bin_dict_key_list = sorted(longer_bin_dict) 150 | for k in longer_bin_dict_key_list: 151 | v = longer_bin_dict[k] 152 | print('[%d-%d): %d' % (k * bin_size, k * bin_size + bin_size, v)) 153 | 154 | print('num pos: %d, num neg: %d' % (counter_pos, counter_neg)) 155 | print('total pedestrian: %d' % total_pedestrian) 156 | 157 | 158 | if __name__ == '__main__': 159 | # generate_data_list() 160 | show_image() 161 | # dataset_statistics() 162 | -------------------------------------------------------------------------------- /pedestrian_detection/data_provider_farm/text_list_adapter.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This adapter accepts a text file as input which describes the annotated data. 3 | Each line in the text is formatted as: 4 | [image absolute path],[pos/neg flag],[num of bboxes],[x1],[y1],[width1],[height1],[x2],[y2],[width2],[height2]...... 5 | ''' 6 | 7 | import cv2 8 | import numpy 9 | from ChasingTrainFramework_GeneralOneClassDetection.data_provider_base.base_data_adapter import DataAdapterBaseclass 10 | 11 | 12 | class TextListAdapter(DataAdapterBaseclass): 13 | 14 | def __init__(self, data_list_file_path): 15 | 16 | DataAdapterBaseclass.__init__(self) 17 | fin = open(data_list_file_path, 'r') 18 | self.lines = fin.readlines() 19 | fin.close() 20 | self.line_counter = 0 21 | 22 | def __del__(self): 23 | pass 24 | 25 | def get_one(self): 26 | """ 27 | This function uses 'yield' to return samples 28 | """ 29 | while self.line_counter < len(self.lines): 30 | 31 | line = self.lines[self.line_counter].strip('\n').split(',') 32 | if line[1] == '1': # for positive samples, verify that the number of bboxes matches the declared count 33 | assert len(line[3:]) == 4 * int(line[2]) 34 | 35 | im = cv2.imread(line[0], cv2.IMREAD_UNCHANGED) 36 | 37 | if line[1] == '0': 38 | yield im, '0' 39 | self.line_counter += 1 40 | continue 41 | 42 | num_bboxes = int(line[2]) 43 | bboxes = [] 44 | for i in range(num_bboxes): 45 | x = float(line[3 + i * 4]) 46 | y = float(line[3 + i * 4 + 1]) 47 | width = float(line[3 + i * 4 + 2]) 48 | height = float(line[3 + i * 4 + 3]) 49 | 50 | bboxes.append([x, y, width, height]) 51 | 52 | bboxes = numpy.array(bboxes, dtype=numpy.float32) 53 | yield im, bboxes 54 | 55 | self.line_counter += 1 56 | 57 | 58 | if __name__ == '__main__': 59 | pass 60 | -------------------------------------------------------------------------------- /pedestrian_detection/inference_speed_evaluation/inference_speed_eval.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import sys 3 | 4 | sys.path.append('/home/heyonghao/libs/incubator-mxnet/python') 5 | import mxnet 6 | 7 | eval_with_mxnet_flag = False 8 | symbol_file_path = '/home/heyonghao/projects/ChasingPedestrainDetection/symbol_farm/symbol_30_320_20L_4scales_v1_deploy.json' 9 | 
input_shape = (1, 3, 2160, 3840) # (1,3,240,320) (1,3,480,640) (1,3,720,1280) (1,3,1080,1920) (1,3,2160,3840) 10 | real_run_loops = 200 11 | 12 | if eval_with_mxnet_flag: 13 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_mxnet_cudnn import InferenceSpeedEval as InferenceSpeedEvalMXNet 14 | 15 | inferenceSpeedEvalMXNet = InferenceSpeedEvalMXNet(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape, device_type='gpu', gpu_index=0) 16 | inferenceSpeedEvalMXNet.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops) 17 | 18 | else: 19 | from ChasingTrainFramework_GeneralOneClassDetection.inference_speed_eval.inference_speed_eval_with_tensorrt_cudnn import InferenceSpeedEval as InferenceSpeedEvalTRT 20 | 21 | inferenceSpeedEvalTRT = InferenceSpeedEvalTRT(symbol_file_path=symbol_file_path, mxnet_module=mxnet, input_shape=input_shape) 22 | inferenceSpeedEvalTRT.run_speed_eval(warm_run_loops=10, real_run_loops=real_run_loops) 23 | -------------------------------------------------------------------------------- /pedestrian_detection/metric_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/metric_farm/__init__.py -------------------------------------------------------------------------------- /pedestrian_detection/metric_farm/metric_default.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy 4 | import mxnet 5 | 6 | 7 | class Metric: 8 | # num_scales is the number of losses to track, i.e., the number of scales 9 | def __init__(self, num_scales): 10 | self.sum_metric = [0.0 for i in range(num_scales * 2)] 11 | self.num_update = 0 12 | self.num_scales = num_scales 13 | self.num_nonzero = [1.0 for i in range(num_scales * 2)] 14 | self.scale_factor = 10000 15 | 16 | # it is expected that the shape is num*c*h*w 17 | def update(self, labels, preds): # note the order of the items in labels; refer to the prefetching dataiter 18 | 19 | for i in range(self.num_scales): 20 | mask = labels[i * 2] # the mask comes first 21 | label = labels[i * 2 + 1] # then the label 22 | 23 | score_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=0, end=2).asnumpy() 24 | bbox_mask = mxnet.ndarray.slice_axis(mask, axis=1, begin=2, end=6).asnumpy() 25 | 26 | label_bbox = mxnet.ndarray.slice_axis(label, axis=1, begin=2, end=6).asnumpy() 27 | 28 | pred_score = preds[i * 2].asnumpy() 29 | pred_bbox = preds[i * 2 + 1].asnumpy() 30 | 31 | loss_score = numpy.sum(pred_score * score_mask) 32 | loss_bbox = numpy.sum((label_bbox - pred_bbox) ** 2.0) 33 | 34 | self.num_nonzero[i * 2] += numpy.sum(score_mask[:, 0, :, :] > 0.5) 35 | self.num_nonzero[i * 2 + 1] += numpy.sum(bbox_mask > 0.5) 36 | self.sum_metric[i * 2] += loss_score 37 | self.sum_metric[i * 2 + 1] += loss_bbox 38 | 39 | self.num_update += 1 40 | 41 | def get(self): 42 | return_string_list = [] 43 | for i in range(self.num_scales): 44 | return_string_list.append('CE_loss_score_' + str(i)) 45 | return_string_list.append('SE_loss_bbox_' + str(i)) 46 | 47 | return return_string_list, [m / self.num_nonzero[i] * self.scale_factor for i, m in enumerate(self.sum_metric)] 48 | 49 | def reset(self): 50 | self.sum_metric = [0.0 for i in range(self.num_scales * 2)] 51 | self.num_update = 0 52 | self.num_nonzero = [1.0 for i in range(self.num_scales * 2)] 53 | -------------------------------------------------------------------------------- 
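The Metric class above (duplicated across the detection tasks) accumulates two running losses per scale, a cross-entropy term for scores and a squared-error term for bbox regression, each normalized by the count of non-zero mask entries and multiplied by a scale factor of 10000. A minimal usage sketch with synthetic tensors, assuming `metric_default.py` is importable as a plain module; the shapes are illustrative (6 label channels: 2 score channels followed by 4 bbox channels):

```python
import mxnet

from metric_default import Metric  # assumes this file is on the Python path

num_scales = 2
metric = Metric(num_scales)

# synthetic batch: for each scale, labels hold (mask, label) and preds hold (score, bbox)
labels, preds = [], []
for _ in range(num_scales):
    labels.append(mxnet.ndarray.ones((1, 6, 4, 4)))   # mask: channels 0-1 score, 2-5 bbox
    labels.append(mxnet.ndarray.zeros((1, 6, 4, 4)))  # label: same channel layout
    preds.append(mxnet.ndarray.zeros((1, 2, 4, 4)))   # predicted score map
    preds.append(mxnet.ndarray.zeros((1, 4, 4, 4)))   # predicted bbox map

metric.update(labels, preds)
names, values = metric.get()
for name, value in zip(names, values):
    print('%s: %.4f' % (name, value))
metric.reset()
```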
/pedestrian_detection/symbol_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/symbol_farm/__init__.py -------------------------------------------------------------------------------- /pedestrian_detection/symbol_farm/symbol_structures.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/becauseofAI/lffd-pytorch/f7da857f7ea939665b81d7bfedb98d02f4147723/pedestrian_detection/symbol_farm/symbol_structures.xlsx -------------------------------------------------------------------------------- /vehicle_detection/README.md: -------------------------------------------------------------------------------- 1 | Coming soon... --------------------------------------------------------------------------------
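For completeness, the data pipeline shared by the tasks above chains three pieces: a reformat_* script writes the annotation list, TextListAdapter streams samples from it, and PickleProvider packs them into (and reads them back from) a single pickle file. A minimal round-trip sketch, assuming the pedestrian_detection data_provider_farm modules are importable as plain modules and the list file already exists; the paths are illustrative:

```python
from pickle_provider import PickleProvider      # assumes plain-module imports work here
from text_list_adapter import TextListAdapter

list_path = './data_folder/data_list_caltech_test.txt'  # existing annotation list
pkl_path = './data_folder/data_list_caltech_test.pkl'

# pack: JPG-encode every image (quality 90) and pickle the whole dataset
adapter = TextListAdapter(list_path)
PickleProvider(pkl_path, encode_quality=90, data_adapter=adapter).write()

# unpack: load everything into memory and fetch one positive sample
provider = PickleProvider(pkl_path)
im, flag, bboxes = provider.read_by_index(provider.positive_index[0])
print(im.shape, flag, bboxes)
```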