├── pic
│   ├── 000456.jpg
│   └── 000542.jpg
├── libs
│   ├── __init__.py
│   ├── logger
│   │   ├── __init__.py
│   │   ├── DCGAN_data_stream.py
│   │   ├── factory.py
│   │   ├── data_pipeline.py
│   │   └── CPD_stream.py
│   ├── network
│   │   ├── __init__.py
│   │   ├── factory.py
│   │   ├── network.py
│   │   └── Backbone_net.py
│   ├── tools
│   │   ├── __init__.py
│   │   ├── overfitting_monitor.py
│   │   └── gadget.py
│   └── crafting_table
│       ├── __init__.py
│       ├── factory.py
│       ├── assembly_line.py
│       ├── DCGAN_line.py
│       └── Backbone_line.py
├── README.md
└── CSRT_train.py

/pic/000456.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JK-Rao/Corner_Segmentation_TextDetection/HEAD/pic/000456.jpg
--------------------------------------------------------------------------------
/pic/000542.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JK-Rao/Corner_Segmentation_TextDetection/HEAD/pic/000542.jpg
--------------------------------------------------------------------------------
/libs/__init__.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File __init__.py
# @Software PyCharm
# @Author JK.Rao
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Corner_Segmentation_TextDetection
Multi-Oriented Scene Text Detection via Corner Localization and Region Segmentation
Coding...
--------------------------------------------------------------------------------
/libs/logger/__init__.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File __init__.py
# @Software PyCharm
# @Author JK.Rao
--------------------------------------------------------------------------------
/libs/network/__init__.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File __init__.py
# @Software PyCharm
# @Author JK.Rao
--------------------------------------------------------------------------------
/libs/tools/__init__.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File __init__.py
# @Software PyCharm
# @Author JK.Rao
--------------------------------------------------------------------------------
/libs/crafting_table/__init__.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File __init__.py
# @Software PyCharm
# @Author JK.Rao
--------------------------------------------------------------------------------
/CSRT_train.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-8-6 12:01 PM
# @File CSRT_train.py
# @Software PyCharm
# @Author JK.Rao

from libs.network.factory import get_network
from libs.crafting_table.factory import train_model

# network = get_network('CSTR')
# loss_dict = network.structure_loss()
# opti = network.define_optimizer(loss_dict)
train_model('CSTR')
--------------------------------------------------------------------------------
/libs/network/factory.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File factory.py
# @Software PyCharm
# @Author JK.Rao

from .Backbone_net import Backbone_net


def get_network(name, global_reuse=False):
    if name == 'DCGAN':
        pass  # the DCGAN branch of the factory is still a stub; callers receive None
    elif name == 'CSTR':
        return Backbone_net(global_reuse)


def get_network_name(name):
    if name == 'DCGAN':
        return ['gen', 'dis']
--------------------------------------------------------------------------------
/libs/logger/DCGAN_data_stream.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File DCGAN_data_stream.py
# @Software PyCharm
# @Author JK.Rao

from .data_pipeline import TfReader, TfWriter, DataPipeline
import cv2


class DCGANReader(TfReader):
    def __init__(self):
        TfReader.__init__(self, 32, 20, 1)


class DCGANWriter(TfWriter):
    def __init__(self):
        TfWriter.__init__(self, 32, 20, 1)


def DCGAN_get_pipeline(sess, propose, batch_size, filename):
    stream = DataPipeline('.tfrecords', propose, './data')
    return stream.tfrecords2imgs_tensor(sess, filename, batch_size, DCGANReader(), cv2.IMREAD_GRAYSCALE)
--------------------------------------------------------------------------------
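A minimal usage sketch of the pipeline above (TF 1.x graph mode). It assumes a records file such as `./data/train/train_num0.tfrecords` already exists; the `train_num0` name is only illustrative.

```python
import tensorflow as tf
from libs.logger.DCGAN_data_stream import DCGAN_get_pipeline
from libs.logger.data_pipeline import DataPipeline

with tf.Session() as sess:
    images_tensor = DCGAN_get_pipeline(sess, 'train', batch_size=128, filename='train_num0')
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # shuffle_batch needs queue runners
    batch = DataPipeline.tensor2data(sess, images_tensor)            # floats in [-0.5, 0.5]
    print(batch.shape)  # (128, 32, 20, 1)
    coord.request_stop()
    coord.join(threads)
```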
/libs/crafting_table/factory.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File factory.py
# @Software PyCharm
# @Author JK.Rao

from .DCGAN_line import DCGANLine
from .Backbone_line import Backbone_line
from ..network.factory import get_network
from os.path import join
import tensorflow as tf

tf.reset_default_graph()


def train_model(name, inster_number=None, annotation=None):
    if name == 'DCGAN':
        info_dict = {'inster_number': inster_number, 'annotation': annotation, 'batch_size': 128, 'val_size': 64}
        line = DCGANLine('train', info_dict)
        line.structure_train_context()
    elif name == 'CSTR':
        line = Backbone_line()
        line.structure_train_context()


def test_model(name, model_path, model_name, parameter_name):
    if name == 'DCGAN':
        info_dict = {'model_name': join(model_path, model_name),
                     'parameter_name': join(model_path, parameter_name),
                     'batch_size': 128}
        line = DCGANLine('test', info_dict)
        return line.restore_test_context()
--------------------------------------------------------------------------------
/libs/tools/overfitting_monitor.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File overfitting_monitor.py
# @Software PyCharm
# @Author JK.Rao

from __future__ import print_function
import cv2
import numpy as np
import copy
import os


def calcu_Euclidean_distance(im1, im2):
    im1 = im1.astype(np.int32)
    im2 = im2.astype(np.int32)
    # Euclidean (Frobenius) distance between the two images viewed as vectors;
    # ord=2 on a matrix would be the spectral norm, not what the name promises
    return np.linalg.norm(im1 - im2)


def min_distance_set(obj_file, file_set):
    try:
        index = file_set.index(obj_file)
    except ValueError as e:
        print(e)
        return -1, -1, obj_file, ''  # keep the four-value arity that callers unpack
    temp_set = copy.deepcopy(file_set)
    temp_set.pop(index)
    min_distance = 999999.
    ave_distance = 0.
    min_obj_name = obj_file
    min_file_name = ''
    for file_name in temp_set:
        dis = calcu_Euclidean_distance(cv2.imread(obj_file, -1), cv2.imread(file_name, -1))
        min_distance, min_file_name = [dis, file_name] if dis < min_distance else [min_distance, min_file_name]
        ave_distance += dis
    ave_distance /= len(temp_set)
    return min_distance, ave_distance, min_obj_name, min_file_name


def random_sampling(file_set, iter_num):
    min_dis = 999999.
    ave_dis = 0.
    abs_obj_name = ''
    abs_file_name = ''
    print('calculate distance', end='')
    for i in range(iter_num):
        print(' %d/%d' % (i + 1, iter_num), end='')
        min_val, ave, prob_obj_name, prob_file_name = min_distance_set(file_set[np.random.randint(0, len(file_set))],
                                                                       file_set)
        min_dis, abs_obj_name, abs_file_name = [min_val, prob_obj_name, prob_file_name] if min_val < min_dis \
            else [min_dis, abs_obj_name, abs_file_name]
        ave_dis += ave
    print('')
    return min_dis, ave_dis / iter_num, abs_obj_name, abs_file_name


if __name__ == '__main__':
    inster_number = 0
    iter_num = 1
    PATH = './out_bank_CNN_num%d' % inster_number
    file_names = os.listdir(PATH)
    file_names = [os.path.join(PATH, a) for a in file_names]
    min_dis, ave_dis, obj_name, file_name = random_sampling(file_names, iter_num)
    print(min_dis, ave_dis, obj_name, file_name)
    cv2.imshow('obj', cv2.imread(obj_name))
    cv2.imshow('file', cv2.imread(file_name))
    cv2.waitKey()
--------------------------------------------------------------------------------
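A toy check of the distance semantics used by the monitor, in pure NumPy with no image files: two 2x2 "images" differing by one pixel of magnitude 3 lie at Euclidean distance 3.

```python
import numpy as np

im_a = np.array([[10, 20], [30, 40]], dtype=np.uint8)
im_b = np.array([[10, 20], [30, 43]], dtype=np.uint8)
diff = im_a.astype(np.int32) - im_b.astype(np.int32)  # cast first to avoid uint8 wrap-around
print(np.linalg.norm(diff))  # 3.0
```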
/libs/crafting_table/assembly_line.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File assembly_line.py
# @Software PyCharm
# @Author JK.Rao

import tensorflow as tf
import os
import cv2


class AssemblyLine(object):
    def __init__(self, config, graph):
        self.sess = tf.Session(graph=graph, config=config)
        self.iter_num = 0
        self.summary_writer = None

        self.sess.as_default()

    def create_summary(self, summ_dict, log_path):
        # summ_dict = self.network.get_summary()
        for key in summ_dict:
            tf.summary.scalar(key, summ_dict[key])
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter(log_path, self.sess.graph)
        self.summary_writer = writer
        return merged

    def write_summary(self, mg):
        self.summary_writer.add_summary(mg, self.iter_num)

    def close_summary_writer(self):
        if self.summary_writer is None:
            print('Error in close writer...')
        else:
            self.summary_writer.close()

    @staticmethod
    def average_gradients(tower_grads):
        average_grads = list()
        for grad_and_vars in zip(*tower_grads):
            grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
            grads = tf.concat(grads, axis=0)
            grad = tf.reduce_mean(grads, axis=0)
            grad_and_var = (grad, grad_and_vars[0][1])
            average_grads.append(grad_and_var)
        return average_grads

    def structure_train_context(self):
        raise NotImplementedError('Must be subclassed.')

    def restore_test_context(self):
        raise NotImplementedError('Must be subclassed.')

    def get_saver(self, vars, max_to_keep=100):
        return tf.train.Saver(vars, max_to_keep=max_to_keep)

    def save_model(self, saver, save_path_name, write_meta_graph=True):
        self.sess.as_default()
        saver.save(self.sess, save_path_name, write_meta_graph=write_meta_graph)

    def restore_model(self, model_name, parameter_name):
        saver = tf.train.import_meta_graph(model_name)
        graph = tf.get_default_graph()
        self.sess.run(tf.global_variables_initializer())
        saver.restore(self.sess, parameter_name)
        return graph
--------------------------------------------------------------------------------
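A minimal sketch of how `average_gradients` combines per-GPU gradients (TF 1.x). Two "towers" compute gradients for the same variable; the synchronized update uses their element-wise mean.

```python
import tensorflow as tf
from libs.crafting_table.assembly_line import AssemblyLine

w = tf.Variable([1.0, 2.0])
tower_grads = [
    [(tf.constant([0.2, 0.4]), w)],  # (grad, var) pairs from tower 0
    [(tf.constant([0.4, 0.8]), w)],  # (grad, var) pairs from tower 1
]
avg = AssemblyLine.average_gradients(tower_grads)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(avg[0][0]))  # [0.3 0.6]
```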
/libs/logger/factory.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File factory.py
# @Software PyCharm
# @Author JK.Rao

from .DCGAN_data_stream import DCGAN_get_pipeline
from .CPD_stream import ground_truth2feature_map
import scipy.io as sio
import cv2
from os.path import join
import numpy as np
import copy
import random
import time

CPD_mat = sio.loadmat('./data/img_data/gt.mat')
sampling_list = list(range(CPD_mat['imnames'].shape[1]))  # list() keeps shuffle working on Python 3 as well
random.shuffle(sampling_list)
train_sampling_list = sampling_list[:-100000]
val_sampling_list = sampling_list[-100000:]


def random_list():
    random.shuffle(train_sampling_list)


def get_sample_tensor(model_name, sess=None, propose=None, batch_size=None, filename=None):
    if model_name == 'DCGAN':
        return DCGAN_get_pipeline(sess, propose, batch_size, filename)
    elif model_name == 'CPD':
        t0 = time.time()
        img_batch = None
        dicts = list()
        if batch_size is not None:
            for i in range(batch_size[0], batch_size[1]):
                img = cv2.imread(
                    join('/home/cj3/Downloads/im/SynthText',
                         CPD_mat['imnames'][0][train_sampling_list[i] if filename == 'train' else
                                               val_sampling_list[i]][0].encode('gb18030')))
                img_height, img_width = img.shape[0:2]
                img = cv2.resize(img, (512, 512))
                img = img[np.newaxis, :]

                gt_array = copy.deepcopy(CPD_mat['wordBB'][0][train_sampling_list[i] if filename == 'train' else
                                                              val_sampling_list[i]])
                gt_array[0] = gt_array[0] * 512. / img_width
                gt_array[1] = gt_array[1] * 512. / img_height
                gt_array.astype(np.int32)  # NOTE: astype returns a copy; gt_array itself stays float here
                if len(gt_array.shape) < 3:
                    gt_array = gt_array.reshape((gt_array.shape[0], gt_array.shape[1], 1))
                    # continue
                img_batch = img if img_batch is None else np.append(img_batch, img, axis=0)
                dicts.append(ground_truth2feature_map(gt_array))

        else:
            img = cv2.imread(join('/home/cj3/Downloads/im/SynthText', CPD_mat['imnames'][0][0][0].encode('gb18030')))
            img_height, img_width = img.shape[0:2]
            img = cv2.resize(img, (512, 512))
            img = img[np.newaxis, :]
            img_batch = img

            gt_array = CPD_mat['wordBB'][0][0]
            gt_array[0] = gt_array[0] * 512. / img_width
            gt_array[1] = gt_array[1] * 512. / img_height
            gt_array.astype(np.int32)
            dicts.append(ground_truth2feature_map(gt_array))

        # print('process spends %fs for 8 imgs.' % (time.time() - t0))
        return dicts, img_batch
--------------------------------------------------------------------------------
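A shape walkthrough for the CPD branch above, assuming SynthText's `gt.mat` and images are present at the hard-coded paths:

```python
from libs.logger.factory import get_sample_tensor

dicts, img_batch = get_sample_tensor('CPD', batch_size=[0, 8], filename='train')
print(img_batch.shape)               # (8, 512, 512, 3)
print(len(dicts))                    # 8, one mask dict per image
print(len(dicts[0]['cls_mask']))     # 7 scales, f11 ... f3
print(dicts[0]['seg_mask'][0].shape) # (1, 512, 512, 4)
```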
/libs/logger/data_pipeline.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File data_pipeline.py
# @Software PyCharm
# @Author JK.Rao

import numpy as np
import tensorflow as tf
import cv2
from os.path import join
import os


class TfWriter(object):
    def __init__(self, fixed_height=None, fixed_width=None, fixed_chanel=None):
        self.fixed_height = fixed_height
        self.fixed_width = fixed_width
        self.fixed_chanel = fixed_chanel

    @staticmethod
    def _int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    @staticmethod
    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    def write_tfrecords(self, im, writer):
        if self.fixed_height is not None:
            im = cv2.resize(im, (self.fixed_width, self.fixed_height))
        im_raw = im.astype(np.uint8).tostring()
        example = tf.train.Example(features=tf.train.Features(feature={
            'im_raw': self._bytes_feature(im_raw)}))
        writer.write(example.SerializeToString())


class TfReader(object):
    def __init__(self, fixed_height=None, fixed_width=None, fixed_chanel=None):
        self.fixed_height = fixed_height
        self.fixed_width = fixed_width
        self.fixed_chanel = fixed_chanel

    def read_and_decode(self, filename_queue):
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)
        features = tf.parse_single_example(serialized_example, features={
            'im_raw': tf.FixedLenFeature([], tf.string)
        })
        image = tf.decode_raw(features['im_raw'], tf.uint8)
        if self.fixed_width is not None:
            image = tf.reshape(image, [self.fixed_height, self.fixed_width, self.fixed_chanel])

        return image

    def load_sample(self, file_path, batch_size, num_epochs):
        if not num_epochs:
            num_epochs = None
        filename_queue = tf.train.string_input_producer([file_path], num_epochs=num_epochs)
        image = self.read_and_decode(filename_queue)
        images = tf.train.shuffle_batch([image],
                                        batch_size=batch_size,
                                        num_threads=64,
                                        capacity=1000 + 3 * batch_size,
                                        min_after_dequeue=1000
                                        )
        return images


class DataPipeline(object):
    def __init__(self, data_type, propose, root_path, train_dir='train', val_dir='val', test_dir='test'):
        self.data_type = data_type
        self.propose = propose
        self.root_path = root_path
        self.xxx_dir = {'train': train_dir, 'val': val_dir, 'test': test_dir}

    def set_dataType(self, data_type):
        self.data_type = data_type

    def set_propose(self, propose):
        self.propose = propose

    def imgs2tfrecords(self, object_path, object_name, recorder=TfWriter(), flag=cv2.IMREAD_COLOR):
        writer = tf.python_io.TFRecordWriter(join(object_path, object_name + '.tfrecords'))
        file_path = join(self.root_path, self.xxx_dir[self.propose])
        file_lines = os.listdir(file_path)
        for index, file_name in enumerate(file_lines):
            im = cv2.imread(join(file_path, file_name), flags=flag)
            if im is None:
                print('Error in reading %s...' % file_name)
                continue
            recorder.write_tfrecords(im, writer)
            if index % 1000 == 0:
                print('processing %d/%d...' % (index, len(file_lines)))
        print('writing end...')

    def tfrecords2imgs_tensor(self, sess, tf_data_name, batch_size, recorder=TfReader(), flag=cv2.IMREAD_COLOR):
        tfdata_path = join(self.root_path, self.xxx_dir[self.propose])
        file_path_name = join(tfdata_path, tf_data_name + self.data_type)
        if not os.path.exists(file_path_name):
            raise IOError('No such file: \'%s\'' % file_path_name)
        images_tensor = recorder.load_sample(file_path_name, batch_size, None)
        return images_tensor

    @staticmethod
    def tensor2data(sess, tensor, normal=True):
        data = sess.run(tensor)
        data = data / 255. - 0.5 if normal else data
        return data
--------------------------------------------------------------------------------
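A minimal write-side sketch pairing with the read path shown earlier: pack a folder of images into a `.tfrecords` file. Paths and the `train_num0` name are illustrative.

```python
import cv2
from libs.logger.data_pipeline import DataPipeline
from libs.logger.DCGAN_data_stream import DCGANWriter

pipeline = DataPipeline('.tfrecords', 'train', './data')
# reads ./data/train/*, resizes to 20x32 grayscale, writes ./data/train/train_num0.tfrecords
pipeline.imgs2tfrecords('./data/train', 'train_num0', DCGANWriter(), cv2.IMREAD_GRAYSCALE)
```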
/libs/network/network.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File network.py
# @Software PyCharm
# @Author JK.Rao

import tensorflow as tf
from tensorflow.python.training.moving_averages import assign_moving_average


def layer(op):
    def layer_decorate(self, *args, **kwargs):
        self.input = self.pre_process_tensor
        self.pre_process_tensor = op(self, self.input, *args[1:], **kwargs)
        if args[0] == 'save tensor':
            self.layers.append(self.pre_process_tensor)
        return self

    return layer_decorate


def flattener(op):
    def tensor_flattener(self, *args):
        concat_tensor = tf.reshape(args[0], shape=[-1, args[1]])
        self.input = self.pre_process_tensor
        self.pre_process_tensor = op(self, self.input, concat_tensor)
        return self

    return tensor_flattener


class Network(object):
    def __init__(self, net_name):
        self.net_name = net_name
        self.pre_process_tensor = None
        self.input = None
        self.layers = []

    def setup(self, x, scope_name, reuse=False):
        raise NotImplementedError('Must be subclassed.')

    def restore(self):
        raise NotImplementedError('Must be subclassed.')

    def get_summary(self):
        raise NotImplementedError('Must be subclassed.')

    def get_pred(self):
        raise NotImplementedError('Must be subclassed.')

    def structure_loss(self):
        raise NotImplementedError('Must be subclassed.')

    def define_optimizer(self, loss_dict):
        raise NotImplementedError('Must be subclassed.')

    def get_trainable_var(self, name):
        vars = []
        for var in tf.global_variables():
            if var.name.split('/')[0] == name and var.name.split('/')[1] != name:
                vars.append(var)
        return vars

    def feed(self, x, save_tensor):
        self.pre_process_tensor = x
        if save_tensor == 'save tensor':
            self.layers.append(self.pre_process_tensor)
        elif save_tensor == 'flatten tensor x2':
            self.pre_process_tensor = tf.reshape(self.pre_process_tensor,
                                                 shape=[-1, 2])
        elif save_tensor == 'flatten tensor x4':
            self.pre_process_tensor = tf.reshape(self.pre_process_tensor,
                                                 shape=[-1, 4])
        elif save_tensor == 'flatten tensor x1':
            self.pre_process_tensor = tf.reshape(self.pre_process_tensor,
                                                 shape=[-1, 1])
        return self

    def weight_var(self, shape, name):
        return tf.get_variable(name=name, shape=shape,
                               initializer=tf.random_normal_initializer(mean=0., stddev=0.02))

    def bias_var(self, shape, name):
        return tf.get_variable(name=name, shape=shape, initializer=tf.constant_initializer(0))

    def layer_tensor_pop(self, index=-1):
        return self.layers.pop(index)

    def layer_tensor_demand(self, index=-1):
        return self.layers[index]

    @flattener
    def concat_tensor(self, tensor_org, tensor_cc):
        return tf.concat([tensor_org, tensor_cc], axis=0)

    @layer
    def normal(self, x, on_train, decay, axes, name_scale, name_offset, name_mean, name_var):
        # batch-normalization
        shape = x.get_shape().as_list()[-1]
        scale = tf.get_variable(name_scale, shape, initializer=tf.ones_initializer(), trainable=True)
        offset = tf.get_variable(name_offset, shape, initializer=tf.zeros_initializer(), trainable=True)
        variance_epsilon = 1e-7
        mean_p = tf.get_variable(name_mean, shape, initializer=tf.zeros_initializer(), trainable=False)
        var_p = tf.get_variable(name_var, shape, initializer=tf.ones_initializer(), trainable=False)

        # moving average
        def mean_var_with_update():
            mean_ba, var_ba = tf.nn.moments(x, axes, name='moments')
            with tf.control_dependencies([assign_moving_average(mean_p, mean_ba, decay),
                                          assign_moving_average(var_p, var_ba, decay)]):
                return tf.identity(mean_ba), tf.identity(var_ba)

        # with tf.variable_scope('EMA'):
        mean, var = tf.cond(on_train, mean_var_with_update, lambda: (mean_p, var_p))

        return tf.nn.batch_normalization(x, mean, var, offset, scale, variance_epsilon)

    @layer
    def deconv2d(self, x, output_size, k_h, k_w, d_h, d_w, name_W, name_b, padding='SAME'):
        w = self.weight_var([k_h, k_w, output_size[-1], x.get_shape().as_list()[-1]], name=name_W)
        deconv = tf.nn.conv2d_transpose(x, w, output_shape=output_size, strides=[1, d_h, d_w, 1], padding=padding) + \
                 self.bias_var([output_size[-1]], name=name_b)
        return deconv

    @layer
    def conv2d(self, x, output_dim, k_h, k_w, d_h, d_w, name_W, name_b, padding='SAME'):
        w = self.weight_var([k_h, k_w, x.get_shape().as_list()[-1], output_dim], name=name_W)
        conv = tf.nn.conv2d(x, w, strides=[1, d_h, d_w, 1], padding=padding) + \
               self.bias_var([output_dim], name=name_b)
        return conv

    @layer
    def lrelu(self, x, leak=0.2):
        return tf.maximum(x, leak * x)

    @layer
    def relu(self, x):
        return tf.nn.relu(x)

    @layer
    def mulfc(self, x, output_dim, name_W, name_b):
        w = self.weight_var([x.get_shape().as_list()[-1], output_dim], name=name_W)
        fc = tf.matmul(x, w) + self.bias_var([output_dim], name=name_b)
        return fc

    @layer
    def max_pool2d(self, x, k_h, k_w, d_h, d_w, padding='SAME'):
        return tf.nn.max_pool(x, [1, k_h, k_w, 1], [1, d_h, d_w, 1], padding)

    @layer
    def reshape(self, x, out_size):
        return tf.reshape(x, shape=out_size)

    @layer
    def tanh(self, x, scale):
        return tf.nn.tanh(x) / scale

    @layer
    def sigmoid(self, x):
        return tf.nn.sigmoid(x)

    @layer
    def softmax(self, x):
        return tf.nn.softmax(x)
--------------------------------------------------------------------------------
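A minimal sketch of the chaining DSL defined by the `@layer` decorator: each decorated op receives the running tensor, the first positional argument is the save/abandon flag consumed by the decorator, and `'save tensor'` pushes the result onto `self.layers` for a later pop/demand. `TinyNet` is a hypothetical subclass for illustration only.

```python
import tensorflow as tf
from libs.network.network import Network


class TinyNet(Network):
    def setup(self, x, scope_name, reuse=False):
        with tf.variable_scope(scope_name):
            self.feed(x, 'abandon tensor') \
                .conv2d('abandon tensor', 8, 3, 3, 1, 1, 'c1_W', 'c1_b') \
                .relu('save tensor')
        return self.layer_tensor_pop()


net = TinyNet('tiny')
out = net.setup(tf.placeholder(tf.float32, [None, 32, 32, 3]), 'tiny_scope')
print(out.get_shape().as_list())  # [None, 32, 32, 8]
```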
/libs/logger/CPD_stream.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-8-7 11:07 AM
# @File CPD_stream.py
# @Software PyCharm
# @Author JK.Rao

import numpy as np
from .data_pipeline import TfReader
from .data_pipeline import TfWriter
from ..tools import gadget
import time
import copy
from multiprocessing import Process, Queue


class CPDReader(TfReader):
    def __init__(self):
        TfReader.__init__(self, 32, 20, 1)


class CPDWriter(TfWriter):
    def __init__(self):
        TfWriter.__init__(self, 32, 20, 1)


def init_CPD_mask(shape, channel, mask_type):
    # print('init mask of %s' % mask_type)
    gt_mask = None
    if mask_type == 'cls':
        for i in range(channel):
            if i % 2 == 0:
                gt_mask = np.ones(shape=shape, dtype=np.float32) if gt_mask is None \
                    else np.append(gt_mask, np.ones(shape=shape, dtype=np.float32), axis=3)
            else:
                gt_mask = np.zeros(shape=shape, dtype=np.float32) if gt_mask is None \
                    else np.append(gt_mask, np.zeros(shape=shape, dtype=np.float32), axis=3)
    elif mask_type == 'reg':
        for i in range(channel):
            gt_mask = np.zeros(shape=shape, dtype=np.float32) if gt_mask is None \
                else np.append(gt_mask, np.zeros(shape=shape, dtype=np.float32), axis=3)
    elif mask_type == 'seg':
        for i in range(channel):
            gt_mask = np.zeros(shape=shape, dtype=np.float32) if gt_mask is None \
                else np.append(gt_mask, np.zeros(shape=shape, dtype=np.float32), axis=3)
    return gt_mask


# gt_rects: [[cx, cy, ss, ss, point_type]...]
def gt_array2gt_rects(gt_array):
    gt_rects = list()
    for i in range(gt_array.shape[2]):
        if np.min(gt_array[:, :, i]) > 512:
            continue
        ssl = np.sqrt((gt_array[0, 3, i] - gt_array[0, 0, i]) ** 2 + (gt_array[1, 3, i] - gt_array[1, 0, i]) ** 2)
        sst = np.sqrt((gt_array[0, 1, i] - gt_array[0, 0, i]) ** 2 + (gt_array[1, 1, i] - gt_array[1, 0, i]) ** 2)
        ssr = np.sqrt((gt_array[0, 2, i] - gt_array[0, 1, i]) ** 2 + (gt_array[1, 2, i] - gt_array[1, 1, i]) ** 2)
        ssb = np.sqrt((gt_array[0, 3, i] - gt_array[0, 2, i]) ** 2 + (gt_array[1, 3, i] - gt_array[1, 2, i]) ** 2)
        ss = np.sort([ssl, sst, ssr, ssb])[0]
        for point_type in range(4):
            gt_rects.append([gt_array[0, point_type, i], gt_array[1, point_type, i], ss, ss, point_type])
    return gt_rects


gt_cls_mask_f11 = init_CPD_mask([1, 4, 4, 1], 32, 'cls')
gt_cls_mask_f10 = init_CPD_mask([1, 6, 6, 1], 32, 'cls')
gt_cls_mask_f9 = init_CPD_mask([1, 8, 8, 1], 32, 'cls')
gt_cls_mask_f8 = init_CPD_mask([1, 16, 16, 1], 32, 'cls')
gt_cls_mask_f7 = init_CPD_mask([1, 32, 32, 1], 32, 'cls')
gt_cls_mask_f4 = init_CPD_mask([1, 64, 64, 1], 32, 'cls')
gt_cls_mask_f3 = init_CPD_mask([1, 128, 128, 1], 48, 'cls')

gt_reg_mask_f11 = init_CPD_mask([1, 4, 4, 1], 64, 'reg')
gt_reg_mask_f10 = init_CPD_mask([1, 6, 6, 1], 64, 'reg')
gt_reg_mask_f9 = init_CPD_mask([1, 8, 8, 1], 64, 'reg')
gt_reg_mask_f8 = init_CPD_mask([1, 16, 16, 1], 64, 'reg')
gt_reg_mask_f7 = init_CPD_mask([1, 32, 32, 1], 64, 'reg')
gt_reg_mask_f4 = init_CPD_mask([1, 64, 64, 1], 64, 'reg')
gt_reg_mask_f3 = init_CPD_mask([1, 128, 128, 1], 96, 'reg')

gt_seg_mask = init_CPD_mask([1, 512, 512, 1], 4, 'seg')


# gt_array: a 3-d tensor, [2, 4, None]
def ground_truth2feature_map(gt_array):
    global gt_cls_mask_f11, gt_cls_mask_f10, gt_cls_mask_f9, gt_cls_mask_f8, gt_cls_mask_f7, gt_cls_mask_f4, \
        gt_cls_mask_f3, gt_reg_mask_f11, gt_reg_mask_f10, gt_reg_mask_f9, gt_reg_mask_f8, gt_reg_mask_f7, \
        gt_reg_mask_f4, gt_reg_mask_f3, gt_seg_mask

    gt_rects = gt_array2gt_rects(gt_array)
    default_boxes_f = list()
    position_f = list()
    gt_rects_f = list()
    gt_boxes = list()
    scale_list_f = list()
    cls_maps = [copy.deepcopy(gt_cls_mask_f11), copy.deepcopy(gt_cls_mask_f10), copy.deepcopy(gt_cls_mask_f9),
                copy.deepcopy(gt_cls_mask_f8), copy.deepcopy(gt_cls_mask_f7), copy.deepcopy(gt_cls_mask_f4),
                copy.deepcopy(gt_cls_mask_f3)]
    reg_maps = [copy.deepcopy(gt_reg_mask_f11), copy.deepcopy(gt_reg_mask_f10), copy.deepcopy(gt_reg_mask_f9),
                copy.deepcopy(gt_reg_mask_f8), copy.deepcopy(gt_reg_mask_f7), copy.deepcopy(gt_reg_mask_f4),
                copy.deepcopy(gt_reg_mask_f3)]
    map_size_table = [4, 6, 8, 16, 32, 64, 128]
    scale_table = [[184, 208, 232, 256],
                   [124, 136, 148, 160],
                   [88, 96, 104, 112],
                   [56, 64, 72, 80],
                   [36, 40, 44, 48],
                   [20, 24, 28, 32],
                   [4, 8, 6, 10, 12, 16]]
    strides = [128, 85.3333, 64, 32, 16, 8, 4]
    for gt_rect in gt_rects:
        for map_index in range(7):  # 7 maps of different resolution, from f11 to f3
            default_boxes, position, gt_re, gt_box, scale_list = \
                gadget.project_feature_map_simple(gt_rect[0:4],
                                                  map_size_table[map_index],
                                                  map_size_table[map_index],
                                                  scale_table[map_index],
                                                  strides[map_index],
                                                  gt_rect[4],
                                                  map_index)
            default_boxes_f += default_boxes
            position_f += position
            gt_rects_f += gt_re
            gt_boxes += gt_box
            scale_list_f += scale_list

    # test
    iou_matrix = gadget.calcul_matrix_iou(np.array(default_boxes_f), np.array(gt_boxes))
    cls_maps, reg_maps = gadget.project_feature_map_iou(cls_maps, reg_maps, np.array(default_boxes_f),
                                                        np.array(iou_matrix),
                                                        np.array(position_f), np.array(gt_rects_f),
                                                        np.array(scale_list_f))
    gt_seg_mask_counterpart = gadget.project_feature_map_seg(gt_array, copy.deepcopy(gt_seg_mask))

    return {'cls_mask': cls_maps,
            'reg_mask': reg_maps,
            'seg_mask': [gt_seg_mask_counterpart]}
--------------------------------------------------------------------------------
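A toy illustration of `gt_array2gt_rects`: one axis-aligned 100x20 word box, corners ordered top-left, top-right, bottom-right, bottom-left, yields four corner rects `[cx, cy, ss, ss, point_type]` whose `ss` is the shortest box side (assumes the package imports, including `gadget`, resolve).

```python
import numpy as np
from libs.logger.CPD_stream import gt_array2gt_rects

gt_array = np.zeros((2, 4, 1), dtype=np.float32)
gt_array[0, :, 0] = [100, 200, 200, 100]  # x of the 4 corners
gt_array[1, :, 0] = [50, 50, 70, 70]      # y of the 4 corners
for rect in gt_array2gt_rects(gt_array):
    print(rect)  # e.g. [100.0, 50.0, 20.0, 20.0, 0] for the top-left corner
```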
/libs/crafting_table/DCGAN_line.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-7-27 5:15 PM
# @File DCGAN_line.py
# @Software PyCharm
# @Author JK.Rao

from .assembly_line import AssemblyLine
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import cv2
from os.path import join
import os
from ..network.factory import get_network
from ..network.factory import get_network_name
from ..logger.factory import get_sample_tensor
from ..logger.data_pipeline import DataPipeline
from ..tools.gadget import mk_dir
from ..tools.overfitting_monitor import random_sampling


class DCGANLine(AssemblyLine):
    def __init__(self, propose, info_dict):
        if propose == 'train':
            # NOTE: the factory's DCGAN branch is still a stub; it must return a DCGAN
            # network object (with IMG_CHANEL, Z, X, ...) for this training path to run.
            self.network = get_network('DCGAN')
            AssemblyLine.__init__(self, DCGANLine.get_config(), None)
            self.inster_number = info_dict['inster_number']
            self.annotation = info_dict['annotation']
            self.batch_size = info_dict['batch_size']
            self.val_size = info_dict['val_size']
            self.IMG_CHANEL = self.network.IMG_CHANEL
        elif propose == 'test':
            AssemblyLine.__init__(self, DCGANLine.get_config(), None)
            self.model_name = info_dict['model_name']
            self.parameter_name = info_dict['parameter_name']
            self.batch_size = info_dict['batch_size']
            self.Z_dim = 100
            self.IMG_CHANEL = 1
        else:
            raise ValueError('No type like:%s of DCGANLine class...' % propose)

    @staticmethod
    def get_config():
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        return config

    def sample_Z(self, m, n):
        return np.random.uniform(-1., 1., size=[m, n])

    def plot(self, samples):
        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)
        for i, sample in enumerate(samples):  # [i, samples[i]], i_max = 16
            sample = sample + 0.5
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_aspect('equal')
            if self.IMG_CHANEL == 1:
                plt.imshow(sample.reshape(32, 20), cmap='Greys_r')
            else:
                plt.imshow(sample.reshape(32, 20, self.network.IMG_CHANEL), cmap='Greys_r')
        return fig

    def structure_train_context(self):
        saver = self.get_saver(self.network.get_trainable_var(self.network.net_name[0]))
        loss_dict = self.network.structure_loss()
        opti_dict = self.network.define_optimizer(loss_dict)
        merged = self.create_summary(self.network.get_summary(), '.logs/log_num%d' % self.iter_num)
        with self.sess:
            self.sess.run(tf.global_variables_initializer())
            self.save_model(saver,
                            './model_DCGAN_num%d%s/iter_meta.ckpt' % (self.inster_number, self.annotation))

            i = 0
            mk_dir('out_bank_CNN_num%d%s/' % (self.inster_number, self.annotation))
            X_mb_tensor = get_sample_tensor('DCGAN', self.sess, 'train', self.batch_size,
                                            'train_num%d' % self.inster_number)
            X_val_tensor = get_sample_tensor('DCGAN', self.sess, 'val', self.val_size,
                                             'val_num%d' % self.inster_number)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=self.sess, coord=coord)

            for iter in range(70000):
                if iter % 1000 == 0:
                    self.iter_num = iter
                    samples = self.sess.run(self.network.get_pred()['gen_im'], feed_dict={
                        self.network.Z: self.sample_Z(16, self.network.Z_dim), self.network.on_train: False,
                        self.network.batch_pattern: 16})  # 16*784
                    fig = self.plot(samples)
                    plt.savefig('out_bank_CNN_num%d%s/' % (self.inster_number, self.annotation) + '/{}.png'.format(
                        str(i).zfill(3)),
                        bbox_inches='tight')
                    i += 1
                    plt.close(fig)
                X_mb = DataPipeline.tensor2data(self.sess, X_mb_tensor)

                _, D_loss_curr = self.sess.run([opti_dict['d_opti'], loss_dict['d_loss']], feed_dict={
                    self.network.X: X_mb,
                    self.network.Z: self.sample_Z(self.batch_size, self.network.Z_dim),
                    self.network.on_train: True,
                    self.network.batch_pattern: self.batch_size})

                _, G_loss_curr = self.sess.run([opti_dict['g_opti'], loss_dict['g_loss']], feed_dict={
                    self.network.Z: self.sample_Z(self.batch_size, self.network.Z_dim),
                    self.network.on_train: True,
                    self.network.batch_pattern: self.batch_size})
                # if iter % 100 == 0:
                #     print('Iter:%d G_loss:%f,D_loss:%f' % (iter, G_loss_curr, D_loss_curr))
                if iter % 1000 == 0:
                    # overfitting record
                    j = 0
                    print('Iter:%d G_loss:%f,D_loss:%f' % (iter, G_loss_curr, D_loss_curr))
                    samples = self.sess.run(self.network.get_pred()['gen_im'], feed_dict={
                        self.network.Z: self.sample_Z(1000, self.network.Z_dim),
                        self.network.on_train: False,
                        self.network.batch_pattern: 1000})
                    PATH = './temp_CNN_num%d' % self.inster_number
                    mk_dir(PATH)
                    for temp_file in os.listdir(PATH):
                        os.remove(join(PATH, temp_file))
                    for line in range(1000):
                        cv2.imwrite(join(PATH, '%08d.jpg' % j),
                                    np.round((samples[line, :, :, 0] + 0.5) * 255))
                        j += 1
                    iter_num = 10
                    file_names = os.listdir(PATH)
                    file_names = [os.path.join(PATH, a) for a in file_names]
                    min_dis, ave_dis, _, _ = random_sampling(file_names, iter_num)
                    self.sess.run(tf.assign(self.network.Min_distance, min_dis))
                    self.sess.run(tf.assign(self.network.Ave_distance, ave_dis))
                    # loss record
                    X_val = DataPipeline.tensor2data(self.sess, X_val_tensor)
                    mg = self.sess.run(merged, feed_dict={
                        self.network.X: X_val,
                        self.network.on_train: False,
                        self.network.Z: self.sample_Z(self.val_size, self.network.Z_dim),
                        self.network.batch_pattern: self.val_size})
                    self.write_summary(mg)

                gl_step = self.sess.run(self.network.global_step)
                if gl_step % 10000 == 0:
                    self.save_model(saver, './model_DCGAN_num%d%s/iter_%d_num%d.ckpt'
                                    % (self.inster_number, self.annotation, gl_step, self.inster_number),
                                    write_meta_graph=False)

            coord.request_stop()
            coord.join(threads)
            self.close_summary_writer()
        self.sess.close()

    def restore_test_context(self, print_im=True):
        mk_dir('./out')
        graph = self.restore_model(self.model_name, self.parameter_name)
        Z = graph.get_operation_by_name('Z').outputs[0]
        on_train = graph.get_operation_by_name('on_train').outputs[0]
        batch_size = graph.get_operation_by_name('batch_size').outputs[0]
        gen_im = graph.get_collection('out', scope=get_network_name('DCGAN')[0])[0]
        # gen_vars = self.network.get_trainable_var(self.network.net_name[0])
        if print_im:
            for iter_num in range(8):
                print('print img %d/%d' % (iter_num + 1, 8))
                samples = self.sess.run(gen_im, feed_dict={
                    Z: self.sample_Z(16, 100), on_train: False,
                    batch_size: 16})
                fig = self.plot(samples)
                plt.savefig('out/' + '%08d.png' % iter_num, bbox_inches='tight')
                plt.close(fig)
        samples = self.sess.run(gen_im, feed_dict={Z: self.sample_Z(self.batch_size, 100),
                                                   on_train: False, batch_size: self.batch_size})
        self.sess.close()
        return samples
--------------------------------------------------------------------------------
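A minimal sketch of restoring a trained DCGAN and sampling from it via the crafting-table factory. The checkpoint directory and file names here are illustrative, and the expected sample shape follows the 32x20x1 layout used by `plot()`.

```python
from libs.crafting_table.factory import test_model

samples = test_model('DCGAN',
                     model_path='./model_DCGAN_num0',
                     model_name='iter_meta.ckpt.meta',
                     parameter_name='iter_10000_num0.ckpt')
print(samples.shape)  # e.g. (128, 32, 20, 1)
```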
/libs/network/Backbone_net.py:
--------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
# @Time 18-8-3 10:47 AM
# @File Backbone_net.py
# @Software PyCharm
# @Author JK.Rao

import tensorflow as tf
from .network import Network
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets
import tensorflow.contrib.layers as layers
import os
import numpy as np

tf.reset_default_graph()


class Backbone_net(Network):
    def __init__(self, reuse=False):
        self.graph = tf.get_default_graph()
        Network.__init__(self, 'backbone')
        self.IM_HEIGHT = 512
        self.IM_WIDTH = 512
        self.IM_CHANEL = 3
        self.global_reuse = reuse
        self.global_step = tf.Variable(0, trainable=False)
        self.X = tf.placeholder(tf.float32, shape=[None, self.IM_HEIGHT, self.IM_WIDTH, self.IM_CHANEL], name='X')
        self.Ycls = tf.placeholder(tf.float32, shape=[None, 2], name='Ycls')
        self.Yreg = tf.placeholder(tf.float32, shape=[None, 4], name='Yreg')
        self.Yseg = tf.placeholder(tf.float32, shape=[None, 1], name='Yseg')

        self.on_train = tf.placeholder(tf.bool, [], name='on_train')
        self.batch_size = tf.placeholder(tf.int32, name='batch_size')
        self.detect_dict = {}
        self.off_dict = {}
        self.seg = None

        self.vgg16_variables = None
        self.vgg16_initializer = None

        self.loss_dict = None
        self.lamd = 10

        self.setup(self.X, 'backbone')

    def setup(self, x, scope_name, reuse=False):
        conv5_3 = self.load_vgg_model()
        with tf.variable_scope(scope_name) as scope:
            if self.global_reuse or reuse:
                scope.reuse_variables()
            self.feed(conv5_3, 'abandon tensor') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 1024, 3, 3, 1, 1, 'conv6_W', 'conv6_b') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 1024, 1, 1, 1, 1, 'conv7_W', 'conv7_b') \
                .relu('save tensor') \
                .conv2d('abandon tensor', 256, 1, 1, 1, 1, 'conv8_1W', 'conv8_1b') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 512, 3, 3, 2, 2, 'conv8_2W', 'conv8_2b') \
                .relu('save tensor') \
                .conv2d('abandon tensor', 128, 1, 1, 1, 1, 'conv9_1W', 'conv9_1b') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 256, 3, 3, 2, 2, 'conv9_2W', 'conv9_2b') \
                .relu('save tensor') \
                .conv2d('abandon tensor', 128, 1, 1, 1, 1, 'conv10_1W', 'conv10_1b') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 256, 3, 3, 1, 1, 'conv10_2W', 'conv10_2b', 'VALID') \
                .relu('save tensor') \
                .conv2d('abandon tensor', 128, 1, 1, 1, 1, 'conv11_1W', 'conv11_1b') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 256, 3, 3, 1, 1, 'conv11_2W', 'conv11_2b', 'VALID') \
                .relu('save tensor')
            f11 = self.layer_tensor_demand()
            f10 = self.deconvolution_model([self.batch_size, 6, 6, 256], f11, self.layers[5], 'deconv_f10', '1X1')
            f9 = self.deconvolution_model([self.batch_size, 8, 8, 256], f10, self.layers[4], 'deconv_f9', '1X1')
            f8 = self.deconvolution_model([self.batch_size, 16, 16, 256], f9, self.layers[3], 'deconv_f8')
            f7 = self.deconvolution_model([self.batch_size, 32, 32, 256], f8, self.layers[2], 'deconv_f7')
            f4 = self.deconvolution_model([self.batch_size, 64, 64, 256], f7, self.layers[1], 'deconv_f4')
            f3 = self.deconvolution_model([self.batch_size, 128, 128, 256], f4, self.layers[0], 'deconv_f3')

            f_mix = list()
            f_mix.append({'tensor': f11, 'num': 4, 'name': 'f11_CPD'})
            f_mix.append({'tensor': f10, 'num': 4, 'name': 'f10_CPD'})
            f_mix.append({'tensor': f9, 'num': 4, 'name': 'f9_CPD'})
            f_mix.append({'tensor': f8, 'num': 4, 'name': 'f8_CPD'})
            f_mix.append({'tensor': f7, 'num': 4, 'name': 'f7_CPD'})
            f_mix.append({'tensor': f4, 'num': 4, 'name': 'f4_CPD'})
            f_mix.append({'tensor': f3, 'num': 6, 'name': 'f3_CPD'})
            self.setup_corner_point_dect(f_mix)
            self.setup_position_sen_seg(f_mix)

    def get_summary(self):
        if self.loss_dict is None:
            raise ValueError('loss is None...')
        return {
            'total': self.loss_dict['cls loss'] + self.loss_dict['reg loss'] + self.loss_dict['seg loss'] * self.lamd,
            'cls loss': self.loss_dict['cls loss'],
            'reg loss': self.loss_dict['reg loss'],
            'seg loss': self.loss_dict['seg loss']}

    # def flatten_tensor(self, tensor):
    #     return tf.reshape(tensor, [-1, tensor.get_shape().as_list()[-1]])

    def get_pred(self):
        return self.detect_dict, self.off_dict, self.seg

    def structure_loss(self):
        self.feed(self.detect_dict['f11_CPD'], 'flatten tensor x2') \
            .concat_tensor(self.detect_dict['f10_CPD'], 2) \
            .concat_tensor(self.detect_dict['f9_CPD'], 2) \
            .concat_tensor(self.detect_dict['f8_CPD'], 2) \
            .concat_tensor(self.detect_dict['f7_CPD'], 2) \
            .concat_tensor(self.detect_dict['f4_CPD'], 2) \
            .concat_tensor(self.detect_dict['f3_CPD'], 2)
        flatten_pred_cls = self.pre_process_tensor
        self.feed(self.off_dict['f11_CPD'], 'flatten tensor x4') \
            .concat_tensor(self.off_dict['f10_CPD'], 4) \
            .concat_tensor(self.off_dict['f9_CPD'], 4) \
            .concat_tensor(self.off_dict['f8_CPD'], 4) \
            .concat_tensor(self.off_dict['f7_CPD'], 4) \
            .concat_tensor(self.off_dict['f4_CPD'], 4) \
            .concat_tensor(self.off_dict['f3_CPD'], 4)
        flatten_pred_reg = self.pre_process_tensor
        pred_seg = self.seg
        self.feed(self.seg, 'flatten tensor x1')
        flatten_pred_seg = self.pre_process_tensor

        OHEM_mask = self.Ycls[:, 1] >= 1
        # OHEM_mask = tf.logical_or(OHEM_mask, self.Ycls[:, 1] >= 1)
        OHEM_mask = tf.reshape(tf.cast(OHEM_mask, dtype=tf.int32), shape=[-1, 1])

        pos_num = tf.reduce_sum(OHEM_mask)
        neg_num = pos_num * 3
        flatten_pred_cls_neg = tf.where(tf.reshape(tf.cast(OHEM_mask, tf.bool), shape=[-1]),
                                        tf.zeros_like(flatten_pred_cls[:, 1], dtype=tf.float32),
                                        flatten_pred_cls[:, 1])
        val, index = tf.nn.top_k(flatten_pred_cls_neg, k=tf.maximum(neg_num, 1))
        cls_pos = tf.reshape(flatten_pred_cls[:, 1], shape=[-1, 1])
        OHEM_mask_cls = tf.cast(OHEM_mask, dtype=tf.bool)
        OHEM_mask_cls = tf.logical_or(OHEM_mask_cls, cls_pos >= val[-1])
        OHEM_mask_cls = tf.cast(OHEM_mask_cls, dtype=tf.float32)
        # data_num = tf.reduce_sum(OHEM_mask_cls)
        # cls loss
        epsilon = 1e-10
        loss_cls = -tf.reduce_sum(self.Ycls * tf.log(flatten_pred_cls + epsilon), axis=[1],
                                  keep_dims=True) * OHEM_mask_cls
        # reg loss
        delta_reg = tf.abs(flatten_pred_reg - self.Yreg)
        OHEM_mask = tf.cast(OHEM_mask, dtype=tf.float32)
        smooth_l1_sign = tf.cast(tf.reshape(delta_reg < 1, shape=[-1, 4]), dtype=tf.float32)
        loss_reg = tf.reduce_sum(0.5 * tf.pow(delta_reg, 2) * smooth_l1_sign +
                                 (delta_reg - 0.5) * (1 - smooth_l1_sign), axis=[1], keep_dims=True) * OHEM_mask
        # seg loss (soft dice)
        # loss_seg = tf.reduce_mean(1 - ((2 * self.Yseg * pred_seg) / (self.Yseg + pred_seg)))
        loss_seg = 1 - tf.reduce_sum((2 * (self.Yseg * flatten_pred_seg))) / \
                   tf.reduce_sum(self.Yseg + (flatten_pred_seg))
        # loss_seg = tf.norm(self.Yseg - flatten_pred_seg) / 5000

        self.loss_dict = {'cls loss': tf.reduce_sum(loss_cls) / (tf.cast(pos_num, tf.float32)),
                          'reg loss': tf.reduce_sum(loss_reg) / tf.cast(pos_num, tf.float32),
                          'seg loss': loss_seg}

        return self.loss_dict

    def define_optimizer(self, loss_dict):
        # backbone_vars = self.get_trainable_var('backbone')
        # vgg_vars = self.get_trainable_var('vgg_16')
        # total_vars = backbone_vars + vgg_vars
        total_vars = self.obtain_vars(['backbone', 'vgg_16'])
        loss = loss_dict['cls loss'] + loss_dict['reg loss'] + self.lamd * loss_dict['seg loss']
        optimizer = tf.train.AdamOptimizer(0.0001).minimize(loss,
                                                            global_step=self.global_step,
                                                            var_list=total_vars)
        return optimizer, total_vars

    def obtain_vars(self, var_names):
        vars = list()
        for var_name in var_names:
            vars = vars + self.get_trainable_var(var_name)
        return vars

    def setup_corner_point_dect(self, f):
        for f_in in f:
            scor, offs = self.detect_model(f_in['tensor'], f_in['num'], f_in['name'])
            self.detect_dict[f_in['name']] = scor
            self.off_dict[f_in['name']] = offs

    def setup_position_sen_seg(self, f):
        f_sum = None
        for f_index, f_in in enumerate(f):
            if f_index < 2:
                continue
            f_sum = tf.image.resize_images(f_in['tensor'], [128, 128]) if f_sum is None \
                else f_sum + tf.image.resize_images(f_in['tensor'], [128, 128])
        self.feed(f_sum, 'abandon tensor') \
            .conv2d('abandon tensor', 256, 1, 1, 1, 1, 'PSS_conv_1_W', 'PSS_conv_1_b') \
            .relu('abandon tensor') \
            .deconv2d('abandon tensor', [self.batch_size, 256, 256, 256], 2, 2, 2, 2, 'PSS_deconv_1_W',
                      'PSS_deconv_1_b') \
            .conv2d('abandon tensor', 256, 1, 1, 1, 1, 'PSS_conv_2_W', 'PSS_conv_2_b') \
            .relu('abandon tensor') \
            .deconv2d('abandon tensor', [self.batch_size, 512, 512, 4], 2, 2, 2, 2, 'PSS_deconv_2_W',
                      'PSS_deconv_2_b') \
            .sigmoid('save tensor')
        pss_pred = self.layer_tensor_pop()
        self.seg = pss_pred

    def detect_model(self, f, default_box_num, name):
        self.feed(f, 'abandon tensor') \
            .conv2d('abandon tensor', 256, 1, 1, 1, 1, name + '_conv1_1_W', name + '_conv1_1_b') \
            .relu('abandon tensor') \
            .conv2d('abandon tensor', 256, 1, 1, 1, 1, name + '_conv1_2_W', name + '_conv1_2_b') \
            .relu('abandon tensor') \
            .conv2d('abandon tensor', 256, 1, 1, 1, 1, name + '_conv1_3_W', name + '_conv1_3_b') \
            .relu('save tensor')
        conv = self.layer_tensor_pop()
        self.feed(f, 'abandon tensor') \
            .conv2d('abandon tensor', 256, 1, 1, 1, 1, name + '_conv2_1_W', name + '_conv2_1_b') \
            .relu('save tensor')
        conv_short = self.layer_tensor_pop()

        self.feed(conv + conv_short, 'abandon tensor') \
            .conv2d('abandon tensor', 256, 1, 1, 1, 1, name + '_conv3_W', name + '_conv3_b') \
            .relu('save tensor') \
            .conv2d('abandon tensor', default_box_num * 4 * 2, 1, 1, 1, 1, name + '_conv4_top_W',
                    name + '_conv4_top_b') \
            .softmax('save tensor')

        scor_pred = self.layer_tensor_pop()

        self.feed(self.layer_tensor_pop(), 'abandon tensor') \
            .conv2d('save tensor', default_box_num * 4 * 4, 1, 1, 1, 1, name + '_conv4_bottom_W',
                    name + '_conv4_bottom_b')

        offs_pred = self.layer_tensor_pop()

        return scor_pred, offs_pred

    def deconvolution_model(self, deconv_size, deconv_layer, feature_layer, name, scale_ratio='normal'):
        if scale_ratio == 'normal':
            self.feed(deconv_layer, 'abandon tensor') \
                .deconv2d('abandon tensor', deconv_size, 2, 2, 2, 2, name + '_top_deconv_W', name + '_top_deconv_b') \
                .conv2d('abandon tensor', 512, 3, 3, 1, 1, name + '_top_conv_W', name + '_top_conv_b') \
                .normal('save tensor', self.on_train, 0.5, [0, 1, 2],
                        name + '_top_scale', name + '_top_offset', name + '_top_mean', name + '_top_var')
        elif scale_ratio == '1X1':
            self.feed(deconv_layer, 'abandon tensor') \
                .deconv2d('abandon tensor', deconv_size, 3, 3, 1, 1, name + '_top_deconv_W', name + '_top_deconv_b',
                          'VALID') \
                .conv2d('abandon tensor', 512, 3, 3, 1, 1, name + '_top_conv_W', name + '_top_conv_b') \
                .normal('save tensor', self.on_train, 0.5, [0, 1, 2],
                        name + '_top_scale', name + '_top_offset', name + '_top_mean', name + '_top_var')
        top_tensor = self.layer_tensor_pop()

        self.feed(feature_layer, 'abandon tensor') \
            .conv2d('abandon tensor', 512, 3, 3, 1, 1, name + '_bottom_deconv_W_1', name + '_bottom_deconv_b_1') \
            .normal('abandon tensor', self.on_train, 0.5, [0, 1, 2],
                    name + '_bottom_scale_1', name + '_bottom_offset_1', name + '_bottom_mean_1',
                    name + '_bottom_var_1') \
            .relu('abandon tensor') \
            .conv2d('abandon tensor', 512, 3, 3, 1, 1, name + '_bottom_deconv_W_2', name + '_bottom_deconv_b_2') \
            .normal('save tensor', self.on_train, 0.5, [0, 1, 2],
                    name + '_bottom_scale_2', name + '_bottom_offset_2', name + '_bottom_mean_2',
                    name + '_bottom_var_2')
        bottom_tensor = self.layer_tensor_pop()

        ES_layer = top_tensor + bottom_tensor

        self.feed(ES_layer, 'abandon tensor') \
            .relu('save tensor')
        return self.layer_tensor_demand()

    def get_graph(self):
        return self.graph

    def load_vgg_model(self):
        graph = self.graph
        with tf.variable_scope('vgg_16') as scope:
            if self.global_reuse:
                scope.reuse_variables()
            self.feed(self.X, 'abandon tensor') \
                .conv2d('abandon tensor', 64, 3, 3, 1, 1, 'conv1/conv1_1/weights', 'conv1/conv1_1/biases') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 64, 3, 3, 1, 1, 'conv1/conv1_2/weights', 'conv1/conv1_2/biases') \
                .relu('abandon tensor') \
                .max_pool2d('abandon tensor', 2, 2, 2, 2) \
                .conv2d('abandon tensor', 128, 3, 3, 1, 1, 'conv2/conv2_1/weights', 'conv2/conv2_1/biases') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 128, 3, 3, 1, 1, 'conv2/conv2_2/weights', 'conv2/conv2_2/biases') \
                .relu('abandon tensor') \
                .max_pool2d('abandon tensor', 2, 2, 2, 2) \
                .conv2d('abandon tensor', 256, 3, 3, 1, 1, 'conv3/conv3_1/weights', 'conv3/conv3_1/biases') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 256, 3, 3, 1, 1, 'conv3/conv3_2/weights', 'conv3/conv3_2/biases') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 256, 3, 3, 1, 1, 'conv3/conv3_3/weights', 'conv3/conv3_3/biases') \
                .relu('save tensor') \
                .max_pool2d('abandon tensor', 2, 2, 2, 2) \
                .conv2d('abandon tensor', 512, 3, 3, 1, 1, 'conv4/conv4_1/weights', 'conv4/conv4_1/biases') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 512, 3, 3, 1, 1, 'conv4/conv4_2/weights', 'conv4/conv4_2/biases') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 512, 3, 3, 1, 1, 'conv4/conv4_3/weights', 'conv4/conv4_3/biases') \
                .relu('save tensor') \
                .max_pool2d('abandon tensor', 2, 2, 2, 2) \
                .conv2d('abandon tensor', 512, 3, 3, 1, 1, 'conv5/conv5_1/weights', 'conv5/conv5_1/biases') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 512, 3, 3, 1, 1, 'conv5/conv5_2/weights', 'conv5/conv5_2/biases') \
                .relu('abandon tensor') \
                .conv2d('abandon tensor', 512, 3, 3, 1, 1, 'conv5/conv5_3/weights', 'conv5/conv5_3/biases') \
                .relu('save tensor')
        model_path = 'model/vgg_model/vgg_16.ckpt'
        assert (os.path.isfile(model_path))

        variables_to_restore = tf.contrib.framework.get_variables_to_restore()
        variables_to_restore.pop(0)
        self.vgg16_variables = variables_to_restore
        init_vgg = tf.contrib.framework.assign_from_checkpoint_fn(model_path, variables_to_restore)
        self.vgg16_initializer = init_vgg

        # with tf.Session(graph=graph) as sess:
        #     sess.run(tf.global_variables_initializer())
        #     init_vgg(sess)

        return self.layer_tensor_pop()
--------------------------------------------------------------------------------
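A NumPy sketch of the OHEM selection used in `structure_loss` above: all positive default boxes are kept, and the 3x hardest negatives (highest predicted corner probability among true negatives) are admitted into the classification mask via a score threshold, so ties at the threshold may also pass, just as in the TF code.

```python
import numpy as np

y_cls = np.array([[0, 1], [1, 0], [1, 0], [1, 0], [1, 0]])  # one positive box
pred_pos_prob = np.array([0.9, 0.8, 0.1, 0.7, 0.2])         # column 1 of the prediction
pos_mask = y_cls[:, 1] >= 1
neg_scores = np.where(pos_mask, 0., pred_pos_prob)          # zero out positives
k = max(3 * pos_mask.sum(), 1)                              # neg:pos = 3:1
threshold = np.sort(neg_scores)[::-1][k - 1]                # k-th hardest negative
ohem_mask = pos_mask | (pred_pos_prob >= threshold)
print(ohem_mask)  # [ True  True False  True  True]
```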
tf.contrib.framework.assign_from_checkpoint_fn(model_path, variables_to_restore) 320 | self.vgg16_initializer = init_vgg 321 | 322 | # with tf.Session(graph=graph) as sess: 323 | # sess.run(tf.global_variables_initializer()) 324 | # init_vgg(sess) 325 | 326 | return self.layer_tensor_pop() 327 | -------------------------------------------------------------------------------- /libs/crafting_table/Backbone_line.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | # @Time 18-8-9 下午3:00 3 | # @File Backbone_line.py 4 | # @Software PyCharm 5 | # @Author JK.Rao 6 | 7 | from .assembly_line import AssemblyLine 8 | from ..logger.factory import get_sample_tensor 9 | from ..logger.factory import random_list 10 | import tensorflow as tf 11 | import numpy as np 12 | import time 13 | import cv2 14 | import copy 15 | from ..network.factory import get_network 16 | from ..network.Backbone_net import Backbone_net 17 | from os.path import join 18 | import os 19 | 20 | 21 | def flatten_concat(stand_data): 22 | cls_data = None 23 | reg_data = None 24 | seg_data = None 25 | process_one_scale = False 26 | for i in range(7): 27 | cls_data_scale = None 28 | reg_data_scale = None 29 | seg_data_scale = None 30 | for batch_data in stand_data: 31 | cls_data_scale = batch_data['cls_mask'][i] if cls_data_scale is None \ 32 | else np.append(cls_data_scale, batch_data['cls_mask'][i], axis=0) 33 | reg_data_scale = batch_data['reg_mask'][i] if reg_data_scale is None \ 34 | else np.append(reg_data_scale, batch_data['reg_mask'][i], axis=0) 35 | if not process_one_scale: 36 | seg_data_scale = batch_data['seg_mask'][0] if seg_data_scale is None \ 37 | else np.append(seg_data_scale, batch_data['seg_mask'][0], axis=0) 38 | 39 | cls_data = np.reshape(cls_data_scale, [-1, 2]) if cls_data is None \ 40 | else np.append(cls_data, np.reshape(cls_data_scale, [-1, 2]), axis=0) 41 | reg_data = np.reshape(reg_data_scale, [-1, 4]) if reg_data is None \ 42 | else np.append(reg_data, np.reshape(reg_data_scale, [-1, 4]), axis=0) 43 | if not process_one_scale: 44 | seg_data = np.reshape(seg_data_scale, [-1, 1]) if seg_data is None \ 45 | else np.append(seg_data, np.reshape(seg_data_scale, [-1, 1]), axis=0) 46 | # seg_data = seg_data_scale if seg_data is None \ 47 | # else np.append(seg_data, seg_data_scale) 48 | process_one_scale = True 49 | return {'cls_data': cls_data, 50 | 'reg_data': reg_data, 51 | 'seg_data': seg_data} 52 | 53 | 54 | class Backbone_line(AssemblyLine): 55 | def __init__(self): 56 | AssemblyLine.__init__(self, self.get_config(), tf.get_default_graph()) 57 | self.batch_size = 8 58 | self.solo_batch_size = 8 59 | self.val_size = 2 60 | self.IMG_CHANEL = 3 61 | 62 | @staticmethod 63 | def get_config(): 64 | config = tf.ConfigProto(allow_soft_placement=True) 65 | # config.gpu_options.allow_growth = True 66 | return config 67 | 68 | def artificial_check(self, X_mb, Y_mb, scale_table): 69 | strides = [128, 85.3333, 64, 32, 16, 8, 4] 70 | # show imgs 71 | for img_index in range(X_mb.shape[0]): 72 | dyeing_X = copy.deepcopy(X_mb[img_index]) 73 | cv2.imshow('img', X_mb[img_index]) 74 | Y_m = Y_mb[img_index] 75 | seg_map = Y_m['seg_mask'][0][0] 76 | # segment check 77 | for channel in range(4): 78 | if channel < 3: 79 | dyeing_X[:, :, channel] += ((255 - dyeing_X[:, :, channel]) * (seg_map[:, :, channel])).astype( 80 | np.uint8) 81 | seg_map_rgb = copy.deepcopy(seg_map[:, :, channel] * 255).astype(np.uint8) 82 | seg_map_rgb = cv2.cvtColor(seg_map_rgb, cv2.COLOR_GRAY2BGR) 
83 | else: 84 | dyeing_X[:, :, channel - 1] += ( 85 | (255 - dyeing_X[:, :, channel - 1]) * (seg_map[:, :, channel])).astype(np.uint8) 86 | dyeing_X[:, :, 0] += ((255 - dyeing_X[:, :, 0]) * (seg_map[:, :, channel])).astype(np.uint8) 87 | seg_map_rgb = copy.deepcopy(seg_map[:, :, channel] * 255).astype(np.uint8) 88 | seg_map_rgb = cv2.cvtColor(seg_map_rgb, cv2.COLOR_GRAY2BGR) 89 | cv2.imshow('seg map%d' % channel, seg_map_rgb) 90 | cv2.imshow('img_dyeing', dyeing_X) 91 | # classify check 92 | for scale in range(7): 93 | cls_map = Y_m['cls_mask'][scale][0] 94 | reg_map = Y_m['reg_mask'][scale][0] 95 | point_type_len = cls_map.shape[2] / 8 96 | color = tuple() 97 | for scale_type in range(cls_map.shape[2] / 2): 98 | default_box_width = scale_table[scale][scale_type % point_type_len] 99 | index = np.where(cls_map[:, :, scale_type * 2 + 1] > 0.5) 100 | index = np.array(index).T 101 | # index_img = index * int(256 / (2 ** scale)) + int(256 / (2 ** scale)) / 2 102 | index_img = index * int(strides[scale]) + int(strides[scale] / 2) 103 | 104 | if scale_type / point_type_len == 0: 105 | color = (255, 0, 0) 106 | elif scale_type / point_type_len == 1: 107 | color = (0, 255, 0) 108 | elif scale_type / point_type_len == 2: 109 | color = (0, 0, 255) 110 | elif scale_type / point_type_len == 3: 111 | color = (255, 0, 255) 112 | for orde, ind in enumerate(index_img): 113 | # regression check 114 | reg_val = reg_map[index[orde][0], index[orde][1], 4 * scale_type:4 * scale_type + 4] 115 | Dx = reg_val[0] * default_box_width 116 | Dy = reg_val[1] * default_box_width 117 | Ss = int(np.exp(reg_val[2]) * default_box_width) 118 | # Dy=0 119 | # Dx=0 120 | # Ss = default_box_width 121 | ind[1] = ind[1] + Dx 122 | ind[0] = ind[0] + Dy 123 | # detection 124 | cv2.circle(dyeing_X, (ind[1], ind[0]), 2, color, 2) 125 | rect_lt = (ind[1] - Ss // 2, ind[0] - Ss // 2) 126 | rect_rb = (ind[1] + Ss // 2, ind[0] + Ss // 2) 127 | cv2.rectangle(dyeing_X, rect_lt, rect_rb, color, 1) 128 | 129 | cv2.imshow('img_dyeing', dyeing_X) 130 | cv2.waitKey() 131 | 132 | def structure_train_context(self): 133 | opti = tf.train.AdamOptimizer(0.0001) 134 | tower_grads = list() 135 | device_num = 4 136 | self.solo_batch_size = self.batch_size // device_num 137 | nets = list() 138 | test_loss = None 139 | for i in range(device_num): 140 | with tf.device('/gpu:%d' % i): 141 | with tf.name_scope('GPU%d' % i): 142 | net = get_network('CSTR', global_reuse=False if i == 0 else True) 143 | nets.append(net) 144 | loss_dict = net.structure_loss() 145 | loss =loss_dict['cls loss'] + loss_dict['reg loss'] + net.lamd * loss_dict['seg loss'] 146 | # loss = loss_dict['cls loss'] 147 | if i ==0: 148 | test_loss = [loss_dict['cls loss'] , loss_dict['reg loss'] , net.lamd * loss_dict['seg loss']] 149 | grads = opti.compute_gradients(loss) 150 | tower_grads.append(grads) 151 | 152 | grads = self.average_gradients(tower_grads) 153 | apply_gradinet_op = opti.apply_gradients(grads) 154 | # test_op = tf.train.AdamOptimizer(0.0001).minimize(test_loss) 155 | 156 | self.sess.run(tf.global_variables_initializer()) 157 | vgg16_initializer = nets[0].vgg16_initializer 158 | vgg16_initializer(self.sess) 159 | 160 | # loss_dict_val = nets[0].structure_loss() 161 | 162 | offset = 0 163 | 164 | scale_table = [[184, 208, 232, 256], 165 | [124, 136, 148, 160], 166 | [88, 96, 104, 112], 167 | [56, 64, 72, 80], 168 | [36, 40, 44, 48], 169 | [20, 24, 28, 32], 170 | [4, 8, 6, 10, 12, 16]] 171 | 172 | merged = self.create_summary(nets[0].get_summary(), 
172 |         merged = self.create_summary(nets[0].get_summary(), './data/logs/log_CSTR_2_cls_lr:ful_loss')
173 |         for ep in range(10):
174 |             random_list()
175 |             for iter in range(85000):
176 |                 feed_dict_val = None
177 |                 if iter % 10 == 0:
178 |                     print('val testing...')
179 |                     feed_dict_val = dict()
180 | 
181 |                     Y_val_mb, X_val_mb = get_sample_tensor('CPD', batch_size=[iter // 10 * self.val_size,
182 |                                                                              iter // 10 * self.val_size + self.val_size],
183 |                                                            filename='val')
184 | 
185 |                     # self.artificial_check(X_val_mb, Y_val_mb, scale_table)
186 |                     if X_val_mb is None:
187 |                         continue
188 |                     actually_batch_size = X_val_mb.shape[0]
189 | 
190 |                     Y_val_mb_flatten = flatten_concat(Y_val_mb)
191 |                     feed_dict_val[nets[0].X] = X_val_mb
192 |                     feed_dict_val[nets[0].Ycls] = Y_val_mb_flatten['cls_data']
193 |                     feed_dict_val[nets[0].Yreg] = Y_val_mb_flatten['reg_data']
194 |                     feed_dict_val[nets[0].Yseg] = Y_val_mb_flatten['seg_data']
195 |                     feed_dict_val[nets[0].on_train] = False
196 |                     feed_dict_val[nets[0].batch_size] = actually_batch_size
197 | 
198 |                     los_cls, los_reg, los_seg, mg \
199 |                         = self.sess.run([test_loss[0],
200 |                                          test_loss[1],
201 |                                          test_loss[2],
202 |                                          merged],
203 |                                         feed_dict=feed_dict_val)
204 |                     self.iter_num = iter
205 |                     self.write_summary(mg)
206 |                     print('iter step:%d total loss:%f cls loss:%f,reg loss:%f,seg loss:%f'
207 |                           % (iter, (los_cls + los_reg + los_seg), los_cls, los_reg,
208 |                              los_seg))
209 | 
210 |                 # training scope
211 |                 print('opti iter%d...' % iter)
212 |                 t_iter_start = time.time()
213 |                 stretch = self.batch_size
214 |                 while True:
215 |                     Y_train_mb, X_train_mb = get_sample_tensor('CPD', batch_size=[iter * self.batch_size + offset,
216 |                                                                                   iter * self.batch_size + stretch + offset],
217 |                                                                filename='train')
218 |                     # break
219 |                     if X_train_mb is None:
220 |                         continue
221 |                     actually_batch_size = X_train_mb.shape[0]
222 |                     if actually_batch_size < self.batch_size:
223 |                         stretch += self.batch_size - actually_batch_size
224 |                         print('Error: fetched %d samples, fewer than batch_size; widening the fetch window...' % actually_batch_size)
225 |                         continue
226 |                     break
227 | 
228 |                 feed_dict = dict()
229 |                 # self.artificial_check(X_train_mb, Y_train_mb, scale_table)
230 |                 for device_id in range(device_num):
231 |                     # test = init_template_f_in(scale_table, [128, 85.3333, 64, 32, 16, 8, 4])
232 |                     Y_train_mb_flatten = flatten_concat(Y_train_mb[device_id * self.solo_batch_size:
233 |                                                                    (device_id + 1) * self.solo_batch_size])
234 |                     # print(np.sum(Y_train_mb_flatten['cls_data'][:,1]))
235 |                     feed_dict[nets[device_id].X] = X_train_mb[device_id * self.solo_batch_size:
236 |                                                               (device_id + 1) * self.solo_batch_size]
237 |                     feed_dict[nets[device_id].Ycls] = Y_train_mb_flatten['cls_data']
238 |                     feed_dict[nets[device_id].Yreg] = Y_train_mb_flatten['reg_data']
239 |                     feed_dict[nets[device_id].Yseg] = Y_train_mb_flatten['seg_data']
240 |                     feed_dict[nets[device_id].on_train] = True
241 |                     feed_dict[nets[device_id].batch_size] = self.solo_batch_size
242 | 
243 |                 t_iter_pre_opti = time.time()
244 |                 # train_loss = self.sess.run(test_loss[0]+test_loss[1]+test_loss[2],
245 |                 #                            feed_dict=feed_dict)
246 |                 self.sess.run(apply_gradient_op,
247 |                               feed_dict=feed_dict)
248 |                 # train_loss = self.sess.run(test_loss[0]+test_loss[1]+test_loss[2],
249 |                 #                            feed_dict=feed_dict)
250 |                 print('optimizer update successful, total spend:%fs and opti spend:%fs this time...'
251 |                       % ((time.time() - t_iter_start), (time.time() - t_iter_pre_opti)))
252 |                 # print('optimizer update successful, iter:%d loss:%f'
253 |                 #       % (iter, train_loss))
254 |                 a = 1
255 |         self.close_summary_writer()
256 | 
257 |         # scale_table = [[256, 232, 208, 184],
258 |         #                [124, 136, 148, 160],
259 |         #                [88, 96, 104, 112],
260 |         #                [56, 64, 72, 80],
261 |         #                [36, 40, 44, 48],
262 |         #                [20, 24, 28, 32],
263 |         #                [4, 8, 6, 10, 12, 16]]
264 |         #
265 |         # saver = self.get_saver(total_vars)
266 |         # merged = self.create_summary('./data/logs/log_CSTR')
267 |         # for iter in range(50000):
268 |         #     if iter % 50 == 0:
269 |         #         Y_val_mb, X_val_mb = get_sample_tensor('CPD', batch_size=[self.batch_size * 1000 + iter * self.val_size,
270 |         #                                                                   self.batch_size * 1000 + iter * self.val_size \
271 |         #                                                                   + self.val_size])
272 |         #         if X_val_mb is None:
273 |         #             continue
274 |         #         actually_batch_size = X_val_mb.shape[0]
275 |         #         print('val testing...')
276 |         #         Y_val_mb_flatten = flatten_concat(Y_val_mb)
277 |         #         # self.artificial_check(X_val_mb, Y_val_mb, scale_table)
278 |         #         los_cls, los_reg, los_seg, mg, OHEM_data, OHEM_data_cls \
279 |         #             = self.sess.run([loss_dict['cls loss'],
280 |         #                              loss_dict['reg loss'],
281 |         #                              loss_dict['seg loss'],
282 |         #                              merged,
283 |         #                              OHEM,
284 |         #                              OHEM_cls],
285 |         #                             feed_dict={self.network.X: X_val_mb,
286 |         #                                        self.network.Ycls: Y_val_mb_flatten['cls_data'],
287 |         #                                        self.network.Yreg: Y_val_mb_flatten['reg_data'],
288 |         #                                        self.network.Yseg: Y_val_mb_flatten['seg_data'],
289 |         #                                        self.network.on_train: False,
290 |         #                                        self.network.batch_size: actually_batch_size
291 |         #                                        })
292 |         #         print('iter step:%d total loss:%f cls loss:%f,reg loss:%f,seg loss:%f'
293 |         #               % (iter, (los_cls + los_reg + los_seg * self.network.lamd), los_cls, los_reg,
294 |         #                  los_seg * self.network.lamd))
295 |         #         self.iter_num = iter
296 |         #         self.write_summary(mg)
297 |         #
298 |         #         # t1 = time.time()
299 |         #         # self.sess.run([self.network.get_pred()[2],
300 |         #         #                self.network.get_pred()[0]['f11_CPD'],
301 |         #         #                self.network.get_pred()[1]['f11_CPD'],
302 |         #         #                self.network.get_pred()[0]['f10_CPD'],
303 |         #         #                self.network.get_pred()[1]['f10_CPD'],
304 |         #         #                self.network.get_pred()[0]['f9_CPD'],
305 |         #         #                self.network.get_pred()[1]['f9_CPD'],
306 |         #         #                self.network.get_pred()[0]['f8_CPD'],
307 |         #         #                self.network.get_pred()[1]['f8_CPD'],
308 |         #         #                self.network.get_pred()[0]['f7_CPD'],
309 |         #         #                self.network.get_pred()[1]['f7_CPD'],
310 |         #         #                self.network.get_pred()[0]['f4_CPD'],
311 |         #         #                self.network.get_pred()[1]['f4_CPD'],
312 |         #         #                self.network.get_pred()[0]['f3_CPD'],
313 |         #         #                self.network.get_pred()[1]['f3_CPD']],
314 |         #         #               feed_dict={self.network.X: X_val_mb,
315 |         #         #                          self.network.on_train: False,
316 |         #         #                          self.network.batch_size: self.val_size
317 |         #         #                          })
318 |         #         # print('spend %f' % (time.time() - t1))
319 |         #
320 |         #     print('opti iter%d...' % iter)
321 |         #     Y_train_mb, X_train_mb = get_sample_tensor('CPD', batch_size=[iter * self.batch_size,
322 |         #                                                                    iter * self.batch_size + self.batch_size])
323 |         #     if X_train_mb is None:
324 |         #         continue
325 |         #     actually_batch_size = X_train_mb.shape[0]
326 |         #     Y_train_mb_flatten = flatten_concat(Y_train_mb)
327 |         #
328 |         #     self.sess.run(opti_dict, feed_dict={self.network.X: X_train_mb,
329 |         #                                         self.network.Ycls: Y_train_mb_flatten['cls_data'],
330 |         #                                         self.network.Yreg: Y_train_mb_flatten['reg_data'],
331 |         #                                         self.network.Yseg: Y_train_mb_flatten['seg_data'],
332 |         #                                         self.network.on_train: True,
333 |         #                                         self.network.batch_size: actually_batch_size
334 |         #                                         })
335 | 
--------------------------------------------------------------------------------
/libs/tools/gadget.py:
--------------------------------------------------------------------------------
1 | # _*_ coding: utf-8 _*_
2 | # @Time 18-7-27 5:15 PM
3 | # @File gadget.py
4 | # @Software PyCharm
5 | # @Author JK.Rao
6 | 
7 | import os
8 | import numpy as np
9 | import shutil
10 | import cv2
11 | import copy
12 | import time
13 | import cupy as cp
14 | 
15 | 
16 | def mk_dir(path):
17 |     if not os.path.exists(path):
18 |         os.makedirs(path)
19 | 
20 | 
21 | def transform_file(source_file, obj_path, obj_name=None):
22 |     mk_dir(obj_path)
23 |     if obj_name is None:
24 |         file_name = source_file.split('/')[-1]
25 |     else:
26 |         file_name = obj_name
27 |     shutil.copy(source_file, os.path.join(obj_path, file_name))
28 | 
29 | 
30 | # point format: [x_left, y_top, x_right, y_bottom]
31 | def calcul_iou(ltrb_point1s, ltrb_point2s):
32 |     x = [ltrb_point1s[0], ltrb_point1s[2], ltrb_point2s[0], ltrb_point2s[2]]
33 |     y = [ltrb_point1s[1], ltrb_point1s[3], ltrb_point2s[1], ltrb_point2s[3]]
34 | 
35 |     if x[0] >= x[3] or x[1] <= x[2] or y[0] >= y[3] or y[1] <= y[2]:  # boxes do not overlap
36 |         ins_score = 0
37 |     else:
38 |         x_sort = np.sort(x)
39 |         y_sort = np.sort(y)
40 | 
41 |         ins_score = float(x_sort[2] - x_sort[1]) * (y_sort[2] - y_sort[1]) / (
42 |                 (x[1] - x[0]) * (y[1] - y[0]) + (x[3] - x[2]) * (y[3] - y[2]) -
43 |                 (x_sort[2] - x_sort[1]) * (y_sort[2] - y_sort[1]))
44 |     return ins_score
45 | 
46 | 
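# A minimal sanity sketch of calcul_iou; the _demo_* helper is an editor's
# addition, not part of the original module. Two 10x10 boxes overlapping in a
# 5x5 patch share 25 px of a 100 + 100 - 25 = 175 px union, so IoU = 25/175.
def _demo_calcul_iou():
    score = calcul_iou([0, 0, 10, 10], [5, 5, 15, 15])
    assert abs(score - 25. / 175.) < 1e-6  # ~0.1429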
47 | # default_matrix: 2d tensor like [[left, top, right, bottom]...]
48 | # gt_matrix: same format as default_matrix; rows are paired with default_matrix rows
49 | def calcul_matrix_iou(default_matrix, gt_matrix):
50 |     area_default = (default_matrix[:, 2] - default_matrix[:, 0]) * (default_matrix[:, 3] - default_matrix[:, 1])
51 |     area_gt = (gt_matrix[:, 2] - gt_matrix[:, 0]) * (gt_matrix[:, 3] - gt_matrix[:, 1])
52 |     f_in_matrix = np.append(default_matrix, gt_matrix, axis=-1)
53 |     x_in_matrix = np.delete(f_in_matrix, [1, 3, 5, 7], axis=-1)
54 |     y_in_matrix = np.delete(f_in_matrix, [0, 2, 4, 6], axis=-1)
55 |     intersect_flag = x_in_matrix[:, 0] < x_in_matrix[:, 3]
56 |     intersect_flag = np.logical_and(intersect_flag, x_in_matrix[:, 1] > x_in_matrix[:, 2])
57 |     intersect_flag = np.logical_and(intersect_flag, y_in_matrix[:, 0] < y_in_matrix[:, 3])
58 |     intersect_flag = np.logical_and(intersect_flag, y_in_matrix[:, 1] > y_in_matrix[:, 2]).astype(np.float32)
59 | 
60 |     x_in_matrix.sort(axis=-1)
61 |     y_in_matrix.sort(axis=-1)
62 |     area_i = (x_in_matrix[:, 2] - x_in_matrix[:, 1]) * (y_in_matrix[:, 2] - y_in_matrix[:, 1])
63 |     epsilon = 1e-10  # guards against division by zero for degenerate boxes
64 |     return intersect_flag * area_i / (area_default + area_gt - area_i + epsilon)
65 | 
66 | 
67 | # gt_rect: [cx, cy, ss, ss]
68 | # cls_map: 4d tensor
69 | # reg_map: 4d tensor
70 | # scale: [scale of default box]
71 | # stride: stride of map
72 | # point_type: 0-3
73 | # threshold: the IoU threshold
74 | def project_feature_map(gt_rect, cls_map, reg_map, scale, stride, point_type, threshold=0.5):
75 |     test_time = 0.
76 |     top, bottom, left, right = gt_rect[1] - gt_rect[3] // 2, \
77 |                                gt_rect[1] + gt_rect[3] // 2, \
78 |                                gt_rect[0] - gt_rect[2] // 2, \
79 |                                gt_rect[0] + gt_rect[2] // 2
80 |     max_scale = max(scale)
81 |     height, width = cls_map.shape[1:3]
82 |     default_boxes = list()
83 |     position = list()
84 |     for step_H in range(int(np.floor((top - max_scale // 2) / stride)),
85 |                         int(np.ceil((bottom + max_scale // 2) / stride))):
86 |         for step_W in range(int(np.floor((left - max_scale // 2) / stride)),
87 |                             int(np.ceil((right + max_scale // 2) / stride))):
88 |             if step_H < 0 or step_W < 0 or step_H > height - 1 or step_W > width - 1:
89 |                 continue
90 |             for scal_index, scal in enumerate(scale):
91 |                 default_box = [int(step_W * stride + stride / 2 - scal / 2),
92 |                                int(step_H * stride + stride / 2 - scal / 2),
93 |                                int(step_W * stride + stride / 2 + scal / 2),
94 |                                int(step_H * stride + stride / 2 + scal / 2)]
95 |                 default_boxes.append(default_box)
96 |                 position.append([step_H, step_W, scal_index, len(scale)])
97 |                 t0 = time.time()
98 |                 ins_score = calcul_iou(default_box, [int(left), int(top), int(right), int(bottom)])
99 |                 test_time += (time.time() - t0)
100 |                 if ins_score > threshold:
101 |                     # print(ins_score)
102 |                     # print(step_H, step_W, scal)
103 |                     cent_dbox = [int((default_box[0] + default_box[2]) / 2),
104 |                                  int((default_box[1] + default_box[3]) / 2),
105 |                                  scal,
106 |                                  scal]
107 |                     cls_map[0, step_H, step_W,
108 |                     point_type * len(scale) * 2 + scal_index * 2:point_type * len(scale) * 2 + scal_index * 2 + 2] = \
109 |                         [0, 1]
110 | 
111 |                     reg_map[0, step_H, step_W,
112 |                     point_type * len(scale) * 4 + scal_index * 4:point_type * len(scale) * 4 + scal_index * 4 + 4] = \
113 |                         [(gt_rect[0] - cent_dbox[0]) / float(scal),
114 |                          (gt_rect[1] - cent_dbox[1]) / float(scal),
115 |                          np.log(gt_rect[2] / float(scal)),
116 |                          np.log(gt_rect[3] / float(scal))]
117 |     return cls_map, reg_map, test_time
118 | 
119 | 
120 | def project_feature_map_simple(gt_rect, map_height, map_width, scale, stride, point_type, map_type):
121 |     # t_start = time.time()
122 |     top, bottom, left, right = gt_rect[1] - gt_rect[3] // 2, \
123 |                                gt_rect[1] + gt_rect[3] // 2, \
124 |                                gt_rect[0] - gt_rect[2] // 2, \
125 |                                gt_rect[0] + gt_rect[2] // 2
126 |     # max_scale = max(scale)
127 |     height, width = map_height, map_width
128 |     default_boxes = list()
129 |     position = list()
130 |     gt_rects = list()
131 |     gt_boxes = list()
132 |     scale_list = list()
133 |     valid_number = 0
134 |     test_n = 0
135 | 
136 |     for scal_index, scal in enumerate(scale):
137 |         if scal < gt_rect[-1] * 0.707 or 0.707 * scal > gt_rect[-1]:  # 0.707 ~ 1/sqrt(2): keep scal within a factor of sqrt(2) of the gt size
138 |             continue
139 |         invalid_border_length = min(scal, gt_rect[-1]) / 3.
140 |         # shrink the sweep window by a border margin so only boxes with substantial overlap are generated
141 |         for step_H in range(int(np.floor((top - scal / 2 + invalid_border_length) / stride)),
142 |                             int(np.ceil((bottom + scal / 2 - invalid_border_length) / stride))):
143 |             for step_W in range(int(np.floor((left - scal / 2 + invalid_border_length) / stride)),
144 |                                 int(np.ceil((right + scal / 2 - invalid_border_length) / stride))):
145 |                 test_n += 1
146 |                 if step_H < 0 or step_W < 0 or step_H > height - 1 or step_W > width - 1:
147 |                     continue
148 | 
149 |                 valid_number += 1
150 |                 default_box = [int(step_W * stride + stride / 2 - scal / 2),
151 |                                int(step_H * stride + stride / 2 - scal / 2),
152 |                                int(step_W * stride + stride / 2 + scal / 2),
153 |                                int(step_H * stride + stride / 2 + scal / 2)]
154 | 
155 |                 default_boxes.append(default_box)
156 |                 position.append([step_H, step_W, scal_index, len(scale), point_type, map_type])
157 |                 gt_rects.append(gt_rect)
158 |                 gt_boxes.append([int(left), int(top), int(right), int(bottom)])
159 |                 scale_list.append(scal)
160 | 
161 |     # t_over = time.time()
162 |     # if t_over - t_start > 0.1:
163 |     #     print(t_over - t_start, stride, len(scale_list), test_n, gt_rect[-1])
164 |     return default_boxes, position, gt_rects, gt_boxes, scale_list
165 | 
166 | 
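# A small sanity sketch (editor's addition, not part of the original module) of
# the boolean-mask filtering that project_feature_map_iou below applies to its
# parallel arrays: one IoU vector selects the matching rows from every
# per-box array at once.
def _demo_iou_masking():
    iou = np.array([0.2, 0.7, 0.9])
    boxes = np.array([[0, 0, 4, 4], [0, 0, 8, 8], [2, 2, 6, 6]])
    keep = iou > 0.5
    assert boxes[keep].shape == (2, 4)  # only the two rows with IoU > 0.5 remain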
167 | # cls_maps: list of feature maps from f11 to f3
168 | # reg_maps: same format as cls_maps
169 | # default_boxes: default box matrix: [[left, top, right, bottom]...]
170 | # iou_matrix: a vector of IoU scores, parallel to default_boxes
171 | # position_matrix: [[step_H, step_W, scal_index, len(scale), point_type, map_type]...]
172 | # gt_rects: ground truth like: [[x, y, ss, ss]...]
173 | # scale_matrix: the default-box scales, parallel to default_boxes
174 | def project_feature_map_iou(cls_maps, reg_maps, default_boxes, iou_matrix, position_matrix, gt_rects, scale_matrix,
175 |                             threshold=0.5):
176 |     iou_mask = iou_matrix > threshold
177 |     position_matrix_roi = position_matrix[iou_mask]
178 |     scale_matrix_roi = scale_matrix[iou_mask]
179 |     gt_rects_iou = gt_rects[iou_mask]
180 |     default_boxes_iou = default_boxes[iou_mask]
181 |     reg_matrix_x = gt_rects_iou[:, 0] - (default_boxes_iou[:, 0] + default_boxes_iou[:, 2]) / 2.
182 |     reg_matrix_y = gt_rects_iou[:, 1] - (default_boxes_iou[:, 1] + default_boxes_iou[:, 3]) / 2.
183 |     for pos_index, pos in enumerate(position_matrix_roi):  # write [0, 1] cls targets and SSD-style reg targets
184 |         step_H = pos[0]
185 |         step_W = pos[1]
186 |         scal_index = pos[2]
187 |         scal_length = pos[3]
188 |         point_type = pos[4]
189 |         cls_maps[pos[-1]][0, step_H, step_W,
190 |         point_type * scal_length * 2 + scal_index * 2:point_type * scal_length * 2 + scal_index * 2 + 2] = [0, 1]
191 |         reg_maps[pos[-1]][0, step_H, step_W,
192 |         point_type * scal_length * 4 + scal_index * 4:point_type * scal_length * 4 + scal_index * 4 + 4] = \
193 |             [reg_matrix_x[pos_index] / float(scale_matrix_roi[pos_index]),
194 |              reg_matrix_y[pos_index] / float(scale_matrix_roi[pos_index]),
195 |              np.log(gt_rects_iou[pos_index, 2] / float(scale_matrix_roi[pos_index])),
196 |              np.log(gt_rects_iou[pos_index, 3] / float(scale_matrix_roi[pos_index]))]
197 |     return cls_maps, reg_maps
198 | 
199 | 
200 | def init_template_f_in(scales, strides):
201 |     template_f_in = np.zeros([7, 128, 128, 6, 4], dtype=np.int32)
202 |     template_height_index = np.tile(np.arange(128), 128).reshape([128, 128]).T
203 |     template_width_index = np.tile(np.arange(128), 128).reshape([128, 128])
204 |     for sca_index, scale in enumerate(scales):
205 |         for default_index, default_box in enumerate(scale):
206 |             template_f_in[sca_index, :, :, default_index, 0] = ((template_width_index + 0.5) * strides[
207 |                 sca_index] - default_box // 2).astype(np.int32)
208 |             template_f_in[sca_index, :, :, default_index, 1] = ((template_height_index + 0.5) * strides[
209 |                 sca_index] - default_box // 2).astype(np.int32)
210 |             template_f_in[sca_index, :, :, default_index, 2] = ((template_width_index + 0.5) * strides[
211 |                 sca_index] + default_box // 2).astype(np.int32)
212 |             template_f_in[sca_index, :, :, default_index, 3] = ((template_height_index + 0.5) * strides[
213 |                 sca_index] + default_box // 2).astype(np.int32)
214 |     return template_f_in.reshape([1, 7, 128, 128, 6, 4])
215 | 
216 | 
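# A minimal sanity sketch (editor's addition, not part of the original module)
# of the template produced above: every [scale, row, col, box] cell holds a
# default box in pixel coordinates, centered on its feature-map cell.
def _demo_template_box():
    scales = [[184, 208, 232, 256], [124, 136, 148, 160], [88, 96, 104, 112],
              [56, 64, 72, 80], [36, 40, 44, 48], [20, 24, 28, 32], [4, 8, 6, 10, 12, 16]]
    strides = [128, 85.3333, 64, 32, 16, 8, 4]
    template = init_template_f_in(scales, strides)
    # scale index 6 has stride 4; cell (row 10, col 20), default box width 4:
    # center = ((20 + 0.5) * 4, (10 + 0.5) * 4) = (82, 42) -> box [80, 40, 84, 44]
    assert template[0, 6, 10, 20, 0].tolist() == [80, 40, 84, 44]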
217 | # gt_matrix: 2d tensor like: [[left, top, right, bottom], ...]
218 | # scales: per-feature-map default box widths (see scale_table in __main__)
219 | # strides: stride of each feature map
220 | # threshold: the IoU threshold (unused here; kept for interface symmetry)
221 | # returns: IoU tensor of shape [gts_num, 7, 128, 128, 6, 1], each gt box
222 | #          broadcast against the default-box template from init_template_f_in
223 | # numpy reference implementation; a cupy variant follows below
224 | def project_feature_map_matrix(gt_matrix, scales, strides, threshold=0.5):
225 |     template_f_in = init_template_f_in(scales, strides)
226 |     gt_matrix = gt_matrix.reshape([-1, 1, 1, 1, 1, 4])
227 |     cls_f_in = None
228 |     gts_num = gt_matrix.shape[0]
229 |     t_1 = time.time()
230 |     for _ in range(gts_num):  # stack one copy of the template per gt box
231 |         cls_f_in = template_f_in if cls_f_in is None else np.append(cls_f_in, template_f_in, axis=0)
232 |     # init
233 |     area_default_boxes = ((cls_f_in[:, :, :, :, :, 2] - cls_f_in[:, :, :, :, :, 0]) *
234 |                           (cls_f_in[:, :, :, :, :, 3] - cls_f_in[:, :, :, :, :, 1])).reshape(
235 |         [gts_num, 7, 128, 128, 6, 1])
236 |     area_gt = ((gt_matrix[:, :, :, :, :, 2] - gt_matrix[:, :, :, :, :, 0]) *
237 |                (gt_matrix[:, :, :, :, :, 3] - gt_matrix[:, :, :, :, :, 1])).reshape([gts_num, 1, 1, 1, 1, 1])
238 |     valid_region = cls_f_in[:, :, :, :, :, 0] < gt_matrix[:, :, :, :, :, 2]
239 |     valid_region = np.logical_and(valid_region, cls_f_in[:, :, :, :, :, 2] > gt_matrix[:, :, :, :, :, 0])
240 |     valid_region = np.logical_and(valid_region, cls_f_in[:, :, :, :, :, 1] < gt_matrix[:, :, :, :, :, 3])
241 |     valid_region = np.logical_and(valid_region, cls_f_in[:, :, :, :, :, 3] > gt_matrix[:, :, :, :, :, 1])
242 |     valid_region = valid_region.astype(np.float32).reshape([gts_num, 7, 128, 128, 6, 1])
243 |     # cls_f_in = cls_f_in * valid_region
244 |     # combine_in_gt: [left1, right1, left2, right2, top1, bottom1, top2, bottom2]
245 |     combine_in_gt = np.zeros([gts_num, 7, 128, 128, 6, 8], dtype=np.int32)
246 |     t0 = time.time()
247 |     print('template stacking spend:%fs' % (t0 - t_1))
248 |     combine_in_gt[:, :, :, :, :, 0] = cls_f_in[:, :, :, :, :, 0]
249 |     combine_in_gt[:, :, :, :, :, 1] = cls_f_in[:, :, :, :, :, 2]
250 |     combine_in_gt[:, :, :, :, :, 2] = gt_matrix[:, :, :, :, :, 0]
251 |     combine_in_gt[:, :, :, :, :, 3] = gt_matrix[:, :, :, :, :, 2]
252 |     combine_in_gt[:, :, :, :, :, 4] = cls_f_in[:, :, :, :, :, 1]
253 |     combine_in_gt[:, :, :, :, :, 5] = cls_f_in[:, :, :, :, :, 3]
254 |     combine_in_gt[:, :, :, :, :, 6] = gt_matrix[:, :, :, :, :, 1]
255 |     combine_in_gt[:, :, :, :, :, 7] = gt_matrix[:, :, :, :, :, 3]
256 |     combine_in_gt = combine_in_gt * valid_region
257 |     t1 = time.time()
258 |     print('corner gathering spend:%fs' % (t1 - t0))
259 |     # sort rectangle points
260 |     combine_in_gt_W = np.sort(combine_in_gt[:, :, :, :, :, 0:4], axis=-1)
261 |     combine_in_gt_H = np.sort(combine_in_gt[:, :, :, :, :, 4:8], axis=-1)
262 |     t2 = time.time()
263 |     print('sorting spend:%fs' % (t2 - t1))
264 |     area_i = ((combine_in_gt_W[:, :, :, :, :, 2] - combine_in_gt_W[:, :, :, :, :, 1]) *
265 |               (combine_in_gt_H[:, :, :, :, :, 2] - combine_in_gt_H[:, :, :, :, :, 1])).reshape(
266 |         [gts_num, 7, 128, 128, 6, 1])
267 | 
268 |     iou = area_i / (area_default_boxes + area_gt - area_i)
269 | 
270 |     return iou
271 | 
272 | 
273 | # cls_f_in: 6d tensor like: [1, scale, height, width, default box, [left, top, right, bottom]]
274 | # gt_matrix: 1d tensor like: [left, top, right, bottom]
275 | # combine_in_gt: preallocated [1, 7, 128, 128, 6, 8] buffer, reused across calls
276 | # point_type: 0-3
277 | # threshold: the IoU threshold
278 | # cupy variant of project_feature_map_matrix; it also accumulates the
279 | # classification one-hots for one corner type into template_cls below
280 | t0_0 = time.time()
281 | template_one = cp.ones((7, 128, 128, 1), dtype=cp.float32)
282 | template_zero = cp.zeros((7, 128, 128, 1), dtype=cp.float32)
283 | template_cls = cp.concatenate([template_one, template_zero] * 24, axis=-1)
284 | print('cupy template setup spend:%fs' % (time.time() - t0_0))
285 | 
286 | 
287 | def project_feature_map_matrix_cupy(cls_f_in, gt_matrix, combine_in_gt, point_type, threshold=0.5):
288 |     gt_matrix = gt_matrix.reshape([-1, 1, 1, 1, 1, 4])
289 |     # init
290 | 
291 |     t_1 = time.time()
292 |     area_default_boxes = ((cls_f_in[:, :, :, :, :, 2] - cls_f_in[:, :, :, :, :, 0]) *
293 |                           (cls_f_in[:, :, :, :, :, 3] - cls_f_in[:, :, :, :, :, 1])).reshape([7, 128, 128, 6])
294 |     t0 = time.time()
295 |     print('area calculation spend:%fs' % (t0 - t_1))
296 |     area_gt = ((gt_matrix[:, :, :, :, :, 2] - gt_matrix[:, :, :, :, :, 0]) *
297 |                (gt_matrix[:, :, :, :, :, 3] - gt_matrix[:, :, :, :, :, 1])).reshape([1, 1, 1, 1])
298 |     valid_region = cls_f_in[:, :, :, :, :, 0] < gt_matrix[:, :, :, :, :, 2]
299 |     valid_region = cp.logical_and(valid_region, cls_f_in[:, :, :, :, :, 2] > gt_matrix[:, :, :, :, :, 0])
300 |     valid_region = cp.logical_and(valid_region, cls_f_in[:, :, :, :, :, 1] < gt_matrix[:, :, :, :, :, 3])
301 |     valid_region = cp.logical_and(valid_region, cls_f_in[:, :, :, :, :, 3] > gt_matrix[:, :, :, :, :, 1])
302 |     valid_region = valid_region.reshape([1, 7, 128, 128, 6, 1]).astype('f')
303 |     # cls_f_in = cls_f_in * valid_region
304 |     # combine_in_gt: [left1, right1, left2, right2, top1, bottom1, top2, bottom2]
305 | 
306 |     combine_in_gt[:, :, :, :, :, 0] = cls_f_in[:, :, :, :, :, 0]
307 |     combine_in_gt[:, :, :, :, :, 1] = cls_f_in[:, :, :, :, :, 2]
308 |     combine_in_gt[:, :, :, :, :, 2] = gt_matrix[:, :, :, :, :, 0]
309 |     combine_in_gt[:, :, :, :, :, 3] = gt_matrix[:, :, :, :, :, 2]
310 |     combine_in_gt[:, :, :, :, :, 4] = cls_f_in[:, :, :, :, :, 1]
311 |     combine_in_gt[:, :, :, :, :, 5] = cls_f_in[:, :, :, :, :, 3]
312 |     combine_in_gt[:, :, :, :, :, 6] = gt_matrix[:, :, :, :, :, 1]
313 |     combine_in_gt[:, :, :, :, :, 7] = gt_matrix[:, :, :, :, :, 3]
314 |     combine_in_gt = combine_in_gt * valid_region
315 |     t1 = time.time()
316 |     print('corner gathering spend:%fs' % (t1 - t0))
317 |     # sort rectangle points
318 |     combine_in_gt_W = cp.sort(combine_in_gt[:, :, :, :, :, 0:4], axis=-1)
319 |     combine_in_gt_H = cp.sort(combine_in_gt[:, :, :, :, :, 4:8], axis=-1)
320 |     t2 = time.time()
321 |     print('sorting spend:%fs' % (t2 - t1))
322 |     area_i = ((combine_in_gt_W[:, :, :, :, :, 2] - combine_in_gt_W[:, :, :, :, :, 1]) *
323 |               (combine_in_gt_H[:, :, :, :, :, 2] - combine_in_gt_H[:, :, :, :, :, 1])).reshape([7, 128, 128, 6])
324 | 
325 |     iou = area_i / (area_default_boxes + area_gt - area_i)
326 |     gts_feature = (iou > threshold).astype(cp.float32)
327 | 
328 |     cls_feature = cp.concatenate([1. - gts_feature, gts_feature], axis=-1)
329 |     cls_feature = cls_feature.reshape([7, 128, 128, 12])
330 | 
331 |     t3 = time.time()
332 |     template_cls[0:6, :, :, 8 * point_type:8 * point_type + 8] += cls_feature[0:6, :, :, 0:8]
333 |     template_cls[6, :, :, 8 * point_type:8 * point_type + 12] += cls_feature[6, :, :, 0:12]
334 |     print('cls accumulation spend:%fs' % (time.time() - t3))
335 | 
336 |     return iou
337 | 
338 | 
339 | # gt_array: 3d tensor shaped like [2, 4, num_quads] (x/y rows, 4 corner points per quad)
340 | # seg_map: 4d tensor shaped like [1, 512, 512, 4]
341 | def project_feature_map_seg(gt_array, seg_map):
342 |     for point_index in range(gt_array.shape[-1]):
343 |         point_corner = gt_array[:, :, point_index].T
344 |         assert point_corner.shape[0] == 4, 'AmountError: incorrect number in point corner.'
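        # Editor's summary of the steps below (inferred from the code): the quad
        # is augmented with the midpoint of each edge plus its center point, then
        # split into four sub-quads, one per corner type, and each sub-quad is
        # rasterized into its own channel of seg_map via cv2.drawContours.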
345 |         full_corner = copy.deepcopy(point_corner)
346 |         for i in range(4):
347 |             before_point = point_corner[i, :]
348 |             after_point = point_corner[0, :] if i == 3 else point_corner[i + 1, :]
349 |             center_point = np.array([(before_point[0] + after_point[0]) / 2,
350 |                                      (before_point[1] + after_point[1]) / 2]).reshape([1, 2]).astype(np.int32)
351 |             full_corner = np.insert(full_corner, i * 2 + 1, center_point, axis=0)
352 |         center_point = np.array([(full_corner[1, 0] + full_corner[5, 0]) / 2,
353 |                                  (full_corner[3, 1] + full_corner[7, 1]) / 2]).astype(np.int32)
354 | 
355 |         points_list = list()
356 |         points_list.append((full_corner[0] - 1).tolist() + (full_corner[1] - 1).tolist() + (center_point - 1).tolist() +
357 |                            (full_corner[-1] - 1).tolist())
358 |         points_list.append((full_corner[1] - np.array([0, 1])).tolist() + (full_corner[2] - np.array([0, 1])).tolist() +
359 |                            (full_corner[3] - np.array([0, 1])).tolist() + (center_point - np.array([0, 1])).tolist())
360 |         points_list.append(center_point.tolist() + full_corner[3].tolist() + full_corner[4].tolist() +
361 |                            full_corner[5].tolist())
362 |         points_list.append((full_corner[-1] - np.array([1, 0])).tolist() + (center_point - np.array([1, 0])).tolist() +
363 |                            (full_corner[5] - np.array([1, 0])).tolist() + (full_corner[6] - np.array([1, 0])).tolist())
364 |         for i in range(4):
365 |             seg_map[0, :, :, i] = cv2.drawContours(copy.deepcopy(seg_map[0, :, :, i]),
366 |                                                    [np.array(points_list[i]).reshape([4, 2]).astype(np.int32)],
367 |                                                    0, 1., cv2.FILLED)
368 |     return seg_map
369 | 
370 | 
371 | def array2list_CSTR_dict(batch_dicts):  # parameter renamed from 'dict' to avoid shadowing the builtin
372 |     for batch in batch_dicts:
373 |         cls_list = batch['cls_mask']
374 |         for index, cls_array in enumerate(cls_list):
375 |             cls_list[index] = cls_array.tolist()
376 |         reg_list = batch['reg_mask']
377 |         for index, reg_array in enumerate(reg_list):
378 |             reg_list[index] = reg_array.tolist()
379 |         seg_list = batch['seg_mask']
380 |         for index, seg_array in enumerate(seg_list):
381 |             seg_list[index] = seg_array.tolist()
382 |     return batch_dicts
383 | 
384 | 
385 | if __name__ == '__main__':
386 |     # cls_mask = np.ones([1, 64, 64, 1], dtype=np.float32)
387 |     # for i in range(47):
388 |     #     if i % 2 == 0:
389 |     #         cls_mask = np.append(cls_mask, np.zeros([1, 64, 64, 1], dtype=np.float32), axis=3)
390 |     #     else:
391 |     #         cls_mask = np.append(cls_mask, np.ones([1, 64, 64, 1], dtype=np.float32), axis=3)
392 |     # reg_mask = np.zeros([1, 64, 64, 96], dtype=np.float32)
393 |     # cls_mask, reg_mask = project_feature_map([153.57, 72.53, 23.367, 23.367], cls_mask, reg_mask,
394 |     #                                          [20, 24, 28, 32], 8, 1)
395 | 
396 |     # seg_map = np.zeros([1, 512, 512, 4], dtype=np.float32)
397 |     # gt_array = np.array([[10.1, 30, 30, 10], [10, 10, 30, 30]]).reshape([2, 4, 1])
398 |     # seg_map = project_feature_map_seg(gt_array, seg_map)
399 |     # cv2.imshow('test1', seg_map[0, :, :, 0])
400 |     # cv2.imshow('test2', seg_map[0, :, :, 1])
401 |     # cv2.imshow('test3', seg_map[0, :, :, 2])
402 |     # cv2.imshow('test4', seg_map[0, :, :, 3])
403 |     # cv2.waitKey()
404 |     # a = 1
405 | 
406 |     scale_table = [[184, 208, 232, 256],
407 |                    [124, 136, 148, 160],
408 |                    [88, 96, 104, 112],
409 |                    [56, 64, 72, 80],
410 |                    [36, 40, 44, 48],
411 |                    [20, 24, 28, 32],
412 |                    [4, 8, 6, 10, 12, 16]]
413 |     strides = [128, 85.3333, 64, 32, 16, 8, 4]
414 |     # test = init_template_f_in(scale_table, [128, 85.3333, 64, 32, 16, 8, 4])
415 |     gt_matrix = np.array([[10, 10, 110, 110], [200, 200, 250, 250], [10, 10, 110, 110], [200, 200, 250, 250],
416 |                           [10, 10, 110, 110], [200, 200, 250, 250], [10, 10, 110, 110], [200, 200, 250, 250]],
417 |                          dtype=np.int32)
418 |     t0 = time.time()
419 |     test1 = project_feature_map_matrix(gt_matrix, scale_table, strides)
420 |     print(time.time() - t0)
421 | 
422 |     gt_matrix_cus = cp.array([[10, 10, 110, 110], [200, 200, 250, 250], [10, 10, 110, 110], [200, 200, 250, 250],
423 |                               [10, 10, 110, 110], [200, 200, 250, 250], [10, 10, 110, 110], [200, 200, 250, 250],
424 |                               [10, 10, 110, 110], [200, 200, 250, 250], [10, 10, 110, 110], [200, 200, 250, 250],
425 |                               [10, 10, 110, 110], [200, 200, 250, 250], [10, 10, 110, 110], [200, 200, 250, 250],
426 |                               [10, 10, 110, 110], [200, 200, 250, 250], [10, 10, 110, 110], [200, 200, 250, 250],
427 |                               [10, 10, 110, 110], [200, 200, 250, 250], [10, 10, 110, 110], [200, 200, 250, 250],
428 |                               [10, 10, 110, 110], [200, 200, 250, 250], [10, 10, 110, 110], [200, 200, 250, 250],
429 |                               [10, 10, 110, 110], [200, 200, 250, 250], [10, 10, 110, 110], [200, 200, 250, 250],
430 |                               [10, 10, 110, 110], [200, 200, 250, 250], [10, 10, 110, 110], [200, 200, 250, 250],
431 |                               [10, 10, 110, 110], [200, 200, 250, 250], [10, 10, 110, 110], [200, 200, 250, 250]],
432 |                              dtype=cp.int32)
433 |     cls_f_in = cp.array(init_template_f_in(scale_table, strides))
434 |     combine_in_gt = cp.zeros((1, 7, 128, 128, 6, 8), dtype=cp.int32)
435 |     print('#############')
436 |     t1 = time.time()
437 |     for gt_matrix_cu in gt_matrix_cus:
438 |         gts_index = project_feature_map_matrix_cupy(cls_f_in, gt_matrix_cu, combine_in_gt, 0)
439 |         # for gt_index in gts_index:
440 | 
441 |     print(time.time() - t1)
442 |     a = 1
443 | 
--------------------------------------------------------------------------------